-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: improve pd.io.json_normalize #57811
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
606e96d
cc7aec8
0793c40
9647d1d
391ff1a
a3a22e8
15901a9
abc9974
3769b1f
0b91678
dc0d6f8
d7bd075
03f1dc1
6b5d760
9a72855
194ff85
eb71e1b
dfc6376
61e8dcc
aeae288
c9ef7de
add1882
c924ba3
3fe7870
68b2a15
f24dc1b
69c5243
296ccc5
51179b1
1e8d43a
6849ffe
fde9565
7e10e5a
2ef598d
ef857ba
9caace9
a73e49f
0db9759
a27d7a9
51062d7
750a270
11138d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -106,6 +106,15 @@ def missing_metadata(): | |||||
], | ||||||
"previous_residences": {"cities": [{"city_name": "Barmingham"}]}, | ||||||
}, | ||||||
{ | ||||||
"name": "Minnie", | ||||||
"addresses": [ | ||||||
{ | ||||||
"number": 8449, | ||||||
} | ||||||
], | ||||||
"previous_residences": {"cities": []}, | ||||||
}, | ||||||
] | ||||||
|
||||||
|
||||||
|
@@ -631,14 +640,15 @@ def test_missing_meta(self, missing_metadata): | |||||
ex_data = [ | ||||||
[9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], | ||||||
[8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], | ||||||
[8449, np.nan, np.nan, np.nan, np.nan, "Minnie"], | ||||||
] | ||||||
columns = ["number", "street", "city", "state", "zip", "name"] | ||||||
expected = DataFrame(ex_data, columns=columns) | ||||||
tm.assert_frame_equal(result, expected) | ||||||
|
||||||
def test_missing_nested_meta(self): | ||||||
def test_missing_nested_meta_traverse_none_errors_ignore(self): | ||||||
# GH44312 | ||||||
# If errors="ignore" and nested metadata is null, we should return nan | ||||||
# If errors="ignore" and nested metadata is nullable, return nan | ||||||
data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} | ||||||
result = json_normalize( | ||||||
data, | ||||||
|
@@ -653,8 +663,11 @@ def test_missing_nested_meta(self): | |||||
) | ||||||
tm.assert_frame_equal(result, expected) | ||||||
|
||||||
# If errors="raise" and nested metadata is null, we should raise with the | ||||||
# key of the first missing level | ||||||
def test_missing_nested_meta_traverse_none_errors_raise(self): | ||||||
# GH44312 | ||||||
# If errors="raise" and nested metadata is null, should raise | ||||||
data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} | ||||||
|
||||||
with pytest.raises(KeyError, match="'leaf' not found"): | ||||||
json_normalize( | ||||||
data, | ||||||
|
@@ -663,6 +676,22 @@ def test_missing_nested_meta(self): | |||||
errors="raise", | ||||||
) | ||||||
|
||||||
def test_missing_nested_meta_traverse_empty_list_errors_ignore(self): | ||||||
# If errors="ignore" and nested metadata is nullable, return nan | ||||||
data = {"meta": "foo", "nested_meta": [], "value": [{"rec": 1}, {"rec": 2}]} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I have lost track a bit on what we are trying to accomplish but this doesn't seem right to ignore an error when the meta argument starts peering into in an array instead of an object. Is this a TypeError on main or a KeyError? I feel like we are mixing the two up when we should not be |
||||||
result = json_normalize( | ||||||
data, | ||||||
record_path="value", | ||||||
meta=["meta", ["nested_meta", "leaf"]], | ||||||
errors="ignore", | ||||||
) | ||||||
ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]] | ||||||
columns = ["rec", "meta", "nested_meta.leaf"] | ||||||
expected = DataFrame(ex_data, columns=columns).astype( | ||||||
{"nested_meta.leaf": object} | ||||||
) | ||||||
tm.assert_frame_equal(result, expected) | ||||||
|
||||||
def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata): | ||||||
# GH41876 | ||||||
# Ensure errors='raise' works as intended even when a record_path of length | ||||||
|
@@ -681,8 +710,8 @@ def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata | |||||
|
||||||
def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata): | ||||||
# GH41876 | ||||||
# Ensure errors='ignore' works as intended even when a record_path of length | ||||||
# greater than one is passed in | ||||||
# Ensure errors='ignore' works as intended | ||||||
# even when a record_path of length greater than one is passed in | ||||||
result = json_normalize( | ||||||
data=missing_metadata, | ||||||
record_path=["previous_residences", "cities"], | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.