diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 72a2f84c4aaee..d672aadf03c6f 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -42,6 +42,7 @@ Bug fixes - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) +- Fixed bug in :func:`pandas.json_normalize` raising ``TypeError`` with ``errors='ignore'`` when a ``meta`` path traverses an empty list (:issue:`57810`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 7d3eefae39679..a3c215ded4ea6 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -432,14 +432,14 @@ def _pull_field( ) -> Scalar | Iterable: """Internal function to pull field""" result = js + if not isinstance(spec, list): + spec = [spec] try: - if isinstance(spec, list): - for field in spec: - if result is None: - raise KeyError(field) - result = result[field] - else: - result = result[spec] + for field in spec: + # GH 57810 + if result is None or not len(result): + raise KeyError(field) + result = result[field] except KeyError as e: if extract_record: raise KeyError( diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index d83e7b4641e88..7098903359e28 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -106,6 +106,15 @@ def missing_metadata(): ], "previous_residences": {"cities": [{"city_name": "Barmingham"}]}, }, + { + "name": "Minnie", + "addresses": [ + { + "number": 8449, + } + ], + "previous_residences": {"cities": []}, + }, ] @@ 
-631,14 +640,15 @@ def test_missing_meta(self, missing_metadata): ex_data = [ [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], + [8449, np.nan, np.nan, np.nan, np.nan, "Minnie"], ] columns = ["number", "street", "city", "state", "zip", "name"] expected = DataFrame(ex_data, columns=columns) tm.assert_frame_equal(result, expected) - def test_missing_nested_meta(self): + def test_missing_nested_meta_traverse_none_errors_ignore(self): # GH44312 - # If errors="ignore" and nested metadata is null, we should return nan + # If errors="ignore" and nested metadata is null, return nan data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} result = json_normalize( data, @@ -653,8 +663,11 @@ def test_missing_nested_meta(self): ) tm.assert_frame_equal(result, expected) - # If errors="raise" and nested metadata is null, we should raise with the - # key of the first missing level + def test_missing_nested_meta_traverse_none_errors_raise(self): + # GH44312 + # If errors="raise" and nested metadata is null, should raise + data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} + with pytest.raises(KeyError, match="'leaf' not found"): json_normalize( data, @@ -663,6 +676,22 @@ def test_missing_nested_meta(self): errors="raise", ) + def test_missing_nested_meta_traverse_empty_list_errors_ignore(self): + # GH57810: if errors="ignore" and nested metadata is an empty list, return nan + data = {"meta": "foo", "nested_meta": [], "value": [{"rec": 1}, {"rec": 2}]} + result = json_normalize( + data, + record_path="value", + meta=["meta", ["nested_meta", "leaf"]], + errors="ignore", + ) + ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]] + columns = ["rec", "meta", "nested_meta.leaf"] + expected = DataFrame(ex_data, columns=columns).astype( + {"nested_meta.leaf": object} + ) + tm.assert_frame_equal(result, expected) + def test_missing_meta_multilevel_record_path_errors_raise(self, 
missing_metadata): # GH41876 # Ensure errors='raise' works as intended even when a record_path of length @@ -681,8 +710,8 @@ def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata): # GH41876 - # Ensure errors='ignore' works as intended even when a record_path of length - # greater than one is passed in + # Ensure errors='ignore' works as intended + # even when a record_path of length greater than one is passed in result = json_normalize( data=missing_metadata, record_path=["previous_residences", "cities"],