pandas-dev · slavanorm · Mar 11, 2024 · Mar 11, 2024 · Mar 11, 2024 · Mar 11, 2024
diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst
@@ -42,6 +42,7 @@ Bug fixes
 - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`)
 - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`)
 - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`)
+- Fixed bug in :func:`pandas.io.json_normalize` raising with ``errors='ignore'`` while traversing an empty list (:issue:`57810`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_222.other:

diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -432,14 +432,14 @@ def _pull_field(
     ) -> Scalar | Iterable:
         """Internal function to pull field"""
         result = js
+        if not isinstance(spec, list):
+            spec = [spec]
         try:
-            if isinstance(spec, list):
-                for field in spec:
-                    if result is None:
-                        raise KeyError(field)
-                    result = result[field]
-            else:
-                result = result[spec]
+            for field in spec:
+                # GH 57810
+                if result is None or not len(result):
+                    raise KeyError(field)
+                result = result[field]
         except KeyError as e:
             if extract_record:
                 raise KeyError(

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -106,6 +106,15 @@ def missing_metadata():
             ],
             "previous_residences": {"cities": [{"city_name": "Barmingham"}]},
         },
+        {
+            "name": "Minnie",
+            "addresses": [
+                {
+                    "number": 8449,
+                }
+            ],
+            "previous_residences": {"cities": []},
+        },
     ]
 
 
@@ -631,14 +640,15 @@ def test_missing_meta(self, missing_metadata):
         ex_data = [
             [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
             [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
+            [8449, np.nan, np.nan, np.nan, np.nan, "Minnie"],
         ]
         columns = ["number", "street", "city", "state", "zip", "name"]
         expected = DataFrame(ex_data, columns=columns)
         tm.assert_frame_equal(result, expected)
 
-    def test_missing_nested_meta(self):
+    def test_missing_nested_meta_traverse_none_errors_ignore(self):
         # GH44312
-        # If errors="ignore" and nested metadata is null, we should return nan
+        # If errors="ignore" and nested metadata is null, return nan
         data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
         result = json_normalize(
             data,
@@ -653,8 +663,11 @@ def test_missing_nested_meta(self):
         )
         tm.assert_frame_equal(result, expected)
 
-        # If errors="raise" and nested metadata is null, we should raise with the
-        # key of the first missing level
+    def test_missing_nested_meta_traverse_none_errors_raise(self):
+        # GH44312
+        # If errors="raise" and nested metadata is null, should raise
+        data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
+
         with pytest.raises(KeyError, match="'leaf' not found"):
             json_normalize(
                 data,
@@ -663,6 +676,22 @@ def test_missing_nested_meta(self):
                 errors="raise",
             )
 
+    def test_missing_nested_meta_traverse_empty_list_errors_ignore(self):
+        # GH57810: if errors="ignore" and nested metadata is an empty list, return nan
+        data = {"meta": "foo", "nested_meta": [], "value": [{"rec": 1}, {"rec": 2}]}
+        result = json_normalize(
+            data,
+            record_path="value",
+            meta=["meta", ["nested_meta", "leaf"]],
+            errors="ignore",
+        )
+        ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
+        columns = ["rec", "meta", "nested_meta.leaf"]
+        expected = DataFrame(ex_data, columns=columns).astype(
+            {"nested_meta.leaf": object}
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata):
         # GH41876
         # Ensure errors='raise' works as intended even when a record_path of length
@@ -681,8 +710,8 @@ def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata
 
     def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata):
         # GH41876
-        # Ensure errors='ignore' works as intended even when a record_path of length
-        # greater than one is passed in
+        # Ensure errors='ignore' works as intended
+        # even when a record_path of length greater than one is passed in
         result = json_normalize(
             data=missing_metadata,
             record_path=["previous_residences", "cities"],