From 5e70d1f46e848a04d068253d665e197ffc724058 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Nie=C5=82acny?= <piotr.nielacny@gmail.com>
Date: Thu, 16 Apr 2020 08:53:34 +0200
Subject: [PATCH] BUG: Raise a TypeError when record_path doesn't point to an
 array

When `record_path` points to a value that is iterable in Python but is not
an array in JSON terms (for example a string), `json_normalize` iterates
over it anyway and returns odd results instead of raising an error.

```
>>> json_normalize([{'key': 'value'}], record_path='key')
   0
0  v
1  a
2  l
3  u
4  e
```

According to RFC 8259 (https://tools.ietf.org/html/rfc8259), a JSON value MUST
be an object, array, number, or string, or one of the literal names false,
null, or true. Of these, only two (object and array) should be treated as
Iterable.

```
An object is an unordered *collection* of zero or more name/value
pairs, where a name is a string and a value is a string, number,
boolean, null, object, or array.

An array is an ordered *sequence* of zero or more values.

--
https://tools.ietf.org/html/rfc8259#page-3
```

Based on that `[{'key': 'value'}]` and `{'key': 'value'}` should not be
treated in the same way. In `json_normalize` documentation `record_path`
is described as `Path in each object to list of records`.

So when we translate JSON into Python objects, only a list (sequence) can
serve as a list of records. Based on that, `record_path` should point to
a `list`, not to an arbitrary `Iterable`.

In the tests I added all possible value types that are allowed in JSON and
should not be treated as a list of records. The null value is a special case
that is already handled.

|  type  |  value  | Iterable | Should be treated as list |
|--------|---------|----------|---------------------------|
| object | {}      | Yes      | No (unordered list)       |
| array  | []      | Yes      | Yes                       |
| number | 1       | No       | No                        |
| string | "value" | Yes      | No                        |
| false  | False   | No       | No                        |
| null   | None    | No       | No (Check #30148)         |
| true   | True    | No       | No                        |
---
 doc/source/whatsnew/v1.1.0.rst         |  1 +
 pandas/io/json/_normalize.py           | 12 ++++++------
 pandas/tests/io/json/test_normalize.py | 12 +++++++-----
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 82c43811c0444..5762c60a0d10a 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -529,6 +529,7 @@ I/O
 - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`)
 - Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`)
 - Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. (:issue:`31464`)
+- Bug in :func:`pandas.io.json.json_normalize` where no ``TypeError`` was raised when the location specified by ``record_path`` doesn't point to an array. (:issue:`26284`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 69e9b111a6c20..e833fdc20d542 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -239,23 +239,23 @@ def _pull_field(
             result = result[spec]
         return result
 
-    def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
+    def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> List:
         """
         Interal function to pull field for records, and similar to
-        _pull_field, but require to return Iterable. And will raise error
+        _pull_field, but require to return list. And will raise error
         if has non iterable value.
         """
         result = _pull_field(js, spec)
 
-        # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not
+        # GH 31507 GH 30145, GH 26284 if result is not list, raise TypeError if not
         # null, otherwise return an empty list
-        if not isinstance(result, Iterable):
+        if not isinstance(result, list):
             if pd.isnull(result):
                 result = []
             else:
                 raise TypeError(
-                    f"{js} has non iterable value {result} for path {spec}. "
-                    "Must be iterable or null."
+                    f"{js} has non list value {result} for path {spec}. "
+                    "Must be list or null."
                 )
         return result
 
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index b7a9918ff46da..4a32f3809c82b 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -475,13 +475,15 @@ def test_nonetype_record_path(self, nulls_fixture):
         expected = DataFrame({"i": 2}, index=[0])
         tm.assert_equal(result, expected)
 
-    def test_non_interable_record_path_errors(self):
-        # see gh-30148
-        test_input = {"state": "Texas", "info": 1}
+    @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"'])
+    def test_non_list_record_path_errors(self, value):
+        # see gh-30148, GH 26284
+        parsed_value = json.loads(value)
+        test_input = {"state": "Texas", "info": parsed_value}
         test_path = "info"
         msg = (
-            f"{test_input} has non iterable value 1 for path {test_path}. "
-            "Must be iterable or null."
+            f"{test_input} has non list value {parsed_value} for path {test_path}. "
+            "Must be list or null."
         )
         with pytest.raises(TypeError, match=msg):
             json_normalize([test_input], record_path=[test_path])