From daf9d3e8d0e9baf9455b31d5a950146f0a63d094 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sat, 9 Mar 2019 11:47:57 -0500
Subject: [PATCH 01/10] BUG: fixed json_normalize with nullable meta fields
 (#25468)

---
 doc/source/whatsnew/v0.25.0.rst        |  2 +-
 pandas/io/json/normalize.py            |  2 +-
 pandas/tests/io/json/test_normalize.py | 43 ++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ea08a0a6fe07b..380f00903e0f4 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -214,7 +214,7 @@ I/O
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
--
+- Bug in :func:`json_normalize` for ``errors='ignore'`` and nullable metadata fields, the null values in dataframe were literal nan string and not numpy.nan (:issue:`25468`)
 -
 -
 
diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py
index 279630ccd107c..7c18f11eb13b0 100644
--- a/pandas/io/json/normalize.py
+++ b/pandas/io/json/normalize.py
@@ -281,6 +281,6 @@ def _recursive_extract(data, path, seen_meta, level=0):
             raise ValueError('Conflicting metadata name {name}, '
                              'need distinguishing prefix '.format(name=k))
 
-        result[k] = np.array(v).repeat(lengths)
+        result[k] = np.array(v, dtype=object).repeat(lengths)
 
     return result
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 3bf699cc8a1f0..8d7ba2f0cffaf 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -65,6 +65,24 @@ def author_missing_data():
          {'first': 'Jane', 'last_name': 'Doe'}
          }]
 
+@pytest.fixture
+def address_missing_data():
+    return [
+        {'name': 'Alice',
+         'addresses': [{'number': 9562,
+                        'street': 'Morris St.',
+                        'city': 'Massillon',
+                        'state': 'OH',
+                        'zip': 44646}]
+         },
+        {'addresses': [{'number':  8449,
+                        'street': 'Spring St.',
+                        'city': 'Elizabethton',
+                        'state': 'TN',
+                        'zip': 37643}]
+         }
+    ]
+
 
 class TestJSONNormalize(object):
 
@@ -378,6 +396,31 @@ def test_json_normalize_errors(self):
                       ['general', 'trade_version']],
                 errors='raise')
 
+    def test_missing_meta(self, address_missing_data):
+        # GH25468: If metadata is nullable with errors set to ignore, the null
+        # values should be numpy.nan values
+        result = json_normalize(
+            data=address_missing_data,
+            record_path='addresses',
+            meta='name',
+            errors='ignore')
+        ex_data = [
+            {'city': 'Massillon',
+             'number': 9562,
+             'state': 'OH',
+             'street': 'Morris St.',
+             'zip': 44646,
+             'name': 'Alice'},
+            {'city': 'Elizabethton',
+             'number': 8449,
+             'state': 'TN',
+             'street': 'Spring St.',
+             'zip': 37643,
+             'name': np.nan}
+        ]
+        expected = DataFrame(ex_data, columns=ex_data[0].keys())
+        tm.assert_frame_equal(result, expected)
+
     def test_donot_drop_nonevalues(self):
         # GH21356
         data = [

From 2cc267c26c0965406ce97444509d6f908f285877 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sat, 9 Mar 2019 11:55:50 -0500
Subject: [PATCH 02/10] fixed code style

---
 pandas/tests/io/json/test_normalize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 8d7ba2f0cffaf..547823c7122f2 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -65,6 +65,7 @@ def author_missing_data():
          {'first': 'Jane', 'last_name': 'Doe'}
          }]
 
+
 @pytest.fixture
 def address_missing_data():
     return [
@@ -75,7 +76,7 @@ def address_missing_data():
                         'state': 'OH',
                         'zip': 44646}]
          },
-        {'addresses': [{'number':  8449,
+        {'addresses': [{'number': 8449,
                         'street': 'Spring St.',
                         'city': 'Elizabethton',
                         'state': 'TN',

From a24a2131eb019388d16828cc4a593de055b1361a Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sat, 9 Mar 2019 12:31:00 -0500
Subject: [PATCH 03/10] modified test to ignore order of columns

---
 pandas/tests/io/json/test_normalize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 547823c7122f2..a4fcd8f9034b2 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -419,8 +419,8 @@ def test_missing_meta(self, address_missing_data):
              'zip': 37643,
              'name': np.nan}
         ]
-        expected = DataFrame(ex_data, columns=ex_data[0].keys())
-        tm.assert_frame_equal(result, expected)
+        expected = DataFrame(ex_data)
+        tm.assert_frame_equal(result, expected, check_like=True)
 
     def test_donot_drop_nonevalues(self):
         # GH21356

From cf93f8ae43e8a5c615814d5fff858b346b8202f5 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sat, 9 Mar 2019 13:17:56 -0500
Subject: [PATCH 04/10] fixed change log addition

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 380f00903e0f4..b064fecdf9617 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -214,7 +214,7 @@ I/O
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
-- Bug in :func:`json_normalize` for ``errors='ignore'`` and nullable metadata fields, the null values in dataframe were literal nan string and not numpy.nan (:issue:`25468`)
+- Bug in :func:`json_normalize` for ``errors='ignore'`` and nullable metadata fields, the missing metadata values in dataframe were filled with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
 -
 -
 

From 2a8a7260aa402d35699513d90d0aa3f0c584b5c6 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sun, 10 Mar 2019 13:23:02 -0400
Subject: [PATCH 05/10] added requested changes

---
 doc/source/whatsnew/v0.25.0.rst        |  2 +-
 pandas/tests/io/json/test_normalize.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index b064fecdf9617..367962a534b3d 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -214,7 +214,7 @@ I/O
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
-- Bug in :func:`json_normalize` for ``errors='ignore'`` and nullable metadata fields, the missing metadata values in dataframe were filled with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
+- Bug in :func:`json_normalize` for ``errors='ignore'`` and missing metadata fields, the missing metadata values in dataframe were filled with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
 -
 -
 
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index a4fcd8f9034b2..1bf328b1fa5af 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -67,7 +67,7 @@ def author_missing_data():
 
 
 @pytest.fixture
-def address_missing_data():
+def missing_metadata():
     return [
         {'name': 'Alice',
          'addresses': [{'number': 9562,
@@ -397,11 +397,12 @@ def test_json_normalize_errors(self):
                       ['general', 'trade_version']],
                 errors='raise')
 
-    def test_missing_meta(self, address_missing_data):
-        # GH25468: If metadata is nullable with errors set to ignore, the null
-        # values should be numpy.nan values
+    def test_missing_meta(self, missing_metadata):
+        # GH25468
+        # If a meta key is missing and errors='ignore', the missing values
+        # should be filled with numpy.nan
         result = json_normalize(
-            data=address_missing_data,
+            data=missing_metadata,
             record_path='addresses',
             meta='name',
             errors='ignore')
@@ -419,8 +420,9 @@ def test_missing_meta(self, address_missing_data):
              'zip': 37643,
              'name': np.nan}
         ]
+        columns = ex_data[0].keys()
         expected = DataFrame(ex_data)
-        tm.assert_frame_equal(result, expected, check_like=True)
+        tm.assert_frame_equal(result[columns], expected[columns])
 
     def test_donot_drop_nonevalues(self):
         # GH21356

From dba832c5cef6db6e4d5b33632785dce96ee66a12 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sun, 10 Mar 2019 13:29:38 -0400
Subject: [PATCH 06/10] refactor test_json_normalize_errors

---
 pandas/tests/io/json/test_normalize.py | 64 ++++----------------------
 1 file changed, 9 insertions(+), 55 deletions(-)

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 1bf328b1fa5af..18efbf4535439 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -337,64 +337,18 @@ def test_nested_flattens(self):
 
         assert result == expected
 
-    def test_json_normalize_errors(self):
-        # GH14583: If meta keys are not always present
-        # a new option to set errors='ignore' has been implemented
-        i = {
-            "Trades": [{
-                "general": {
-                    "tradeid": 100,
-                    "trade_version": 1,
-                    "stocks": [{
-
-                        "symbol": "AAPL",
-                        "name": "Apple",
-                        "price": "0"
-                    }, {
-                        "symbol": "GOOG",
-                        "name": "Google",
-                        "price": "0"
-                    }
-                    ]
-                }
-            }, {
-                "general": {
-                    "tradeid": 100,
-                    "stocks": [{
-                        "symbol": "AAPL",
-                        "name": "Apple",
-                        "price": "0"
-                    }, {
-                        "symbol": "GOOG",
-                        "name": "Google",
-                        "price": "0"
-                    }
-                    ]
-                }
-            }
-            ]
-        }
-        j = json_normalize(data=i['Trades'],
-                           record_path=[['general', 'stocks']],
-                           meta=[['general', 'tradeid'],
-                                 ['general', 'trade_version']],
-                           errors='ignore')
-        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
-                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
-                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
-                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
-                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
-
-        assert j.fillna('').to_dict() == expected
-
-        msg = ("Try running with errors='ignore' as key 'trade_version'"
+    def test_json_normalize_errors(self, missing_metadata):
+        # GH14583:
+        # If meta keys are not always present a new option to set
+        # errors='ignore' has been implemented
+
+        msg = ("Try running with errors='ignore' as key 'name'"
                " is not always present")
         with pytest.raises(KeyError, match=msg):
             json_normalize(
-                data=i['Trades'],
-                record_path=[['general', 'stocks']],
-                meta=[['general', 'tradeid'],
-                      ['general', 'trade_version']],
+                data=missing_metadata,
+                record_path='addresses',
+                meta='name',
                 errors='raise')
 
     def test_missing_meta(self, missing_metadata):

From 5b6a8e749ed1242fe931ddfffe547d3e227ce5c9 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sun, 10 Mar 2019 15:12:20 -0400
Subject: [PATCH 07/10] build expected from list of lists to make order
 deterministic

---
 pandas/tests/io/json/test_normalize.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 18efbf4535439..5362274274d72 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -374,9 +374,13 @@ def test_missing_meta(self, missing_metadata):
              'zip': 37643,
              'name': np.nan}
         ]
-        columns = ex_data[0].keys()
-        expected = DataFrame(ex_data)
-        tm.assert_frame_equal(result[columns], expected[columns])
+        ex_data = [
+            ['Massillon', 9562, 'OH', 'Morris St.', 44646, 'Alice'],
+            ['Elizabethton', 8449, 'TN', 'Spring St.', 37643, np.nan]
+        ]
+        columns = ['city', 'number', 'state', 'street', 'zip', 'name']
+        expected = DataFrame(ex_data, columns=columns)
+        tm.assert_frame_equal(result, expected)
 
     def test_donot_drop_nonevalues(self):
         # GH21356

From ba5902ef058a89259f8b1adef2667bb20ba84f35 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Sun, 10 Mar 2019 20:28:59 -0400
Subject: [PATCH 08/10] changed the entry for json_normalize

---
 doc/source/whatsnew/v0.25.0.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 367962a534b3d..3f74e3cfc1d9c 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -214,8 +214,7 @@ I/O
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
-- Bug in :func:`json_normalize` for ``errors='ignore'`` and missing metadata fields, the missing metadata values in dataframe were filled with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
--
+- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
 -
 
 

From 49530126682272355176e818f8ea4e184f9c1485 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Wed, 13 Mar 2019 11:16:41 -0400
Subject: [PATCH 09/10] added comment explaining dtype=object in json_normalize

---
 pandas/io/json/normalize.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py
index 7c18f11eb13b0..7a8188dd07b6b 100644
--- a/pandas/io/json/normalize.py
+++ b/pandas/io/json/normalize.py
@@ -281,6 +281,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
             raise ValueError('Conflicting metadata name {name}, '
                              'need distinguishing prefix '.format(name=k))
 
+        # force object dtype so missing metadata is not cast to string 'nan'
         result[k] = np.array(v, dtype=object).repeat(lengths)
 
     return result

From b3b744cf23dd12b603cff2b3e57751bf7150cda9 Mon Sep 17 00:00:00 2001
From: antoineviscardi <antoine.viscardi@gmail.com>
Date: Wed, 13 Mar 2019 11:18:51 -0400
Subject: [PATCH 10/10] removed leftover merge conflict marker from whatsnew

---
 doc/source/whatsnew/v0.25.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 16804e8866bff..0b4b3519003aa 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -214,7 +214,6 @@ I/O
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
-<<<<<<< HEAD
 - Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
 - :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AsseertionError`` (:issue:`25608`)
 -