From 9dbf1f0ee85a675817ae226b4ea03c6c837df371 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 2 Oct 2021 15:30:51 +0530 Subject: [PATCH 1/3] BUG: json_normalize prefixed multi-char sep to all keys --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/json/_normalize.py | 5 ++--- pandas/tests/io/json/test_normalize.py | 7 +++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e76b466ff2d87..0c744aec82fec 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -463,6 +463,7 @@ I/O - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) +- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is prefixed to every key (:issue:``43831) Period ^^^^^^ diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 729d60ca78944..ecc073eba56ee 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -148,9 +148,8 @@ def _normalise_json( _normalise_json( data=value, # to avoid adding the separator to the start of every key - key_string=new_key - if new_key[len(separator) - 1] != separator - else new_key[len(separator) :], + # GH#43831 avoid adding key if key_string blank + key_string=new_key if key_string == "" else new_key[len(separator) :], normalized_dict=normalized_dict, separator=separator, ) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index faf9fc903d7b5..a2b90f607e918 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -220,6 +220,13 @@ def test_simple_normalize_with_separator(self, deep_nested): expected = Index(["name", "pop", "country", "states_name"]).sort_values() assert result.columns.sort_values().equals(expected) + def test_normalize_with_multichar_separator(self): + # GH #43831 + data = {"a": [1, 2], "b": {"b_1": 2, "b_2": (3, 4)}} + result = json_normalize(data, sep="__") + expected = DataFrame([[[1, 2], 2, (3, 4)]], columns=["a", "b__b_1", "b__b_2"]) + tm.assert_frame_equal(result, expected) + def test_value_array_record_prefix(self): # GH 21536 result = json_normalize({"A": [1, 2]}, "A", record_prefix="Prefix.") From f4c437d8a6e3e2e3bf3af2f1b4d50f0add89e386 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Sat, 2 Oct 2021 15:44:50 +0530 Subject: [PATCH 2/3] minor change in whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 76be61009c5be..86f9d9ff624a4 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -463,7 +463,7 @@ I/O - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) -- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is prefixed to every key (:issue:``43831) +- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - From 04b04381c2c3eb36f1a2a718f8a45242a7b5ab57 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 2 Oct 2021 20:08:22 +0530 Subject: [PATCH 3/3] check if only separator at new_key start --- pandas/io/json/_normalize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index ecc073eba56ee..90fd5d077d031 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -149,7 +149,9 @@ def _normalise_json( data=value, # to avoid adding the separator to the start of every key # GH#43831 avoid adding key if key_string blank - key_string=new_key if key_string == "" else new_key[len(separator) :], + key_string=new_key + if new_key[: len(separator)] != separator + else new_key[len(separator) :], normalized_dict=normalized_dict, separator=separator, )