From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/22] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From dea38f24c0067ae3fe9484b837c9649714213bba Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:26:31 +0100 Subject: [PATCH 02/22] fix issue 17038 --- pandas/core/reshape/pivot.py | 4 +++- pandas/tests/reshape/test_pivot.py | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b443ba142369c..9743d90f4dd04 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -117,7 +117,9 @@ def pivot_table( agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) table = agged - if table.index.nlevels > 1: + + # GH 17038, this check should only happen if index is specified + if table.index.nlevels > 1 and index: # Related GH #17123 # If index_names are integers, determine whether the integers refer # to the level position or name. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 743fc50c87e96..46a05123c9fdd 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -896,12 +896,6 @@ def _check_output( totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() - # no rows - rtable = self.data.pivot_table( - columns=["AA", "BB"], margins=True, aggfunc=np.mean - ) - assert isinstance(rtable, Series) - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] @@ -972,6 +966,20 @@ def test_pivot_integer_columns(self): tm.assert_frame_equal(table, table2, check_names=False) + @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) + def test_pivot_table_multiindex_only(self, cols): + # GH 17038 + df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]}) + + result = df2.pivot_table(values="v", columns=cols) + expected = DataFrame( + [[4, 5, 6]], + columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), + index=Index(["v"]), + ) + + tm.assert_frame_equal(result, expected) + def test_pivot_no_level_overlap(self): # GH #1181 From cd9e7ac3f31ffaf95cd628863df911dea9fa1248 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:29:43 +0100 Subject: [PATCH 03/22] revert change --- pandas/core/reshape/pivot.py | 3 +-- pandas/tests/reshape/test_pivot.py | 20 ++++++-------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 9743d90f4dd04..a7cdbb0da7a4e 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -118,8 +118,7 @@ def pivot_table( table = agged - # GH 17038, this check should only happen if index is specified - if table.index.nlevels > 1 and index: + if table.index.nlevels > 1: # Related GH #17123 # If index_names are integers, determine whether the integers refer # to the level position or name. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 46a05123c9fdd..743fc50c87e96 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -896,6 +896,12 @@ def _check_output( totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() + # no rows + rtable = self.data.pivot_table( + columns=["AA", "BB"], margins=True, aggfunc=np.mean + ) + assert isinstance(rtable, Series) + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] @@ -966,20 +972,6 @@ def test_pivot_integer_columns(self): tm.assert_frame_equal(table, table2, check_names=False) - @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) - def test_pivot_table_multiindex_only(self, cols): - # GH 17038 - df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]}) - - result = df2.pivot_table(values="v", columns=cols) - expected = DataFrame( - [[4, 5, 6]], - columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), - index=Index(["v"]), - ) - - tm.assert_frame_equal(result, expected) - def test_pivot_no_level_overlap(self): # GH #1181 From e5e912be0f596943067a7df812442764d311a086 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:30:16 +0100 Subject: [PATCH 04/22] revert change --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index a7cdbb0da7a4e..b443ba142369c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -117,7 +117,6 @@ def pivot_table( agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) table = agged - if table.index.nlevels > 1: # Related GH #17123 # If index_names are integers, determine whether the integers refer From fcb4b8088b9314fd21934f6ceefb7e27584c3222 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 1 Feb 2020 00:12:07 +0100 Subject: [PATCH 05/22] fix uo --- pandas/io/json/_normalize.py | 7 ++++--- pandas/tests/io/json/test_normalize.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index b638bdc0bc1eb..e508f8964322f 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -230,7 +230,7 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + def _pull_field(js: Dict[str, Any], spec: Union[List, str], is_meta: bool = True) -> Iterable: result = js # type: ignore if isinstance(spec, list): for field in spec: @@ -238,7 +238,8 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: else: result = result[spec] - if not isinstance(result, Iterable): + # GH 31507 iterable limit should only be used on record, not meta + if not isinstance(result, Iterable) and not is_meta: if pd.isnull(result): result = [] # type: ignore else: @@ -296,7 +297,7 @@ def _recursive_extract(data, path, seen_meta, level=0): _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - recs = _pull_field(obj, path[0]) + recs = _pull_field(obj, path[0], is_meta=False) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 91b204ed41ebc..4239162897a90 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -749,3 +749,13 @@ def test_series_non_zero_index(self): } ) tm.assert_frame_equal(result, expected) + + def test_meta_non_iterable(self): + # GH 31507 + data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" + + result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) + expected_values = [[1, 2, "99"]] + columns = ["one", "two", "id"] + expected = DataFrame(expected_values, columns=columns) + tm.assert_frame_equal(result, expected) From 8ec44509e65db55f90ff286081a4163558d1fae5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 1 Feb 2020 00:15:48 +0100 Subject: [PATCH 06/22] pep8 --- pandas/io/json/_normalize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index e508f8964322f..27521c49fc1ee 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -230,7 +230,9 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str], is_meta: bool = True) -> Iterable: + def _pull_field( + js: Dict[str, Any], spec: Union[List, str], is_meta: bool = True + ) -> Any: result = js # type: ignore if isinstance(spec, list): for field in spec: From a33d05ce02a080e8cbca6d079e1be3901faa7d99 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 1 Feb 2020 00:19:09 +0100 Subject: [PATCH 07/22] whatsnew --- doc/source/whatsnew/v1.0.1.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ff8433c7cafd9..aaafc0e6c3adc 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -88,6 +88,8 @@ I/O - - +- Bug in :meth:`pandas.json_normalize` when value in meta path is not iterable (:issue:`31507`) + Plotting ^^^^^^^^ From 6bedc522f042f1dd0ce369c6657ee95a774696f3 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 1 Feb 2020 09:11:51 +0100 Subject: [PATCH 08/22] fix up --- pandas/tests/io/json/test_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 4239162897a90..3aa33bd43d32f 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -755,7 +755,7 @@ def test_meta_non_iterable(self): data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) - expected_values = [[1, 2, "99"]] + expected_values = np.array([[1, 2, 99]], dtype=object) columns = ["one", "two", "id"] expected = DataFrame(expected_values, columns=columns) tm.assert_frame_equal(result, expected) From 1f0f3bcd09fced8c79fb61b5c7d541cc06b48e75 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 1 Feb 2020 09:38:14 +0100 Subject: [PATCH 09/22] fixup --- pandas/tests/io/json/test_normalize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 3aa33bd43d32f..a5861f30856cc 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -755,7 +755,7 @@ def test_meta_non_iterable(self): data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) - expected_values = np.array([[1, 2, 99]], dtype=object) - columns = ["one", "two", "id"] - expected = DataFrame(expected_values, columns=columns) + expected = DataFrame( + {"one": [1], "two": [2], "id": np.array([99], dtype=object)} + ) tm.assert_frame_equal(result, expected) From 5de348cde2c0da39258ceb448ae25b7d8b5d3b4d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 2 Feb 2020 20:27:13 +0100 Subject: [PATCH 10/22] move around --- pandas/tests/io/json/test_normalize.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index a5861f30856cc..b7a9918ff46da 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -486,6 +486,16 @@ def test_non_interable_record_path_errors(self): with pytest.raises(TypeError, match=msg): json_normalize([test_input], record_path=[test_path]) + def test_meta_non_iterable(self): + # GH 31507 + data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" + + result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) + expected = DataFrame( + {"one": [1], "two": [2], "id": np.array([99], dtype=object)} + ) + tm.assert_frame_equal(result, expected) + class TestNestedToRecord: def test_flat_stays_flat(self): @@ -749,13 +759,3 @@ def test_series_non_zero_index(self): } ) tm.assert_frame_equal(result, expected) - - def test_meta_non_iterable(self): - # GH 31507 - data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" - - result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) - expected = DataFrame( - {"one": [1], "two": [2], "id": np.array([99], dtype=object)} - ) - tm.assert_frame_equal(result, expected) From 130d71bf57ab7475ccd04d063d1db89f38563ea4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 11 Feb 2020 20:51:13 +0100 Subject: [PATCH 11/22] fix conflict --- doc/source/whatsnew/v1.0.1.rst | 73 ---------------------------------- 1 file changed, 73 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index b068d39cd5385..ef3bb8161d13f 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -67,79 +67,6 @@ Bug fixes **Interval** -<<<<<<< HEAD -Interval -^^^^^^^^ - -- -- - -Indexing -^^^^^^^^ - -- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containg a :class:`datetime.date` (:issue:`31501`) -- Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`) -- -- -- Bug where assigning to a :class:`Series` using a IntegerArray / BooleanArray as a mask would raise ``TypeError`` (:issue:`31446`) - -Missing -^^^^^^^ - -- -- - -MultiIndex -^^^^^^^^^^ - -- -- - -I/O -^^^ - -- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) -- -- - - -Plotting -^^^^^^^^ - -- -- - -Groupby/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- -- - - -Reshaping -^^^^^^^^^ - -- -- - -Sparse -^^^^^^ - -- -- - -ExtensionArray -^^^^^^^^^^^^^^ - -- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) -- - - -Other -^^^^^ -- Regression fixed in objTOJSON.c fix return-type warning (:issue:`31463`) -- Fixed a regression where setting :attr:`pd.options.display.max_colwidth` was not accepting negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`) -- - Bug in :meth:`Series.shift` with ``interval`` dtype raising a ``TypeError`` when shifting an interval array of integers or datetimes (:issue:`34195`) .. --------------------------------------------------------------------------- From 3ef920f42f35a81041f924df1e4db189ce8b1292 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 11 Feb 2020 21:16:23 +0100 Subject: [PATCH 12/22] fixup --- pandas/io/json/_normalize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 01f48c2964c53..e0abe3858f9ea 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -239,7 +239,9 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: result = result[spec] return result - def _is_iterable(result: Any) -> Iterable: + def _is_iterable( + js: Dict[str, Any], spec: Union[List, str], result: Any + ) -> Iterable: """Interal function to check if result is Iterable.""" # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not # null, otherwise return an empty list @@ -296,7 +298,7 @@ def _recursive_extract(data, path, seen_meta, level=0): for val, key in zip(_meta, meta_keys): if level + 1 == len(val): value = _pull_field(obj, val[-1]) - seen_meta[key] = _is_iterable(value) + seen_meta[key] = _is_iterable(obj, val[-1], value) _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: @@ -317,7 +319,7 @@ def _recursive_extract(data, path, seen_meta, level=0): else: try: value = _pull_field(obj, val[level:]) - meta_val = _is_iterable(value) + meta_val = _is_iterable(obj, val[-1], value) except KeyError as e: if errors == "ignore": meta_val = np.nan From 0b46239b7a78c37e2797dbbdd54b5ab892fb6793 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 11 Feb 2020 22:23:56 +0100 Subject: [PATCH 13/22] fixup --- pandas/io/json/_normalize.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index e0abe3858f9ea..277b061761c7c 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -297,13 +297,13 @@ def _recursive_extract(data, path, seen_meta, level=0): for obj in data: for val, key in zip(_meta, meta_keys): if level + 1 == len(val): - value = _pull_field(obj, val[-1]) - seen_meta[key] = _is_iterable(obj, val[-1], value) + seen_meta[key] = _pull_field(obj, val[-1]) _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - recs = _pull_field(obj, path[0]) + value = _pull_field(obj, path[0]) + recs = _is_iterable(obj, path[0], value) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) @@ -318,8 +318,7 @@ def _recursive_extract(data, path, seen_meta, level=0): meta_val = seen_meta[key] else: try: - value = _pull_field(obj, val[level:]) - meta_val = _is_iterable(obj, val[-1], value) + meta_val = _pull_field(obj, val[level:]) except KeyError as e: if errors == "ignore": meta_val = np.nan From 6eee937f47dfb6ca952ed8d7605783eab643cdb4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 23 Feb 2020 11:06:44 +0100 Subject: [PATCH 14/22] clearer python --- pandas/io/json/_normalize.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 7bd58ded0c270..a39eb231061cd 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -226,7 +226,8 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: + def _pull_field_meta(js: Dict[str, Any], spec: Union[List, str]) -> Any: + """Internal function to pull field for meta""" result = js # type: ignore if isinstance(spec, list): for field in spec: @@ -235,10 +236,14 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: result = result[spec] return result - def _is_iterable( - js: Dict[str, Any], spec: Union[List, str], result: Any - ) -> Iterable: - """Interal function to check if result is Iterable.""" + def _pull_field_recs(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + """ + Interal function to pull field for records, and similar to + _pull_field_meta, but require to return Iterable. And will raise error + if has non iterable value. + """ + result = _pull_field_meta(js, spec) + # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not # null, otherwise return an empty list if not isinstance(result, Iterable): @@ -293,13 +298,12 @@ def _recursive_extract(data, path, seen_meta, level=0): for obj in data: for val, key in zip(_meta, meta_keys): if level + 1 == len(val): - seen_meta[key] = _pull_field(obj, val[-1]) + seen_meta[key] = _pull_field_meta(obj, val[-1]) _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - value = _pull_field(obj, path[0]) - recs = _is_iterable(obj, path[0], value) + recs = _pull_field_recs(obj, path[0]) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) @@ -314,7 +318,7 @@ def _recursive_extract(data, path, seen_meta, level=0): meta_val = seen_meta[key] else: try: - meta_val = _pull_field(obj, val[level:]) + meta_val = _pull_field_meta(obj, val[level:]) except KeyError as e: if errors == "ignore": meta_val = np.nan From 4c5d61bc461f5b7106f9c364c149fd072cb8d53c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 4 Mar 2020 21:07:09 +0100 Subject: [PATCH 15/22] rename methods --- pandas/io/json/_normalize.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 3d8d3a75ac167..c673892950e7a 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -226,8 +226,8 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field_meta(js: Dict[str, Any], spec: Union[List, str]) -> Any: - """Internal function to pull field for meta""" + def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: + """Internal function to pull field""" result = js # type: ignore if isinstance(spec, list): for field in spec: @@ -236,13 +236,13 @@ def _pull_field_meta(js: Dict[str, Any], spec: Union[List, str]) -> Any: result = result[spec] return result - def _pull_field_recs(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: """ Interal function to pull field for records, and similar to - _pull_field_meta, but require to return Iterable. And will raise error + _pull_field, but require to return Iterable. And will raise error if has non iterable value. """ - result = _pull_field_meta(js, spec) + result = _pull_field(js, spec) # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not # null, otherwise return an empty list @@ -298,12 +298,12 @@ def _recursive_extract(data, path, seen_meta, level=0): for obj in data: for val, key in zip(_meta, meta_keys): if level + 1 == len(val): - seen_meta[key] = _pull_field_meta(obj, val[-1]) + seen_meta[key] = _pull_field(obj, val[-1]) _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - recs = _pull_field_recs(obj, path[0]) + recs = _pull_records(obj, path[0]) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) @@ -318,7 +318,7 @@ def _recursive_extract(data, path, seen_meta, level=0): meta_val = seen_meta[key] else: try: - meta_val = _pull_field_meta(obj, val[level:]) + meta_val = _pull_field(obj, val[level:]) except KeyError as e: if errors == "ignore": meta_val = np.nan From 9726014955093850daa791c95dbbe34ffbe1ef65 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 4 Mar 2020 21:10:56 +0100 Subject: [PATCH 16/22] change typing --- pandas/io/json/_normalize.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index c673892950e7a..8d5f7baac33a8 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -3,11 +3,12 @@ from collections import defaultdict import copy -from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union +from typing import DefaultDict, Dict, Iterable, List, Optional, Union import numpy as np from pandas._libs.writers import convert_json_to_lines +from pandas._typing import Scalar from pandas.util._decorators import deprecate import pandas as pd @@ -226,7 +227,7 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: + def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: """Internal function to pull field""" result = js # type: ignore if isinstance(spec, list): @@ -236,7 +237,7 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: result = result[spec] return result - def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: """ Interal function to pull field for records, and similar to _pull_field, but require to return Iterable. And will raise error From 67a43fe8f777645534cd10078ded9dd6228e4c9f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 4 Mar 2020 21:39:52 +0100 Subject: [PATCH 17/22] fix annotation --- pandas/io/json/_normalize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 8d5f7baac33a8..c3327bbb5c372 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -229,12 +229,12 @@ def _json_normalize( def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: """Internal function to pull field""" - result = js # type: ignore + result = js if isinstance(spec, list): for field in spec: - result = result[field] + result = result[field] # type: ignore else: - result = result[spec] + result = result[spec] # type: ignore return result def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: From 392e3d148f27d0cef8b54e16d2bc05e314b0c7bf Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 4 Mar 2020 21:58:34 +0100 Subject: [PATCH 18/22] change back to any --- pandas/io/json/_normalize.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index c3327bbb5c372..c673892950e7a 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -3,12 +3,11 @@ from collections import defaultdict import copy -from typing import DefaultDict, Dict, Iterable, List, Optional, Union +from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union import numpy as np from pandas._libs.writers import convert_json_to_lines -from pandas._typing import Scalar from pandas.util._decorators import deprecate import pandas as pd @@ -227,17 +226,17 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: + def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: """Internal function to pull field""" - result = js + result = js # type: ignore if isinstance(spec, list): for field in spec: - result = result[field] # type: ignore + result = result[field] else: - result = result[spec] # type: ignore + result = result[spec] return result - def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: + def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: """ Interal function to pull field for records, and similar to _pull_field, but require to return Iterable. And will raise error From 011dbb0bb65b24515eeaa257945b44fadb557f13 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 4 Mar 2020 22:40:10 +0100 Subject: [PATCH 19/22] change back to scalar --- pandas/io/json/_normalize.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index c673892950e7a..0e76e10e35617 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -3,11 +3,12 @@ from collections import defaultdict import copy -from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union +from typing import DefaultDict, Dict, Iterable, List, Optional, Union import numpy as np from pandas._libs.writers import convert_json_to_lines +from pandas._typing import Scalar from pandas.util._decorators import deprecate import pandas as pd @@ -226,17 +227,17 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Any: + def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: """Internal function to pull field""" - result = js # type: ignore + result = js if isinstance(spec, list): for field in spec: - result = result[field] + result = result[field] # type: ignore else: - result = result[spec] + result = result[spec] # type: ignore return result - def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: """ Interal function to pull field for records, and similar to _pull_field, but require to return Iterable. And will raise error From 9476af7e4825b8154c83620462008d71885e8bfc Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 10 Mar 2020 21:33:08 +0100 Subject: [PATCH 20/22] fixup --- pandas/io/json/_normalize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 0e76e10e35617..8d5f7baac33a8 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -229,12 +229,12 @@ def _json_normalize( def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: """Internal function to pull field""" - result = js + result = js # type: ignore if isinstance(spec, list): for field in spec: - result = result[field] # type: ignore + result = result[field] else: - result = result[spec] # type: ignore + result = result[spec] return result def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: From 6165467e09f39104de397e4de8b44dcb1db4c619 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 10 Mar 2020 21:49:35 +0100 Subject: [PATCH 21/22] add ignore type --- pandas/io/json/_normalize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 8d5f7baac33a8..2a7210fa56ff0 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -232,9 +232,9 @@ def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: result = js # type: ignore if isinstance(spec, list): for field in spec: - result = result[field] + result = result[field] # type: ignore else: - result = result[spec] + result = result[spec] # type: ignore return result def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: From 7a20b8c300881d0ab61164d8e3f7b6099f0a2b9d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 11 Mar 2020 08:54:33 +0100 Subject: [PATCH 22/22] fix annotation --- pandas/io/json/_normalize.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 2a7210fa56ff0..6e68c1cf5e27e 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -3,7 +3,7 @@ from collections import defaultdict import copy -from typing import DefaultDict, Dict, Iterable, List, Optional, Union +from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union import numpy as np @@ -227,17 +227,19 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Scalar], spec: Union[List, str]) -> Scalar: + def _pull_field( + js: Dict[str, Any], spec: Union[List, str] + ) -> Union[Scalar, Iterable]: """Internal function to pull field""" result = js # type: ignore if isinstance(spec, list): for field in spec: - result = result[field] # type: ignore + result = result[field] else: - result = result[spec] # type: ignore + result = result[spec] return result - def _pull_records(js: Dict[str, Scalar], spec: Union[List, str]) -> Iterable: + def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: """ Interal function to pull field for records, and similar to _pull_field, but require to return Iterable. And will raise error