diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 012fe47c476d1..ab23f0adcb958 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -31,6 +31,7 @@ Other enhancements - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) +- Support passing a :class:`Series` input to :func:`json_normalize`; the result retains the :class:`Series` :class:`Index` (:issue:`51452`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 49f95430d9bb9..f784004487646 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -19,7 +19,10 @@ from pandas._libs.writers import convert_json_to_lines import pandas as pd -from pandas import DataFrame +from pandas import ( + DataFrame, + Series, +) if TYPE_CHECKING: from collections.abc import Iterable @@ -266,7 +269,7 @@ def _simple_json_normalize( def json_normalize( - data: dict | list[dict], + data: dict | list[dict] | Series, record_path: str | list | None = None, meta: str | list[str | list[str]] | None = None, meta_prefix: str | None = None, @@ -280,7 +283,7 @@ def json_normalize( Parameters ---------- - data : dict or list of dicts + data : dict, list of dicts, or Series of dicts Unserialized JSON objects. record_path : str or list of str, default None Path in each object to list of records. If not passed, data will be @@ -365,6 +368,26 @@ def json_normalize( 1 NaN Mark Reg 130 60 2 2.0 Faye Raker 130 60 + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... 
"fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] + >>> series = pd.Series(data, index=pd.Index(["a", "b", "c"])) + >>> pd.json_normalize(series) + id name fitness.height fitness.weight + a 1.0 Cole Volk 130 60 + b NaN Mark Reg 130 60 + c 2.0 Faye Raker 130 60 + >>> data = [ ... { ... "state": "Florida", @@ -455,6 +478,11 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: ) return result + if isinstance(data, Series): + index = data.index + else: + index = None + if isinstance(data, list) and not data: return DataFrame() elif isinstance(data, dict): @@ -477,7 +505,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: and record_prefix is None and max_level is None ): - return DataFrame(_simple_json_normalize(data, sep=sep)) + return DataFrame(_simple_json_normalize(data, sep=sep), index=index) if record_path is None: if any([isinstance(x, dict) for x in y.values()] for y in data): @@ -489,7 +517,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: # TODO: handle record value which are lists, at least error # reasonably data = nested_to_record(data, sep=sep, max_level=max_level) - return DataFrame(data) + return DataFrame(data, index=index) elif not isinstance(record_path, list): record_path = [record_path] @@ -564,4 +592,6 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: values[i] = val result[k] = values.repeat(lengths) + if index is not None: + result.index = index.repeat(lengths) return result diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 0f33883feba3a..d83e7b4641e88 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -561,6 +561,14 @@ def test_top_column_with_leading_underscore(self): 
tm.assert_frame_equal(result, expected) + def test_series_index(self, state_data): + idx = Index([7, 8]) + series = Series(state_data, index=idx) + result = json_normalize(series) + tm.assert_index_equal(result.index, idx) + result = json_normalize(series, "counties") + tm.assert_index_equal(result.index, idx.repeat([3, 2])) + class TestNestedToRecord: def test_flat_stays_flat(self): @@ -891,6 +899,7 @@ def test_series_non_zero_index(self): "elements.a": [1.0, np.nan, np.nan], "elements.b": [np.nan, 2.0, np.nan], "elements.c": [np.nan, np.nan, 3.0], - } + }, + index=[1, 2, 3], ) tm.assert_frame_equal(result, expected)