From cbf6620139c574c62ab8753165b2464227042647 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Dec 2023 11:46:32 -0800 Subject: [PATCH 1/2] TYP: _json.py --- pandas/io/json/_json.py | 53 +++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 9c56089560507..dac1964bda278 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -14,6 +14,7 @@ Generic, Literal, TypeVar, + final, overload, ) import warnings @@ -42,6 +43,7 @@ from pandas import ( ArrowDtype, DataFrame, + Index, MultiIndex, Series, isna, @@ -1126,10 +1128,11 @@ class Parser: "us": 31536000000000, "ns": 31536000000000000, } + json: str def __init__( self, - json, + json: str, orient, dtype: DtypeArg | None = None, convert_axes: bool = True, @@ -1164,7 +1167,8 @@ def __init__( self.obj: DataFrame | Series | None = None self.dtype_backend = dtype_backend - def check_keys_split(self, decoded) -> None: + @final + def check_keys_split(self, decoded: dict) -> None: """ Checks that dict has only the appropriate keys for orient='split'. """ @@ -1173,6 +1177,7 @@ def check_keys_split(self, decoded) -> None: bad_keys_joined = ", ".join(bad_keys) raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}") + @final def parse(self): self._parse() @@ -1186,6 +1191,7 @@ def parse(self): def _parse(self): raise AbstractMethodError(self) + @final def _convert_axes(self) -> None: """ Try to convert axes. @@ -1193,27 +1199,31 @@ def _convert_axes(self) -> None: obj = self.obj assert obj is not None # for mypy for axis_name in obj._AXIS_ORDERS: - new_axis, result = self._try_convert_data( + ax = obj._get_axis(axis_name) + ser = Series(ax, copy=False) + new_ser, result = self._try_convert_data( name=axis_name, - data=obj._get_axis(axis_name), + data=ser, use_dtypes=False, convert_dates=True, ) if result: + new_axis = Index(new_ser, copy=False) setattr(self.obj, axis_name, new_axis) def _try_convert_types(self): raise AbstractMethodError(self) + @final def _try_convert_data( self, name: Hashable, - data, + data: Series, use_dtypes: bool = True, convert_dates: bool | list[str] = True, - ): + ) -> tuple[Series, bool]: """ - Try to parse a ndarray like into a column by inferring dtype. + Try to parse a Series into a column by inferring dtype. """ # don't try to coerce, unless a force conversion if use_dtypes: @@ -1292,7 +1302,8 @@ def _try_convert_data( return data, True - def _try_convert_to_date(self, data): + @final + def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]: """ Try to parse a ndarray like into a date column. @@ -1342,13 +1353,11 @@ def _try_convert_to_date(self, data): return new_data, True return data, False - def _try_convert_dates(self): - raise AbstractMethodError(self) - class SeriesParser(Parser): _default_orient = "index" _split_keys = ("name", "index", "data") + obj: Series | None def _parse(self) -> None: data = ujson_loads(self.json, precise_float=self.precise_float) @@ -1373,6 +1382,7 @@ def _try_convert_types(self) -> None: class FrameParser(Parser): _default_orient = "columns" _split_keys = ("columns", "index", "data") + obj: DataFrame | None def _parse(self) -> None: json = self.json @@ -1410,12 +1420,16 @@ def _parse(self) -> None: ujson_loads(json, precise_float=self.precise_float), dtype=None ) - def _process_converter(self, f, filt=None) -> None: + def _process_converter( + self, + f: Callable[[Hashable, Series], tuple[Series, bool]], + filt: Callable[[Hashable], bool] | None = None, + ) -> None: """ Take a conversion function and possibly recreate the frame. """ if filt is None: - filt = lambda col, c: True + filt = lambda col: True obj = self.obj assert obj is not None # for mypy @@ -1423,7 +1437,7 @@ def _process_converter(self, f, filt=None) -> None: needs_new_obj = False new_obj = {} for i, (col, c) in enumerate(obj.items()): - if filt(col, c): + if filt(col): new_data, result = f(col, c) if result: c = new_data @@ -1460,6 +1474,10 @@ def is_ok(col) -> bool: """ Return if this col is ok to try for a date parse. """ + if col in convert_dates: + return True + if not self.keep_default_dates: + return False if not isinstance(col, str): return False @@ -1474,9 +1492,4 @@ def is_ok(col) -> bool: return True return False - self._process_converter( - lambda col, c: self._try_convert_to_date(c), - lambda col, c: ( - (self.keep_default_dates and is_ok(col)) or col in convert_dates - ), - ) + self._process_converter(lambda col, c: self._try_convert_to_date(c), filt=is_ok) From 8663e269a936dbb78b88d58fa25e2d232061b51b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 09:24:05 -0800 Subject: [PATCH 2/2] fix index special-casing --- pandas/io/json/_json.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index afcf06b8fcea8..be269dffada35 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -38,7 +38,6 @@ is_string_dtype, ) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ABCIndex from pandas import ( ArrowDtype, @@ -1193,15 +1192,16 @@ def _convert_axes(self) -> None: assert obj is not None # for mypy for axis_name in obj._AXIS_ORDERS: ax = obj._get_axis(axis_name) - ser = Series(ax, copy=False) + ser = Series(ax, dtype=ax.dtype, copy=False) new_ser, result = self._try_convert_data( name=axis_name, data=ser, use_dtypes=False, convert_dates=True, + is_axis=True, ) if result: - new_axis = Index(new_ser, copy=False) + new_axis = Index(new_ser, dtype=new_ser.dtype, copy=False) setattr(self.obj, axis_name, new_axis) def _try_convert_types(self): @@ -1214,6 +1214,7 @@ def _try_convert_data( data: Series, use_dtypes: bool = True, convert_dates: bool | list[str] = True, + is_axis: bool = False, ) -> tuple[Series, bool]: """ Try to parse a Series into a column by inferring dtype. @@ -1252,7 +1253,7 @@ def _try_convert_data( if result: return new_data, True - if self.dtype_backend is not lib.no_default and not isinstance(data, ABCIndex): + if self.dtype_backend is not lib.no_default and not is_axis: # Fall through for conversion later on return data, True elif is_string_dtype(data.dtype):