diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index b8fba4d173ae7..be269dffada35 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -14,6 +14,7 @@
     Generic,
     Literal,
     TypeVar,
+    final,
     overload,
 )
 import warnings
@@ -37,11 +38,11 @@
     is_string_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
-from pandas.core.dtypes.generic import ABCIndex
 
 from pandas import (
     ArrowDtype,
     DataFrame,
+    Index,
     MultiIndex,
     Series,
     isna,
@@ -1119,10 +1120,11 @@ class Parser:
         "us": 31536000000000,
         "ns": 31536000000000000,
     }
+    json: str
 
     def __init__(
         self,
-        json,
+        json: str,
         orient,
         dtype: DtypeArg | None = None,
         convert_axes: bool = True,
@@ -1157,7 +1159,8 @@ def __init__(
         self.obj: DataFrame | Series | None = None
         self.dtype_backend = dtype_backend
 
-    def check_keys_split(self, decoded) -> None:
+    @final
+    def check_keys_split(self, decoded: dict) -> None:
         """
         Checks that dict has only the appropriate keys for orient='split'.
         """
@@ -1166,6 +1169,7 @@ def check_keys_split(self, decoded) -> None:
             bad_keys_joined = ", ".join(bad_keys)
             raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}")
 
+    @final
     def parse(self):
         self._parse()
 
@@ -1179,6 +1183,7 @@ def parse(self):
     def _parse(self):
         raise AbstractMethodError(self)
 
+    @final
     def _convert_axes(self) -> None:
         """
         Try to convert axes.
@@ -1186,27 +1191,33 @@ def _convert_axes(self) -> None:
         obj = self.obj
         assert obj is not None  # for mypy
         for axis_name in obj._AXIS_ORDERS:
-            new_axis, result = self._try_convert_data(
+            ax = obj._get_axis(axis_name)
+            ser = Series(ax, dtype=ax.dtype, copy=False)
+            new_ser, result = self._try_convert_data(
                 name=axis_name,
-                data=obj._get_axis(axis_name),
+                data=ser,
                 use_dtypes=False,
                 convert_dates=True,
+                is_axis=True,
             )
             if result:
+                new_axis = Index(new_ser, dtype=new_ser.dtype, copy=False)
                 setattr(self.obj, axis_name, new_axis)
 
     def _try_convert_types(self):
         raise AbstractMethodError(self)
 
+    @final
     def _try_convert_data(
         self,
         name: Hashable,
-        data,
+        data: Series,
         use_dtypes: bool = True,
         convert_dates: bool | list[str] = True,
-    ):
+        is_axis: bool = False,
+    ) -> tuple[Series, bool]:
         """
-        Try to parse a ndarray like into a column by inferring dtype.
+        Try to parse a Series into a column by inferring dtype.
         """
         # don't try to coerce, unless a force conversion
         if use_dtypes:
@@ -1242,7 +1253,7 @@ def _try_convert_data(
             if result:
                 return new_data, True
 
-        if self.dtype_backend is not lib.no_default and not isinstance(data, ABCIndex):
+        if self.dtype_backend is not lib.no_default and not is_axis:
             # Fall through for conversion later on
             return data, True
         elif is_string_dtype(data.dtype):
@@ -1285,7 +1296,8 @@ def _try_convert_data(
 
         return data, True
 
-    def _try_convert_to_date(self, data):
+    @final
+    def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
         """
         Try to parse a ndarray like into a date column.
 
@@ -1335,13 +1347,11 @@ def _try_convert_to_date(self, data):
             return new_data, True
         return data, False
 
-    def _try_convert_dates(self):
-        raise AbstractMethodError(self)
-
 
 class SeriesParser(Parser):
     _default_orient = "index"
     _split_keys = ("name", "index", "data")
+    obj: Series | None
 
     def _parse(self) -> None:
         data = ujson_loads(self.json, precise_float=self.precise_float)
@@ -1366,6 +1376,7 @@ def _try_convert_types(self) -> None:
 class FrameParser(Parser):
     _default_orient = "columns"
     _split_keys = ("columns", "index", "data")
+    obj: DataFrame | None
 
     def _parse(self) -> None:
         json = self.json
@@ -1403,12 +1414,16 @@ def _parse(self) -> None:
                 ujson_loads(json, precise_float=self.precise_float), dtype=None
             )
 
-    def _process_converter(self, f, filt=None) -> None:
+    def _process_converter(
+        self,
+        f: Callable[[Hashable, Series], tuple[Series, bool]],
+        filt: Callable[[Hashable], bool] | None = None,
+    ) -> None:
         """
         Take a conversion function and possibly recreate the frame.
         """
         if filt is None:
-            filt = lambda col, c: True
+            filt = lambda col: True
 
         obj = self.obj
         assert obj is not None  # for mypy
@@ -1416,7 +1431,7 @@ def _process_converter(self, f, filt=None) -> None:
         needs_new_obj = False
         new_obj = {}
         for i, (col, c) in enumerate(obj.items()):
-            if filt(col, c):
+            if filt(col):
                 new_data, result = f(col, c)
                 if result:
                     c = new_data
@@ -1453,6 +1468,10 @@ def is_ok(col) -> bool:
             """
             Return if this col is ok to try for a date parse.
             """
+            if col in convert_dates:
+                return True
+            if not self.keep_default_dates:
+                return False
             if not isinstance(col, str):
                 return False
 
@@ -1467,9 +1486,4 @@ def is_ok(col) -> bool:
                 return True
             return False
 
-        self._process_converter(
-            lambda col, c: self._try_convert_to_date(c),
-            lambda col, c: (
-                (self.keep_default_dates and is_ok(col)) or col in convert_dates
-            ),
-        )
+        self._process_converter(lambda col, c: self._try_convert_to_date(c), filt=is_ok)