From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 21 Dec 2021 13:39:55 -0500 Subject: [PATCH 1/7] fix column_arrays for array manager --- pandas/core/internals/array_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 09f16a2ddab67..06849bffff5ca 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]: """ Used in the JSON C code to access column arrays. """ - return self.arrays + + def convert_array(arr: ArrayLike) -> ArrayLike: + if isinstance(arr, ExtensionArray): + return arr.to_numpy() + else: + return arr + + return [convert_array(arr) for arr in self.arrays] def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False From 84b119faea9b3409c5f1a1afe388b77bfff29e45 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 23 Feb 2022 07:24:01 -0500 Subject: [PATCH 2/7] remove dead code in arrays/interval.py --- pandas/core/arrays/interval.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d23910c37b52b..d809ad90ad1b5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,12 +1664,8 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - if needs_i8_conversion(dtype): - new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) - new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) - else: - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 64fedddc8d03f8b6ebfad6ce5827c9d778f0a1b7 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 25 Feb 2022 14:51:00 -0500 Subject: [PATCH 3/7] Undo Revert "remove dead code in arrays/interval.py" This reverts commit 84b119faea9b3409c5f1a1afe388b77bfff29e45. --- pandas/core/arrays/interval.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d809ad90ad1b5..d23910c37b52b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,8 +1664,12 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + if needs_i8_conversion(dtype): + new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) + new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + else: + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 8801db6734cac60dc0622fb8ec4310b5675da214 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 8 Mar 2022 14:39:19 -0500 Subject: [PATCH 4/7] add acceptable types to pd.to_datetime --- pandas/core/tools/datetimes.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 21683ed100c72..6dad66f0e6714 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, Callable, + Dict, Hashable, List, Tuple, @@ -79,7 +80,10 @@ if TYPE_CHECKING: from pandas._libs.tslibs.nattype import NaTType - from pandas import Series + from pandas import ( + DataFrame, + Series, + ) # --------------------------------------------------------------------- # types used in annotations @@ -89,6 +93,7 @@ DatetimeScalar = Union[Scalar, datetime] DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible] +DictConvertible = Union[Dict[str, List], "DataFrame"] start_caching_at = 50 @@ -646,7 +651,7 @@ def to_datetime( @overload def to_datetime( - arg: Series, + arg: Series | DictConvertible, errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., @@ -663,7 +668,7 @@ def to_datetime( @overload def to_datetime( - arg: list | tuple | np.ndarray, + arg: list | tuple | np.ndarray | Index | ExtensionArray, errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., @@ -679,7 +684,7 @@ def to_datetime( def to_datetime( - arg: DatetimeScalarOrArrayConvertible, + arg: DatetimeScalarOrArrayConvertible | DictConvertible, errors: str = "raise", dayfirst: bool = False, yearfirst: bool = False, @@ -1067,10 +1072,10 @@ def to_datetime( # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray, # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...], # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]" - arg = cast( + argc = cast( Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg ) - cache_array = _maybe_cache(arg, format, cache, convert_listlike) + cache_array = _maybe_cache(argc, format, cache, convert_listlike) except OutOfBoundsDatetime: # caching attempts to create a DatetimeIndex, which may raise # an OOB. If that's the desired behavior, then just reraise... @@ -1081,9 +1086,9 @@ def to_datetime( cache_array = Series([], dtype=object) # just an empty array if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array) + result = _convert_and_box_cache(argc, cache_array) else: - result = convert_listlike(arg, format) + result = convert_listlike(argc, format) else: result = convert_listlike(np.array([arg]), format)[0] if isinstance(arg, bool) and isinstance(result, np.bool_): From ff7586f66e66733d8fbe91f331dc90d34e7d7c9f Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 8 Mar 2022 15:08:06 -0500 Subject: [PATCH 5/7] to_datetime on scalar returns Timestamp in overload --- pandas/core/tools/datetimes.py | 2 +- pandas/io/excel/_odfreader.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6dad66f0e6714..21ffe22512f41 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -645,7 +645,7 @@ def to_datetime( infer_datetime_format: bool = ..., origin=..., cache: bool = ..., -) -> Timestamp | NaTType: +) -> Timestamp: ... diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 856ce52a6d6b6..2d4aa993d95ba 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -214,9 +214,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: cell_value = cell.attributes.get((OFFICENS, "date-value")) return pd.to_datetime(cell_value) elif cell_type == "time": - # cast needed because `pd.to_datetime can return NaTType, - # but we know this is a valid time - stamp = cast(pd.Timestamp, pd.to_datetime(str(cell))) + stamp = pd.to_datetime(str(cell)) # cast needed here because Scalar doesn't include datetime.time return cast(Scalar, stamp.time()) else: From b451ca836dacfb0247d682564b30197c51218a32 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 9 Mar 2022 17:41:28 -0500 Subject: [PATCH 6/7] add specifics for TypedDict arg --- pandas/core/tools/datetimes.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 21ffe22512f41..a7652ba2b3d03 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -7,10 +7,10 @@ from typing import ( TYPE_CHECKING, Callable, - Dict, Hashable, List, Tuple, + TypedDict, Union, cast, overload, @@ -88,12 +88,34 @@ # --------------------------------------------------------------------- # types used in annotations -ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"] +ArrayConvertible = Union[List, Tuple, AnyArrayLike] Scalar = Union[int, float, str] DatetimeScalar = Union[Scalar, datetime] DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible] -DictConvertible = Union[Dict[str, List], "DataFrame"] + +DatetimeDictArg = Union[List[Scalar], Tuple[Scalar, ...], AnyArrayLike] + + +class YearMonthDayDict(TypedDict, total=True): + year: DatetimeDictArg + month: DatetimeDictArg + day: DatetimeDictArg + + +class FulldatetimeDict(YearMonthDayDict, total=False): + hour: DatetimeDictArg + hours: DatetimeDictArg + minute: DatetimeDictArg + minutes: DatetimeDictArg + second: DatetimeDictArg + seconds: DatetimeDictArg + ms: DatetimeDictArg + us: DatetimeDictArg + ns: DatetimeDictArg + + +DictConvertible = Union[FulldatetimeDict, "DataFrame"] start_caching_at = 50 From 52836e64aaeb17cc5c5b7058b435454fe3b02fda Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 11 Mar 2022 13:35:38 -0500 Subject: [PATCH 7/7] use ArrayLike --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a7652ba2b3d03..3ff6e7f09b72a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -690,7 +690,7 @@ def to_datetime( @overload def to_datetime( - arg: list | tuple | np.ndarray | Index | ExtensionArray, + arg: list | tuple | Index | ArrayLike, errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ...,