From 6e572e06b089c4cfa5ccba2f92af0d7abc4a1347 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Dec 2022 10:06:57 -0800 Subject: [PATCH] BUG: Series[object].astype(td64_unsupported) --- pandas/core/dtypes/astype.py | 25 ++++++++++++++++++----- pandas/tests/frame/methods/test_astype.py | 16 +++++++++++++++ pandas/tests/reshape/merge/test_merge.py | 12 +++++++---- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 53c2cfd345e32..57af8c22f76d3 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -7,6 +7,7 @@ import inspect from typing import ( TYPE_CHECKING, + cast, overload, ) @@ -36,7 +37,11 @@ from pandas.core.dtypes.missing import isna if TYPE_CHECKING: - from pandas.core.arrays import ExtensionArray + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, + ) _dtype_obj = np.dtype(object) @@ -109,7 +114,11 @@ def astype_nansafe( # allow frequency conversions if dtype.kind == "M": - return arr.astype(dtype) + from pandas.core.construction import ensure_wrapped_if_datetimelike + + dta = ensure_wrapped_if_datetimelike(arr) + dta = cast("DatetimeArray", dta) + return dta.astype(dtype, copy=copy)._ndarray raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") @@ -124,8 +133,9 @@ def astype_nansafe( # and doing the old convert-to-float behavior otherwise. from pandas.core.construction import ensure_wrapped_if_datetimelike - arr = ensure_wrapped_if_datetimelike(arr) - return arr.astype(dtype, copy=copy) + tda = ensure_wrapped_if_datetimelike(arr) + tda = cast("TimedeltaArray", tda) + return tda.astype(dtype, copy=copy)._ndarray raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") @@ -145,10 +155,15 @@ def astype_nansafe( return dta.astype(dtype, copy=False)._ndarray elif is_timedelta64_dtype(dtype): + from pandas.core.construction import ensure_wrapped_if_datetimelike + # bc we know arr.dtype == object, this is equivalent to # `np.asarray(to_timedelta(arr))`, but using a lower-level API that # does not require a circular import. - return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False) + tdvals = array_to_timedelta64(arr).view("m8[ns]") + + tda = ensure_wrapped_if_datetimelike(tdvals) + return tda.astype(dtype, copy=False)._ndarray if dtype.name in ("datetime64", "timedelta64"): msg = ( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 9d56dba9b480d..4c74f4782578e 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -387,6 +387,22 @@ def test_astype_from_object_to_datetime_unit(self, unit): with pytest.raises(TypeError, match="Cannot cast"): df.astype(f"M8[{unit}]") + @pytest.mark.parametrize("unit", ["Y", "M", "W", "D", "h", "m"]) + def test_astype_from_object_to_timedelta_unit(self, unit): + vals = [ + ["1 Day", "2 Days", "3 Days"], + ["4 Days", "5 Days", "6 Days"], + ] + df = DataFrame(vals, dtype=object) + msg = ( + r"Cannot convert from timedelta64\[ns\] to timedelta64\[.*\]. " + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) + with pytest.raises(ValueError, match=msg): + # TODO: this is ValueError while for DatetimeArray it is TypeError; + # get these consistent + df.astype(f"m8[{unit}]") + @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_from_datetimelike_to_object(self, dtype, unit): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ab589dc26a3ac..f9d4d4fdc19e7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -757,14 +757,18 @@ def test_other_datetime_unit(self, unit): def test_other_timedelta_unit(self, unit): # GH 13389 df1 = DataFrame({"entity_id": [101, 102]}) - s = Series([None, None], index=[101, 102], name="days") + ser = Series([None, None], index=[101, 102], name="days") dtype = f"m8[{unit}]" - df2 = s.astype(dtype).to_frame("days") if unit in ["D", "h", "m"]: - # We get nearest supported unit, i.e. "s" - assert df2["days"].dtype == "m8[s]" + # We cannot astype, instead do nearest supported unit, i.e. "s" + msg = "Supported resolutions are 's', 'ms', 'us', 'ns'" + with pytest.raises(ValueError, match=msg): + ser.astype(dtype) + + df2 = ser.astype("m8[s]").to_frame("days") else: + df2 = ser.astype(dtype).to_frame("days") assert df2["days"].dtype == dtype result = df1.merge(df2, left_on="entity_id", right_index=True)