diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 7cbc1833093ba..b3a006141fadc 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -25,6 +25,7 @@ "Tick", "BaseOffset", "tz_compare", + "is_unitless", ] from pandas._libs.tslibs import dtypes @@ -39,6 +40,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, + is_unitless, ) from pandas._libs.tslibs.offsets import ( BaseOffset, diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index 59f4427125266..27871a78f8aaf 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -9,3 +9,4 @@ def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ... def astype_overflowsafe( arr: np.ndarray, dtype: np.dtype, copy: bool = ... ) -> np.ndarray: ... +def is_unitless(dtype: np.dtype) -> bool: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 07d198193464f..a787bded2bd50 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -92,6 +92,18 @@ def py_get_unit_from_dtype(dtype): return get_unit_from_dtype(dtype) +def is_unitless(dtype: cnp.dtype) -> bool: + """ + Check if a datetime64 or timedelta64 dtype has no attached unit. + """ + if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]: + raise ValueError("is_unitless dtype must be datetime64 or timedelta64") + cdef: + NPY_DATETIMEUNIT unit = get_unit_from_dtype(dtype) + + return unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + + # ---------------------------------------------------------------------- # Comparison diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index a04104915cf1f..c547503bae273 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -88,6 +88,8 @@ class Timedelta(timedelta): # GH 46171 # While Timedelta can return pd.NaT, having the constructor return # a Union with NaTType makes things awkward for users of pandas + @classmethod + def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ... @property def days(self) -> int: ... @property diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e7f97413f6881..7fef934a85626 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -31,6 +31,7 @@ iNaT, ints_to_pydatetime, is_date_array_normalized, + is_unitless, normalize_i8_timestamps, timezones, to_offset, @@ -335,7 +336,12 @@ def _simple_new( # type: ignore[override] cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) - assert values.dtype == DT64NS_DTYPE + assert dtype.kind == "M" + if isinstance(dtype, np.dtype): + # TODO: once non-nano DatetimeTZDtype is implemented, require that + # dtype's reso match values's reso + assert dtype == values.dtype + assert not is_unitless(dtype) result = super()._simple_new(values, dtype) result._freq = freq @@ -761,7 +767,7 @@ def _add_offset(self, offset) -> DatetimeArray: else: values = self result = offset._apply_array(values).view("M8[ns]") - result = DatetimeArray._simple_new(result) + result = DatetimeArray._simple_new(result, dtype=result.dtype) result = result.tz_localize(self.tz) except NotImplementedError: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1f55842050df0..816f07b076ef8 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -29,6 +29,7 @@ precision_from_unit, ) from pandas._libs.tslibs.fields import get_timedelta_field +from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, ints_to_pytimedelta, @@ -40,6 +41,7 @@ npt, ) from pandas.compat.numpy import function as nv +from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_endpoints from pandas.core.dtypes.astype import astype_td64_unit_conversion @@ -154,8 +156,15 @@ class TimedeltaArray(dtl.TimelikeOps): # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray) # operates pointwise. + @cache_readonly + def _reso(self): + return py_get_unit_from_dtype(self.dtype) + def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType: - return Timedelta(x, unit="ns") + y = x.view("i8") + if y == NaT.value: + return NaT + return Timedelta._from_value_and_reso(y, reso=self._reso) @property # error: Return type "dtype" of "dtype" incompatible with return type @@ -174,7 +183,7 @@ def dtype(self) -> np.dtype: # type: ignore[override] ------- numpy.dtype """ - return TD64NS_DTYPE + return self._ndarray.dtype # ---------------------------------------------------------------- # Constructors @@ -244,11 +253,13 @@ def __init__( def _simple_new( # type: ignore[override] cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE ) -> TimedeltaArray: - assert dtype == TD64NS_DTYPE, dtype + # Require td64 dtype, not unit-less, matching values.dtype + assert isinstance(dtype, np.dtype) and dtype.kind == "m" + assert not tslibs.is_unitless(dtype) assert isinstance(values, np.ndarray), type(values) - assert values.dtype == TD64NS_DTYPE + assert dtype == values.dtype - result = super()._simple_new(values=values, dtype=TD64NS_DTYPE) + result = super()._simple_new(values=values, dtype=dtype) result._freq = freq return result @@ -262,7 +273,7 @@ def _from_sequence( data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) - return cls._simple_new(data, freq=freq) + return cls._simple_new(data, dtype=data.dtype, freq=freq) @classmethod def _from_sequence_not_strict( @@ -286,7 +297,7 @@ def _from_sequence_not_strict( if explicit_none: freq = None - result = cls._simple_new(data, freq=freq) + result = cls._simple_new(data, dtype=data.dtype, freq=freq) if inferred_freq is None and freq is not None: # this condition precludes `freq_infer` @@ -330,7 +341,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None): if not right_closed: index = index[:-1] - return cls._simple_new(index.view("m8[ns]"), freq=freq) + td64values = index.view("m8[ns]") + return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq) # ---------------------------------------------------------------- # DatetimeLike Interface diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 9ea87be2a5468..8eb5cc2dd82f6 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -11,6 +11,17 @@ from pandas.core.arrays import DatetimeArray +class TestNonNano: + @pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)]) + @pytest.mark.xfail(reason="_box_func is not yet patched to get reso right") + def test_non_nano(self, unit, reso): + arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + + assert dta.dtype == arr.dtype + assert dta[0]._reso == reso + + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is # sufficiently robust diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index bf3491496ab3a..46306167878f6 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -7,6 +7,16 @@ from pandas.core.arrays import TimedeltaArray +class TestNonNano: + @pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)]) + def test_non_nano(self, unit, reso): + arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") + tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) + + assert tda.dtype == arr.dtype + assert tda[0]._reso == reso + + class TestTimedeltaArray: @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 9655bb88c2fcf..273a7985ff50b 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -50,6 +50,7 @@ def test_namespace(): "tz_convert_from_utc_single", "to_offset", "tz_compare", + "is_unitless", ] expected = set(submodules + api) diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py index 336c7d30d5f77..31f48b9ad7c4a 100644 --- a/pandas/tests/tslibs/test_np_datetime.py +++ b/pandas/tests/tslibs/test_np_datetime.py @@ -4,6 +4,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, astype_overflowsafe, + is_unitless, py_get_unit_from_dtype, py_td64_to_tdstruct, ) @@ -11,6 +12,28 @@ import pandas._testing as tm +def test_is_unitless(): + dtype = np.dtype("M8[ns]") + assert not is_unitless(dtype) + + dtype = np.dtype("datetime64") + assert is_unitless(dtype) + + dtype = np.dtype("m8[ns]") + assert not is_unitless(dtype) + + dtype = np.dtype("timedelta64") + assert is_unitless(dtype) + + msg = "dtype must be datetime64 or timedelta64" + with pytest.raises(ValueError, match=msg): + is_unitless(np.dtype(np.int64)) + + msg = "Argument 'dtype' has incorrect type" + with pytest.raises(TypeError, match=msg): + is_unitless("foo") + + def test_get_unit_from_dtype(): # datetime64 assert py_get_unit_from_dtype(np.dtype("M8[Y]")) == 0