diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index e6cd94b3d9218..4d8684ed2a0b0 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -38,7 +38,10 @@ from pandas._libs.tslibs.conversion cimport ( cast_from_unit, precision_from_unit, ) -from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev +from pandas._libs.tslibs.dtypes cimport ( + get_supported_reso, + npy_unit_to_abbrev, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -939,6 +942,7 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): cdef: _Timedelta td_base + assert value != NPY_NAT # For millisecond and second resos, we cannot actually pass int(value) because # many cases would fall outside of the pytimedelta implementation bounds. # We pass 0 instead, and override seconds, microseconds, days. @@ -1704,10 +1708,27 @@ class Timedelta(_Timedelta): elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') elif is_timedelta64_object(value): - if get_timedelta64_value(value) == NPY_NAT: + # Retain the resolution if possible, otherwise cast to the nearest + # supported resolution. + new_value = get_timedelta64_value(value) + if new_value == NPY_NAT: # i.e. 
np.timedelta64("NaT") return NaT - value = ensure_td64ns(value) + + reso = get_datetime64_unit(value) + new_reso = get_supported_reso(reso) + if reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC: + try: + new_value = convert_reso( + get_timedelta64_value(value), + reso, + new_reso, + round_ok=True, + ) + except (OverflowError, OutOfBoundsDatetime) as err: + raise OutOfBoundsTimedelta(value) from err + return cls._from_value_and_reso(new_value, reso=new_reso) + elif is_tick_object(value): value = np.timedelta64(value.nanos, 'ns') elif is_integer_object(value) or is_float_object(value): @@ -1917,9 +1938,15 @@ class Timedelta(_Timedelta): if other.dtype.kind == 'm': # also timedelta-like - if self._reso != NPY_FR_ns: - raise NotImplementedError - return _broadcast_floordiv_td64(self.value, other, _floordiv) + # TODO: could suppress + # RuntimeWarning: invalid value encountered in floor_divide + result = self.asm8 // other + mask = other.view("i8") == NPY_NAT + if mask.any(): + # We differ from numpy here + result = result.astype("f8") + result[mask] = np.nan + return result elif other.dtype.kind in ['i', 'u', 'f']: if other.ndim == 0: @@ -1951,9 +1978,15 @@ class Timedelta(_Timedelta): if other.dtype.kind == 'm': # also timedelta-like - if self._reso != NPY_FR_ns: - raise NotImplementedError - return _broadcast_floordiv_td64(self.value, other, _rfloordiv) + # TODO: could suppress + # RuntimeWarning: invalid value encountered in floor_divide + result = other // self.asm8 + mask = other.view("i8") == NPY_NAT + if mask.any(): + # We differ from numpy here + result = result.astype("f8") + result[mask] = np.nan + return result # Includes integer array // Timedelta, disallowed in GH#19761 raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') @@ -2003,45 +2036,3 @@ cdef bint _should_cast_to_timedelta(object obj): return ( is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str) ) - - -cdef _floordiv(int64_t value, right): - return value // right - - -cdef 
_rfloordiv(int64_t value, right): - # analogous to referencing operator.div, but there is no operator.rfloordiv - return right // value - - -cdef _broadcast_floordiv_td64( - int64_t value, - ndarray other, - object (*operation)(int64_t value, object right) -): - """ - Boilerplate code shared by Timedelta.__floordiv__ and - Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. - - Parameters - ---------- - value : int64_t; `self.value` from a Timedelta object - other : ndarray[timedelta64[ns]] - operation : function, either _floordiv or _rfloordiv - - Returns - ------- - result : varies based on `other` - """ - # assumes other.dtype.kind == 'm', i.e. other is timedelta-like - # assumes other.ndim != 0 - - # We need to watch out for np.timedelta64('NaT'). - mask = other.view('i8') == NPY_NAT - - res = operation(value, other.astype('m8[ns]', copy=False).astype('i8')) - - if mask.any(): - res = res.astype('f8') - res[mask] = np.nan - return res diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 4d5286e7364f5..48679a8355837 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -3,6 +3,10 @@ import numpy as np from pandas._libs import lib +from pandas._libs.tslibs import ( + get_unit_from_dtype, + is_supported_unit, +) from pandas._typing import ( AxisInt, Dtype, @@ -439,10 +443,12 @@ def _cmp_method(self, other, op): def _wrap_ndarray_result(self, result: np.ndarray): # If we have timedelta64[ns] result, return a TimedeltaArray instead # of a PandasArray - if result.dtype == "timedelta64[ns]": + if result.dtype.kind == "m" and is_supported_unit( + get_unit_from_dtype(result.dtype) + ): from pandas.core.arrays import TimedeltaArray - return TimedeltaArray._simple_new(result) + return TimedeltaArray._simple_new(result, dtype=result.dtype) return type(self)(result) # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/timedeltas.py 
b/pandas/core/arrays/timedeltas.py index 4c3e790c2879b..2c97ce2fce242 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -284,6 +284,9 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") self._check_compatible_with(value, setitem=setitem) + if value is not NaT: + # Convert to this array's unit (self._unit) before unboxing. + return value._as_unit(self._unit).asm8 return np.timedelta64(value.value, "ns") def _scalar_from_string(self, value) -> Timedelta | NaTType: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 297febe724019..704c285197456 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -134,7 +134,8 @@ def _calculate_deltas( _times = np.asarray( times.view(np.int64), dtype=np.float64 # type: ignore[union-attr] ) - _halflife = float(Timedelta(halflife).value) + # TODO: generalize to non-nano? 
+ _halflife = float(Timedelta(halflife)._as_unit("ns").value) return np.diff(_times) / _halflife diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 0cb09ba6a4dfb..89330424e754c 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -204,6 +204,11 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array): box = box_with_array index = numeric_idx expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))]) + if isinstance(scalar_td, np.timedelta64) and box not in [Index, Series]: + # TODO(2.0): once TDA.astype converts to m8, just do expected.astype + tda = expected._data + dtype = scalar_td.dtype + expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) @@ -249,6 +254,14 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array index = numeric_idx[1:3] expected = TimedeltaIndex(["3 Days", "36 Hours"]) + if isinstance(three_days, np.timedelta64) and box not in [Index, Series]: + # TODO(2.0): just use expected.astype + tda = expected._data + dtype = three_days.dtype + if dtype < np.dtype("m8[s]"): + # i.e. 
resolution is lower -> use lowest supported resolution + dtype = np.dtype("m8[s]") + expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 02bd03f5ea266..c07e0a187babe 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -463,7 +463,9 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_r [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], ) -def test_maybe_promote_any_with_timedelta64(any_numpy_dtype_reduced, fill_value): +def test_maybe_promote_any_with_timedelta64( + any_numpy_dtype_reduced, fill_value, request +): dtype = np.dtype(any_numpy_dtype_reduced) # filling anything but timedelta with timedelta casts to object @@ -471,6 +473,13 @@ def test_maybe_promote_any_with_timedelta64(any_numpy_dtype_reduced, fill_value) expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + + if isinstance(fill_value, np.timedelta64) and fill_value.dtype != "m8[ns]": + mark = pytest.mark.xfail( + reason="maybe_promote not yet updated to handle non-nano " + "Timedelta scalar" + ) + request.node.add_marker(mark) else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 37e08adcfdf88..02313e429f3b6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -856,16 +856,31 @@ def create_data(constructor): tm.assert_frame_equal(result_datetime, expected) tm.assert_frame_equal(result_Timestamp, expected) - def test_constructor_dict_timedelta64_index(self): + 
@pytest.mark.parametrize( + "klass", + [ + pytest.param( + np.timedelta64, + marks=pytest.mark.xfail( + reason="hash mismatch (GH#44504) causes lib.fast_multiget " + "to mess up on dict lookups with equal Timedeltas with " + "mismatched resos" + ), + ), + timedelta, + Timedelta, + ], + ) + def test_constructor_dict_timedelta64_index(self, klass): # GH 10160 td_as_int = [1, 2, 3, 4] - def create_data(constructor): - return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} + if klass is timedelta: + constructor = lambda x: timedelta(days=x) + else: + constructor = lambda x: klass(x, "D") - data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D")) - data_timedelta = create_data(lambda x: timedelta(days=x)) - data_Timedelta = create_data(lambda x: Timedelta(x, "D")) + data = {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} expected = DataFrame( [ @@ -877,12 +892,8 @@ def create_data(constructor): index=[Timedelta(td, "D") for td in td_as_int], ) - result_timedelta64 = DataFrame(data_timedelta64) - result_timedelta = DataFrame(data_timedelta) - result_Timedelta = DataFrame(data_Timedelta) - tm.assert_frame_equal(result_timedelta64, expected) - tm.assert_frame_equal(result_timedelta, expected) - tm.assert_frame_equal(result_Timedelta, expected) + result = DataFrame(data) + tm.assert_frame_equal(result, expected) def test_constructor_period_dict(self): # PeriodIndex @@ -3111,14 +3122,34 @@ def test_from_out_of_bounds_datetime(self, constructor, cls): assert type(get1(result)) is cls + @pytest.mark.xfail( + reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, " + "but TimedeltaArray._from_sequence has not" + ) @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) - def test_from_out_of_bounds_timedelta(self, constructor, cls): + def test_from_out_of_bounds_ns_timedelta(self, constructor, cls): + # scalar that won't fit in nanosecond td64, but will fit in microsecond scalar = datetime(9999, 1, 1) - 
datetime(1970, 1, 1) + exp_dtype = "m8[us]" # smallest reso that fits if cls is np.timedelta64: scalar = np.timedelta64(scalar, "D") + exp_dtype = "m8[s]" # closest reso to input result = constructor(scalar) - assert type(get1(result)) is cls + item = get1(result) + dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + + assert type(item) is Timedelta + assert item.asm8.dtype == exp_dtype + assert dtype == exp_dtype + + def test_out_of_s_bounds_timedelta64(self, constructor): + scalar = np.timedelta64(np.iinfo(np.int64).max, "D") + result = constructor(scalar) + item = get1(result) + assert type(item) is np.timedelta64 + dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + assert dtype == object def test_tzaware_data_tznaive_dtype(self, constructor): tz = "US/Eastern" diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 9e1cac4bd2627..1dd416839c08d 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -5,6 +5,7 @@ import pytest from pandas._libs.tslibs import OutOfBoundsTimedelta +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import ( NaT, @@ -26,18 +27,40 @@ def test_construct_with_weeks_unit_overflow(): def test_construct_from_td64_with_unit(): # ignore the unit, as it may cause silently overflows leading to incorrect # results, and in non-overflow cases is irrelevant GH#46827 - obj = np.timedelta64(123456789, "h") + obj = np.timedelta64(123456789000000000, "h") - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): + with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): Timedelta(obj, unit="ps") - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): + with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): Timedelta(obj, unit="ns") - with pytest.raises(OutOfBoundsTimedelta, 
match="123456789 hours"): + with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): Timedelta(obj) +def test_from_td64_retain_resolution(): + # case where we retain millisecond resolution + obj = np.timedelta64(12345, "ms") + + td = Timedelta(obj) + assert td.value == obj.view("i8") + assert td._reso == NpyDatetimeUnit.NPY_FR_ms.value + + # Case where we cast to nearest-supported reso + obj2 = np.timedelta64(1234, "D") + td2 = Timedelta(obj2) + assert td2._reso == NpyDatetimeUnit.NPY_FR_s.value + assert td2 == obj2 + assert td2.days == 1234 + + # Case that _would_ overflow if we didn't support non-nano + obj3 = np.timedelta64(1000000000000000000, "us") + td3 = Timedelta(obj3) + assert td3.total_seconds() == 1000000000000 + assert td3._reso == NpyDatetimeUnit.NPY_FR_us.value + + def test_construction(): expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") assert Timedelta(10, unit="d").value == expected @@ -231,17 +254,17 @@ def test_overflow_on_construction(): @pytest.mark.parametrize( - "val, unit, name", + "val, unit", [ - (3508, "M", " months"), - (15251, "W", " weeks"), # 1 - (106752, "D", " days"), # change from previous: - (2562048, "h", " hours"), # 0 hours - (153722868, "m", " minutes"), # 13 minutes - (9223372037, "s", " seconds"), # 44 seconds + (3508, "M"), + (15251, "W"), # 1 + (106752, "D"), # change from previous: + (2562048, "h"), # 0 hours + (153722868, "m"), # 13 minutes + (9223372037, "s"), # 44 seconds ], ) -def test_construction_out_of_bounds_td64(val, unit, name): +def test_construction_out_of_bounds_td64ns(val, unit): # TODO: parametrize over units just above/below the implementation bounds # once GH#38964 is resolved @@ -249,9 +272,15 @@ def test_construction_out_of_bounds_td64(val, unit, name): td64 = np.timedelta64(val, unit) assert td64.astype("m8[ns]").view("i8") < 0 # i.e. 
naive astype will be wrong - msg = str(val) + name + td = Timedelta(td64) + if unit != "M": + # with unit="M" the conversion to "s" is poorly defined + # (and numpy issues DeprecationWarning) + assert td.asm8 == td64 + assert td.asm8.dtype == "m8[s]" + msg = r"Cannot cast 1067\d\d days .* to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(td64) + td._as_unit("ns") # But just back in bounds and we are OK assert Timedelta(td64 - 1) == td64 - 1 @@ -259,13 +288,34 @@ def test_construction_out_of_bounds_td64(val, unit, name): td64 *= -1 assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong - with pytest.raises(OutOfBoundsTimedelta, match="-" + msg): - Timedelta(td64) + td2 = Timedelta(td64) + msg = r"Cannot cast -1067\d\d days .* to unit='ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + td2._as_unit("ns") # But just back in bounds and we are OK assert Timedelta(td64 + 1) == td64 + 1 +@pytest.mark.parametrize( + "val, unit", + [ + (3508 * 10**9, "M"), + (15251 * 10**9, "W"), + (106752 * 10**9, "D"), + (2562048 * 10**9, "h"), + (153722868 * 10**9, "m"), + ], +) +def test_construction_out_of_bounds_td64s(val, unit): + td64 = np.timedelta64(val, unit) + with pytest.raises(OutOfBoundsTimedelta, match=str(td64)): + Timedelta(td64) + + # But just back in bounds and we are OK + assert Timedelta(td64 - 10**9) == td64 - 10**9 + + @pytest.mark.parametrize( "fmt,exp", [