diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 73c93eb905ab2..68452ce011f9d 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -26,6 +26,7 @@ "BaseOffset", "tz_compare", "is_unitless", + "astype_overflowsafe", "get_unit_from_dtype", "periods_per_day", ] @@ -45,6 +46,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, + astype_overflowsafe, is_unitless, py_get_unit_from_dtype as get_unit_from_dtype, ) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5b7da7347a238..2ed1930b01555 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -193,8 +193,6 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1: return ival -@cython.boundscheck(False) -@cython.wraparound(False) def ensure_datetime64ns(arr: ndarray, copy: bool = True): """ Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]' @@ -213,14 +211,6 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True): dtype = arr.dtype arr = arr.astype(dtype.newbyteorder("<")) - if arr.size == 0: - # Fastpath; doesn't matter but we have old tests for result.base - # being arr. 
- result = arr.view(DT64NS_DTYPE) - if copy: - result = result.copy() - return result - return astype_overflowsafe(arr, DT64NS_DTYPE, copy=copy) @@ -239,29 +229,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True): """ assert arr.dtype.kind == "m", arr.dtype - if arr.dtype == TD64NS_DTYPE: - return arr.copy() if copy else arr - - # Re-use the datetime64 machinery to do an overflow-safe `astype` - dtype = arr.dtype.str.replace("m8", "M8") - dummy = arr.view(dtype) - try: - dt64_result = ensure_datetime64ns(dummy, copy) - except OutOfBoundsDatetime as err: - # Re-write the exception in terms of timedelta64 instead of dt64 - - # Find the value that we are going to report as causing an overflow - tdmin = arr.min() - tdmax = arr.max() - if np.abs(tdmin) >= np.abs(tdmax): - bad_val = tdmin - else: - bad_val = tdmax - - msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}" - raise OutOfBoundsTimedelta(msg) - - return dt64_result.view(TD64NS_DTYPE) + return astype_overflowsafe(arr, dtype=TD64NS_DTYPE, copy=copy) # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index a787bded2bd50..337bedafac757 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -19,6 +19,7 @@ from cpython.object cimport ( import_datetime() +import numpy as np cimport numpy as cnp cnp.import_array() @@ -288,13 +289,21 @@ cpdef ndarray astype_overflowsafe( bint copy=True, ): """ - Convert an ndarray with datetime64[X] to datetime64[Y], raising on overflow. + Convert an ndarray with datetime64[X] to datetime64[Y] + or timedelta64[X] to timedelta64[Y], + raising on overflow. 
""" - if values.descr.type_num != cnp.NPY_DATETIME: - # aka values.dtype.kind != "M" - raise TypeError("astype_overflowsafe values must have datetime64 dtype") - if dtype.type_num != cnp.NPY_DATETIME: - raise TypeError("astype_overflowsafe dtype must be datetime64") + if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME: + # i.e. dtype.kind == "M" + pass + elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA: + # i.e. dtype.kind == "m" + pass + else: + raise TypeError( + "astype_overflowsafe values.dtype and dtype must be either " + "both-datetime64 or both-timedelta64." + ) cdef: NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype) @@ -306,7 +315,9 @@ cpdef ndarray astype_overflowsafe( ): # without raising explicitly here, we end up with a SystemError # built-in function [...] returned a result with an error - raise ValueError("datetime64 values and dtype must have a unit specified") + raise ValueError( + "datetime64/timedelta64 values and dtype must have a unit specified" + ) if from_unit == to_unit: # Check this before allocating result for perf, might save some memory @@ -314,6 +325,11 @@ cpdef ndarray astype_overflowsafe( return values.copy() return values + elif from_unit > to_unit: + # e.g. ns -> us, so there is no risk of overflow, so we can use + # numpy's astype safely. Note there _is_ risk of truncation. 
+ return values.astype(dtype) + cdef: ndarray i8values = values.view("i8") @@ -326,6 +342,7 @@ cpdef ndarray astype_overflowsafe( Py_ssize_t i, N = values.size int64_t value, new_value npy_datetimestruct dts + bint is_td = dtype.type_num == cnp.NPY_TIMEDELTA for i in range(N): # Analogous to: item = values[i] @@ -335,7 +352,20 @@ cpdef ndarray astype_overflowsafe( new_value = NPY_DATETIME_NAT else: pandas_datetime_to_datetimestruct(value, from_unit, &dts) - check_dts_bounds(&dts, to_unit) + + try: + check_dts_bounds(&dts, to_unit) + except OutOfBoundsDatetime as err: + if is_td: + tdval = np.timedelta64(value).view(values.dtype) + msg = ( + "Cannot convert {tdval} to {dtype} without overflow" + .format(tdval=str(tdval), dtype=str(dtype)) + ) + raise OutOfBoundsTimedelta(msg) from err + else: + raise + new_value = npy_datetimestruct_to_datetime(to_unit, &dts) # Analogous to: iresult[i] = new_value diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f313d0d67c344..8c09152e0d2fc 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -19,6 +19,7 @@ NaT, NaTType, Timedelta, + astype_overflowsafe, delta_to_nanoseconds, dt64arr_to_periodarr as c_dt64arr_to_periodarr, iNaT, @@ -858,11 +859,10 @@ def _check_timedeltalike_freq_compat(self, other): elif isinstance(other, np.ndarray): # numpy timedelta64 array; all entries must be compatible assert other.dtype.kind == "m" - if other.dtype != TD64NS_DTYPE: - # i.e. 
non-nano unit - # TODO: disallow unit-less timedelta64 - other = other.astype(TD64NS_DTYPE) - nanos = other.view("i8") + other = astype_overflowsafe(other, TD64NS_DTYPE, copy=False) + # error: Incompatible types in assignment (expression has type + # "ndarray[Any, dtype[Any]]", variable has type "int") + nanos = other.view("i8") # type: ignore[assignment] else: # TimedeltaArray/Index nanos = other.asi8 diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 6c11ec42858c0..488e266a1c5c1 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -74,7 +74,10 @@ def test_to_timedelta_units_dtypes(self, dtype, unit): def test_to_timedelta_oob_non_nano(self): arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]") - msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807" + msg = ( + "Cannot convert -9223372036854775807 seconds to " + r"timedelta64\[ns\] without overflow" + ) with pytest.raises(OutOfBoundsTimedelta, match=msg): to_timedelta(arr) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 9dec556644093..d61a2fca33f56 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -51,6 +51,7 @@ def test_namespace(): "to_offset", "tz_compare", "is_unitless", + "astype_overflowsafe", "get_unit_from_dtype", "periods_per_day", ] diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index ade5a2077767f..598be5a0451b6 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -107,7 +107,13 @@ def test_tz_convert_readonly(): def test_length_zero_copy(dtype, copy): arr = np.array([], dtype=dtype) result = conversion.ensure_datetime64ns(arr, copy=copy) - assert result.base is (None if copy else arr) + if copy: + assert not np.shares_memory(result, arr) + else: + if arr.dtype == result.dtype: + assert result is arr + else: + 
assert not np.shares_memory(result, arr) def test_ensure_datetime64ns_bigendian(): @@ -121,7 +127,7 @@ def test_ensure_datetime64ns_bigendian(): def test_ensure_timedelta64ns_overflows(): arr = np.arange(10).astype("m8[Y]") * 100 - msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900" + msg = r"Cannot convert 300 years to timedelta64\[ns\] without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): conversion.ensure_timedelta64ns(arr) diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py index 31f48b9ad7c4a..9ae491f1618f6 100644 --- a/pandas/tests/tslibs/test_np_datetime.py +++ b/pandas/tests/tslibs/test_np_datetime.py @@ -3,6 +3,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, + OutOfBoundsTimedelta, astype_overflowsafe, is_unitless, py_get_unit_from_dtype, @@ -139,7 +140,10 @@ def test_pass_non_dt64_array(self): arr = np.arange(5) dtype = np.dtype("M8[ns]") - msg = "astype_overflowsafe values must have datetime64 dtype" + msg = ( + "astype_overflowsafe values.dtype and dtype must be either " + "both-datetime64 or both-timedelta64" + ) with pytest.raises(TypeError, match=msg): astype_overflowsafe(arr, dtype, copy=True) @@ -151,14 +155,17 @@ def test_pass_non_dt64_dtype(self): arr = np.arange(5, dtype="i8").view("M8[D]") dtype = np.dtype("m8[ns]") - msg = "astype_overflowsafe dtype must be datetime64" + msg = ( + "astype_overflowsafe values.dtype and dtype must be either " + "both-datetime64 or both-timedelta64" + ) with pytest.raises(TypeError, match=msg): astype_overflowsafe(arr, dtype, copy=True) with pytest.raises(TypeError, match=msg): astype_overflowsafe(arr, dtype, copy=False) - def test_astype_overflowsafe(self): + def test_astype_overflowsafe_dt64(self): dtype = np.dtype("M8[ns]") dt = np.datetime64("2262-04-05", "D") @@ -178,3 +185,25 @@ def test_astype_overflowsafe(self): result = astype_overflowsafe(arr, dtype2) expected = arr.astype(dtype2) 
tm.assert_numpy_array_equal(result, expected) + + def test_astype_overflowsafe_td64(self): + dtype = np.dtype("m8[ns]") + + dt = np.datetime64("2262-04-05", "D") + arr = dt + np.arange(10, dtype="m8[D]") + arr = arr.view("m8[D]") + + # arr.astype silently overflows, so this result is wrong and does not round-trip + wrong = arr.astype(dtype) + roundtrip = wrong.astype(arr.dtype) + assert not (wrong == roundtrip).all() + + msg = r"Cannot convert 106752 days to timedelta64\[ns\] without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + astype_overflowsafe(arr, dtype) + + # But converting to microseconds is fine, and we match numpy's results. + dtype2 = np.dtype("m8[us]") + result = astype_overflowsafe(arr, dtype2) + expected = arr.astype(dtype2) + tm.assert_numpy_array_equal(result, expected)