Skip to content

astype_overflowsafe handle timedelta64 #47110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"BaseOffset",
"tz_compare",
"is_unitless",
"astype_overflowsafe",
"get_unit_from_dtype",
"periods_per_day",
]
Expand All @@ -45,6 +46,7 @@
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
)
Expand Down
34 changes: 1 addition & 33 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
return ival


@cython.boundscheck(False)
@cython.wraparound(False)
def ensure_datetime64ns(arr: ndarray, copy: bool = True):
"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
Expand All @@ -213,14 +211,6 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
dtype = arr.dtype
arr = arr.astype(dtype.newbyteorder("<"))

if arr.size == 0:
# Fastpath; doesn't matter but we have old tests for result.base
# being arr.
result = arr.view(DT64NS_DTYPE)
if copy:
result = result.copy()
return result

return astype_overflowsafe(arr, DT64NS_DTYPE, copy=copy)


Expand All @@ -239,29 +229,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
"""
assert arr.dtype.kind == "m", arr.dtype

if arr.dtype == TD64NS_DTYPE:
return arr.copy() if copy else arr

# Re-use the datetime64 machinery to do an overflow-safe `astype`
dtype = arr.dtype.str.replace("m8", "M8")
dummy = arr.view(dtype)
try:
dt64_result = ensure_datetime64ns(dummy, copy)
except OutOfBoundsDatetime as err:
# Re-write the exception in terms of timedelta64 instead of dt64

# Find the value that we are going to report as causing an overflow
tdmin = arr.min()
tdmax = arr.max()
if np.abs(tdmin) >= np.abs(tdmax):
bad_val = tdmin
else:
bad_val = tdmax

msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}"
raise OutOfBoundsTimedelta(msg)

return dt64_result.view(TD64NS_DTYPE)
return astype_overflowsafe(arr, dtype=TD64NS_DTYPE, copy=copy)


# ----------------------------------------------------------------------
Expand Down
46 changes: 38 additions & 8 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ from cpython.object cimport (

import_datetime()

import numpy as np
cimport numpy as cnp

cnp.import_array()
Expand Down Expand Up @@ -288,13 +289,21 @@ cpdef ndarray astype_overflowsafe(
bint copy=True,
):
"""
Convert an ndarray with datetime64[X] to datetime64[Y], raising on overflow.
Convert an ndarray with datetime64[X] to datetime64[Y]
or timedelta64[X] to timedelta64[Y],
raising on overflow.
"""
if values.descr.type_num != cnp.NPY_DATETIME:
# aka values.dtype.kind != "M"
raise TypeError("astype_overflowsafe values must have datetime64 dtype")
if dtype.type_num != cnp.NPY_DATETIME:
raise TypeError("astype_overflowsafe dtype must be datetime64")
if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME:
# i.e. dtype.kind == "M"
pass
elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA:
# i.e. dtype.kind == "m"
pass
else:
raise TypeError(
"astype_overflowsafe values.dtype and dtype must be either "
"both-datetime64 or both-timedelta64."
)

cdef:
NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
Expand All @@ -306,14 +315,21 @@ cpdef ndarray astype_overflowsafe(
):
# without raising explicitly here, we end up with a SystemError
# built-in function [...] returned a result with an error
raise ValueError("datetime64 values and dtype must have a unit specified")
raise ValueError(
"datetime64/timedelta64 values and dtype must have a unit specified"
)

if from_unit == to_unit:
# Check this before allocating result for perf, might save some memory
if copy:
return values.copy()
return values

elif from_unit > to_unit:
# e.g. ns -> us, so there is no risk of overflow, so we can use
# numpy's astype safely. Note there _is_ risk of truncation.
return values.astype(dtype)

cdef:
ndarray i8values = values.view("i8")

Expand All @@ -326,6 +342,7 @@ cpdef ndarray astype_overflowsafe(
Py_ssize_t i, N = values.size
int64_t value, new_value
npy_datetimestruct dts
bint is_td = dtype.type_num == cnp.NPY_TIMEDELTA

for i in range(N):
# Analogous to: item = values[i]
Expand All @@ -335,7 +352,20 @@ cpdef ndarray astype_overflowsafe(
new_value = NPY_DATETIME_NAT
else:
pandas_datetime_to_datetimestruct(value, from_unit, &dts)
check_dts_bounds(&dts, to_unit)

try:
check_dts_bounds(&dts, to_unit)
except OutOfBoundsDatetime as err:
if is_td:
tdval = np.timedelta64(value).view(values.dtype)
msg = (
"Cannot convert {tdval} to {dtype} without overflow"
.format(tdval=str(tdval), dtype=str(dtype))
)
raise OutOfBoundsTimedelta(msg) from err
else:
raise

new_value = npy_datetimestruct_to_datetime(to_unit, &dts)

# Analogous to: iresult[i] = new_value
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
NaT,
NaTType,
Timedelta,
astype_overflowsafe,
delta_to_nanoseconds,
dt64arr_to_periodarr as c_dt64arr_to_periodarr,
iNaT,
Expand Down Expand Up @@ -858,11 +859,10 @@ def _check_timedeltalike_freq_compat(self, other):
elif isinstance(other, np.ndarray):
# numpy timedelta64 array; all entries must be compatible
assert other.dtype.kind == "m"
if other.dtype != TD64NS_DTYPE:
# i.e. non-nano unit
# TODO: disallow unit-less timedelta64
other = other.astype(TD64NS_DTYPE)
nanos = other.view("i8")
other = astype_overflowsafe(other, TD64NS_DTYPE, copy=False)
# error: Incompatible types in assignment (expression has type
# "ndarray[Any, dtype[Any]]", variable has type "int")
nanos = other.view("i8") # type: ignore[assignment]
else:
# TimedeltaArray/Index
nanos = other.asi8
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ def test_to_timedelta_units_dtypes(self, dtype, unit):
def test_to_timedelta_oob_non_nano(self):
# A seconds-resolution timedelta64 holding pd.NaT.value + 1 must make
# to_timedelta raise OutOfBoundsTimedelta.
arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]")

# NOTE(review): msg is bound twice below; only the second binding reaches
# pytest.raises. This text looks like a rendered diff, so the first
# binding is presumably the removed (old) line -- confirm before running.
msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807"
msg = (
"Cannot convert -9223372036854775807 seconds to "
r"timedelta64\[ns\] without overflow"
)
# match= is treated as a regex, hence the escaped brackets in msg.
with pytest.raises(OutOfBoundsTimedelta, match=msg):
to_timedelta(arr)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/tslibs/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_namespace():
"to_offset",
"tz_compare",
"is_unitless",
"astype_overflowsafe",
"get_unit_from_dtype",
"periods_per_day",
]
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/tslibs/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,13 @@ def test_tz_convert_readonly():
def test_length_zero_copy(dtype, copy):
arr = np.array([], dtype=dtype)
result = conversion.ensure_datetime64ns(arr, copy=copy)
assert result.base is (None if copy else arr)
if copy:
assert not np.shares_memory(result, arr)
else:
if arr.dtype == result.dtype:
assert result is arr
else:
assert not np.shares_memory(result, arr)


def test_ensure_datetime64ns_bigendian():
Expand All @@ -121,7 +127,7 @@ def test_ensure_datetime64ns_bigendian():

def test_ensure_timedelta64ns_overflows():
# ensure_timedelta64ns must raise OutOfBoundsTimedelta for year-resolution
# input; arr holds 0, 100, ..., 900 years.
arr = np.arange(10).astype("m8[Y]") * 100
# NOTE(review): msg is bound twice; only the second binding is used.
# The first names the largest value (900), the second names 300 years --
# presumably the first element that does not fit; confirm.
msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900"
msg = r"Cannot convert 300 years to timedelta64\[ns\] without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
conversion.ensure_timedelta64ns(arr)

Expand Down
35 changes: 32 additions & 3 deletions pandas/tests/tslibs/test_np_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype,
Expand Down Expand Up @@ -139,7 +140,10 @@ def test_pass_non_dt64_array(self):
arr = np.arange(5)
dtype = np.dtype("M8[ns]")

msg = "astype_overflowsafe values must have datetime64 dtype"
msg = (
"astype_overflowsafe values.dtype and dtype must be either "
"both-datetime64 or both-timedelta64"
)
with pytest.raises(TypeError, match=msg):
astype_overflowsafe(arr, dtype, copy=True)

Expand All @@ -151,14 +155,17 @@ def test_pass_non_dt64_dtype(self):
arr = np.arange(5, dtype="i8").view("M8[D]")
dtype = np.dtype("m8[ns]")

msg = "astype_overflowsafe dtype must be datetime64"
msg = (
"astype_overflowsafe values.dtype and dtype must be either "
"both-datetime64 or both-timedelta64"
)
with pytest.raises(TypeError, match=msg):
astype_overflowsafe(arr, dtype, copy=True)

with pytest.raises(TypeError, match=msg):
astype_overflowsafe(arr, dtype, copy=False)

def test_astype_overflowsafe(self):
def test_astype_overflowsafe_dt64(self):
dtype = np.dtype("M8[ns]")

dt = np.datetime64("2262-04-05", "D")
Expand All @@ -178,3 +185,25 @@ def test_astype_overflowsafe(self):
result = astype_overflowsafe(arr, dtype2)
expected = arr.astype(dtype2)
tm.assert_numpy_array_equal(result, expected)

def test_astype_overflowsafe_td64(self):
# Check astype_overflowsafe on timedelta64 input: the cast to nanoseconds
# must raise, while the cast to microseconds must match numpy's astype.
dtype = np.dtype("m8[ns]")

# Ten consecutive days starting at 2262-04-05, reinterpreted as
# timedelta64[D] via view.
dt = np.datetime64("2262-04-05", "D")
arr = dt + np.arange(10, dtype="m8[D]")
arr = arr.view("m8[D]")

# arr.astype silently overflows, so the nanosecond result and its cast
# back to arr.dtype do not all compare equal.
wrong = arr.astype(dtype)
roundtrip = wrong.astype(arr.dtype)
assert not (wrong == roundtrip).all()

# match= is treated as a regex, hence the escaped brackets.
msg = r"Cannot convert 106752 days to timedelta64\[ns\] without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
astype_overflowsafe(arr, dtype)

# But converting to microseconds is fine, and we match numpy's results.
dtype2 = np.dtype("m8[us]")
result = astype_overflowsafe(arr, dtype2)
expected = arr.astype(dtype2)
tm.assert_numpy_array_equal(result, expected)