Skip to content

BUG: PeriodIndex + TimedeltaArray-with-NaT #47783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from numpy cimport int64_t
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ _period_code_map: dict[str, int]
def periods_per_day(reso: int) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def is_supported_unit(NPY_DATETIMEUNIT reso):
)


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
return "ns"
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ cpdef cnp.ndarray astype_overflowsafe(
cnp.ndarray values, # ndarray[datetime64[anyunit]]
cnp.dtype dtype, # ndarray[datetime64[anyunit]]
bint copy=*,
bint round_ok=*,
)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1

Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ class OutOfBoundsTimedelta(ValueError): ...
def py_get_unit_from_dtype(dtype: np.dtype): ...
def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
def astype_overflowsafe(
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
arr: np.ndarray,
dtype: np.dtype,
copy: bool = ...,
round_ok: bool = ...,
) -> np.ndarray: ...
def is_unitless(dtype: np.dtype) -> bool: ...
def compare_mismatched_resolutions(
Expand Down
72 changes: 62 additions & 10 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ cpdef ndarray astype_overflowsafe(
ndarray values,
cnp.dtype dtype,
bint copy=True,
bint round_ok=True,
):
"""
Convert an ndarray with datetime64[X] to datetime64[Y]
Expand Down Expand Up @@ -314,20 +315,24 @@ cpdef ndarray astype_overflowsafe(
"datetime64/timedelta64 values and dtype must have a unit specified"
)

if (<object>values).dtype.byteorder == ">":
# GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap
values = values.astype(values.dtype.newbyteorder("<"))

if from_unit == to_unit:
# Check this before allocating result for perf, might save some memory
if copy:
return values.copy()
return values

elif from_unit > to_unit:
# e.g. ns -> us, so there is no risk of overflow, so we can use
# numpy's astype safely. Note there _is_ risk of truncation.
return values.astype(dtype)
if round_ok:
# e.g. ns -> us, so there is no risk of overflow, so we can use
# numpy's astype safely. Note there _is_ risk of truncation.
return values.astype(dtype)
else:
iresult2 = astype_round_check(values.view("i8"), from_unit, to_unit)
return iresult2.view(dtype)

if (<object>values).dtype.byteorder == ">":
# GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap
values = values.astype(values.dtype.newbyteorder("<"))

cdef:
ndarray i8values = values.view("i8")
Expand Down Expand Up @@ -356,10 +361,11 @@ cpdef ndarray astype_overflowsafe(
check_dts_bounds(&dts, to_unit)
except OutOfBoundsDatetime as err:
if is_td:
tdval = np.timedelta64(value).view(values.dtype)
from_abbrev = np.datetime_data(values.dtype)[0]
np_val = np.timedelta64(value, from_abbrev)
msg = (
"Cannot convert {tdval} to {dtype} without overflow"
.format(tdval=str(tdval), dtype=str(dtype))
"Cannot convert {np_val} to {dtype} without overflow"
.format(np_val=str(np_val), dtype=str(dtype))
)
raise OutOfBoundsTimedelta(msg) from err
else:
Expand Down Expand Up @@ -453,6 +459,52 @@ cdef int op_to_op_code(op):
return Py_GT


cdef ndarray astype_round_check(
ndarray i8values,
NPY_DATETIMEUNIT from_unit,
NPY_DATETIMEUNIT to_unit
):
# cases with from_unit > to_unit, e.g. ns->us, raise if the conversion
# involves truncation, e.g. 1500ns->1us
cdef:
Py_ssize_t i, N = i8values.size

# equiv: iresult = np.empty((<object>i8values).shape, dtype="i8")
ndarray iresult = cnp.PyArray_EMPTY(
i8values.ndim, i8values.shape, cnp.NPY_INT64, 0
)
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values)

# Note the arguments to_unit, from unit are swapped vs how they
# are passed when going to a higher-frequency reso.
int64_t mult = get_conversion_factor(to_unit, from_unit)
int64_t value, mod

for i in range(N):
# Analogous to: item = i8values[i]
value = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

if value == NPY_DATETIME_NAT:
new_value = NPY_DATETIME_NAT
else:
new_value, mod = divmod(value, mult)
if mod != 0:
# TODO: avoid runtime import
from pandas._libs.tslibs.dtypes import npy_unit_to_abbrev
from_abbrev = npy_unit_to_abbrev(from_unit)
to_abbrev = npy_unit_to_abbrev(to_unit)
raise ValueError(
f"Cannot losslessly cast '{value} {from_abbrev}' to {to_abbrev}"
)

# Analogous to: iresult[i] = new_value
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value

cnp.PyArray_MultiIter_NEXT(mi)

return iresult


@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
"""
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ def to_offset(freq: timedelta | str) -> BaseOffset: ...

class Tick(SingleConstructorOffset):
_reso: int
_prefix: str
_td64_unit: str
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
@property
def delta(self) -> Timedelta: ...
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ cdef class SingleConstructorOffset(BaseOffset):
cdef class Tick(SingleConstructorOffset):
_adjust_dst = False
_prefix = "undefined"
_td64_unit = "undefined"
_attributes = tuple(["n", "normalize"])

def __init__(self, n=1, normalize=False):
Expand Down Expand Up @@ -968,48 +969,55 @@ cdef class Tick(SingleConstructorOffset):
cdef class Day(Tick):
_nanos_inc = 24 * 3600 * 1_000_000_000
_prefix = "D"
_td64_unit = "D"
_period_dtype_code = PeriodDtypeCode.D
_reso = NPY_DATETIMEUNIT.NPY_FR_D


cdef class Hour(Tick):
_nanos_inc = 3600 * 1_000_000_000
_prefix = "H"
_td64_unit = "h"
_period_dtype_code = PeriodDtypeCode.H
_reso = NPY_DATETIMEUNIT.NPY_FR_h


cdef class Minute(Tick):
_nanos_inc = 60 * 1_000_000_000
_prefix = "T"
_td64_unit = "m"
_period_dtype_code = PeriodDtypeCode.T
_reso = NPY_DATETIMEUNIT.NPY_FR_m


cdef class Second(Tick):
_nanos_inc = 1_000_000_000
_prefix = "S"
_td64_unit = "s"
_period_dtype_code = PeriodDtypeCode.S
_reso = NPY_DATETIMEUNIT.NPY_FR_s


cdef class Milli(Tick):
_nanos_inc = 1_000_000
_prefix = "L"
_td64_unit = "ms"
_period_dtype_code = PeriodDtypeCode.L
_reso = NPY_DATETIMEUNIT.NPY_FR_ms


cdef class Micro(Tick):
_nanos_inc = 1000
_prefix = "U"
_td64_unit = "us"
_period_dtype_code = PeriodDtypeCode.U
_reso = NPY_DATETIMEUNIT.NPY_FR_us


cdef class Nano(Tick):
_nanos_inc = 1
_prefix = "N"
_td64_unit = "ns"
_period_dtype_code = PeriodDtypeCode.N
_reso = NPY_DATETIMEUNIT.NPY_FR_ns

Expand Down
31 changes: 19 additions & 12 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,7 @@
ABCSeries,
ABCTimedeltaArray,
)
from pandas.core.dtypes.missing import (
isna,
notna,
)
from pandas.core.dtypes.missing import notna

import pandas.core.algorithms as algos
from pandas.core.arrays import datetimelike as dtl
Expand Down Expand Up @@ -792,20 +789,30 @@ def _add_timedelta_arraylike(
-------
result : ndarray[int64]
"""
if not isinstance(self.freq, Tick):
freq = self.freq
if not isinstance(freq, Tick):
# We cannot add timedelta-like to non-tick PeriodArray
raise TypeError(
f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
)

if not np.all(isna(other)):
delta = self._check_timedeltalike_freq_compat(other)
else:
# all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT
return self + np.timedelta64("NaT")
dtype = np.dtype(f"m8[{freq._td64_unit}]")

try:
delta = astype_overflowsafe(
np.asarray(other), dtype=dtype, copy=False, round_ok=False
)
except ValueError as err:
# TODO: not actually a great exception message in this case
raise raise_on_incompatible(self, other) from err

b_mask = np.isnat(delta)

ordinals = self._addsub_int_array_or_scalar(delta, operator.add).asi8
return type(self)(ordinals, dtype=self.dtype)
res_values = algos.checked_add_with_arr(
self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
)
np.putmask(res_values, self._isnan | b_mask, iNaT)
return type(self)(res_values, freq=self.freq)

def _check_timedeltalike_freq_compat(self, other):
"""
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,21 @@ def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):
with pytest.raises(TypeError, match=msg):
other - obj

# some but not *all* NaT
other = other.copy()
other[0] = np.timedelta64(0, "ns")
expected = PeriodIndex([pi[0]] + ["NaT"] * 8, freq="19D")
expected = tm.box_expected(expected, box_with_array)

result = obj + other
tm.assert_equal(result, expected)
result = other + obj
tm.assert_equal(result, expected)
result = obj - other
tm.assert_equal(result, expected)
with pytest.raises(TypeError, match=msg):
other - obj

# ---------------------------------------------------------------
# Unsorted

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/tslibs/test_np_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,15 @@ def test_astype_overflowsafe_td64(self):
result = astype_overflowsafe(arr, dtype2)
expected = arr.astype(dtype2)
tm.assert_numpy_array_equal(result, expected)

def test_astype_overflowsafe_disallow_rounding(self):
arr = np.array([-1500, 1500], dtype="M8[ns]")
dtype = np.dtype("M8[us]")

msg = "Cannot losslessly cast '-1500 ns' to us"
with pytest.raises(ValueError, match=msg):
astype_overflowsafe(arr, dtype, round_ok=False)

result = astype_overflowsafe(arr, dtype, round_ok=True)
expected = arr.astype(dtype)
tm.assert_numpy_array_equal(result, expected)