Skip to content

PERF: datetimelike addition #56373

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 0 additions & 37 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
date_range,
to_timedelta,
)
from pandas.core.algorithms import checked_add_with_arr

from .pandas_vb_common import numeric_dtypes

Expand Down Expand Up @@ -389,42 +388,6 @@ def time_add_timedeltas(self, df):
df["timedelta"] + df["timedelta"]


class AddOverflowScalar:
params = [1, -1, 0]
param_names = ["scalar"]

def setup(self, scalar):
N = 10**6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
checked_add_with_arr(self.arr, scalar)


class AddOverflowArray:
def setup(self):
N = 10**6
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
self.arr_nan_1 = np.random.choice([True, False], size=N)
self.arr_nan_2 = np.random.choice([True, False], size=N)

def time_add_overflow_arr_rev(self):
checked_add_with_arr(self.arr, self.arr_rev)

def time_add_overflow_arr_mask_nan(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these considered superfluous?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they use a no-longer-used mask argument. actually these are all pretty superfluous. will update to remove

checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


hcal = pd.tseries.holiday.USFederalHolidayCalendar()
# These offsets currently raise a NotImplementedError with .apply_index()
non_apply = [
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"npy_unit_to_abbrev",
"get_supported_reso",
"guess_datetime_format",
"add_overflowsafe",
]

from pandas._libs.tslibs import dtypes # pylint: disable=import-self
Expand All @@ -55,6 +56,7 @@
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
add_overflowsafe,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype as get_unit_from_dtype,
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,5 @@ cdef int64_t convert_reso(
NPY_DATETIMEUNIT to_reso,
bint round_ok,
) except? -1

cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right)
4 changes: 4 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def is_unitless(dtype: np.dtype) -> bool: ...
def compare_mismatched_resolutions(
left: np.ndarray, right: np.ndarray, op
) -> npt.NDArray[np.bool_]: ...
def add_overflowsafe(
left: npt.NDArray[np.int64],
right: npt.NDArray[np.int64],
) -> npt.NDArray[np.int64]: ...
41 changes: 41 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
cimport cython
from cpython.datetime cimport (
PyDateTime_CheckExact,
PyDateTime_DATE_GET_HOUR,
Expand Down Expand Up @@ -678,3 +679,43 @@ cdef int64_t _convert_reso_with_dtstruct(
raise OutOfBoundsDatetime from err

return result


@cython.overflowcheck(True)
cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these possible to specialize to int64?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not without specifying an ndim.

"""
Overflow-safe addition for datetime64/timedelta64 dtypes.

`right` may either be zero-dim or of the same shape as `left`.
"""
cdef:
Py_ssize_t N = left.size
int64_t lval, rval, res_value
ndarray iresult = cnp.PyArray_EMPTY(
left.ndim, left.shape, cnp.NPY_INT64, 0
)
cnp.broadcast mi = cnp.PyArray_MultiIterNew3(iresult, left, right)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty cool


# Note: doing this try/except outside the loop improves performance over
# doing it inside the loop.
try:
Copy link
Member

@WillAyd WillAyd Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way I think there's another way of doing this without a loop that could also be more performant. The inspiration from this comes from a book called Hacker's Delight by Henry S Warren in section 4-2

The code would look something like:

uleft = left.view("uint64")
uright = right.view("uint64")
summed = uleft + uright
neg_overflow = uleft & uright & ~summed
pos_overflow = ~uleft & ~uright & summed
overflows = (neg_overflow | pos_overflow) >= 0x8000000000000000

The essence of the algorithm is that you use unsigned arithmetic (which has defined behavior) to do the summation, and then inspect the sign bit to see if "overflow" would have happened

With this would need neither the cython.overflow check nor the try...except block

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's a quick demo

In [42]: left = np.array([2 ** 63 - 1, 2 ** 63 - 1, 2 ** 63 - 1, -(2 ** 63), -(2 ** 63), -(2 ** 63)])
    ...: right = np.array([42, 1, 0, -42, -1, 0])
    ...: uleft = left.view("uint64")
    ...: uright = right.view("uint64")
    ...: summed = uleft + uright
    ...: neg_overflow = uleft & uright & ~summed
    ...: pos_overflow = ~uleft & ~uright & summed
    ...: overflows = (neg_overflow | pos_overflow) >= 0x8000000000000000
    ...: overflows
Out[42]: array([ True,  True, False,  True,  True, False])

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat. Does this play nicely with the NPY_NAT check?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good callout. I suppose you would just mask that as well. So something like:

In [5]: left = np.array([2 ** 63 - 1, 2 ** 63 - 1, 2 ** 63 - 1, -(2 ** 63), -(2 ** 63), -(2 ** 63)])
   ...: right = np.array([42, 1, 0, -42, -1, 0])
   ...: is_nat = (left == 2 ** 63 - 1) | (right == 2 ** 63 - 1)
   ...: uleft = left.view("uint64")
   ...: uright = right.view("uint64")
   ...: summed = uleft + uright
   ...: neg_overflow = uleft & uright & ~summed
   ...: pos_overflow = ~uleft & ~uright & summed
   ...: overflows = ((neg_overflow | pos_overflow) >= 0x8000000000000000) & ~is_nat
   ...: overflows
Out[5]: array([False, False, False,  True,  True, False])

I just hardcoded the above to 2 ** 63 - 1 but can replace with macro or python variable, whichever is available

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does that actually out-perform? (maybe SIMD or something?) seems like a lot of additional memory allocations

Copy link
Member

@WillAyd WillAyd Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard to say. In theory yes this would leverage SIMD better if numpy uses it under the hood (not well versed in if NumPy uses that or not). Also the overflow checks on gcc/clang are pretty fast because they use the compiler builtin, but I think MSVC falls back to a slow implementation (there is a builtin for MSVC too but I don't recall Cython using it - would have to double check)

It requires more research so not something that needs to be done in this PR. Just food for future thought

for i in range(N):
# Analogous to: lval = lvalues[i]
lval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

# Analogous to: rval = rvalues[i]
rval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 2))[0]

if lval == NPY_DATETIME_NAT or rval == NPY_DATETIME_NAT:
res_value = NPY_DATETIME_NAT
else:
res_value = lval + rval

# Analogous to: result[i] = res_value
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_value

cnp.PyArray_MultiIter_NEXT(mi)
except OverflowError as err:
raise OverflowError("Overflow in int64 addition") from err

return iresult
92 changes: 0 additions & 92 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,98 +1119,6 @@ def rank(
return ranks


def checked_add_with_arr(
arr: npt.NDArray[np.int64],
b: int | npt.NDArray[np.int64],
arr_mask: npt.NDArray[np.bool_] | None = None,
b_mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.int64]:
"""
Perform array addition that checks for underflow and overflow.

Performs the addition of an int64 array and an int64 integer (or array)
but checks that they do not result in overflow first. For elements that
are indicated to be NaN, whether or not there is overflow for that element
is automatically ignored.

Parameters
----------
arr : np.ndarray[int64] addend.
b : array or scalar addend.
arr_mask : np.ndarray[bool] or None, default None
array indicating which elements to exclude from checking
b_mask : np.ndarray[bool] or None, default None
array or scalar indicating which element(s) to exclude from checking

Returns
-------
sum : An array for elements x + b for each element x in arr if b is
a scalar or an array for elements x + y for each element pair
(x, y) in (arr, b).

Raises
------
OverflowError if any x + y exceeds the maximum or minimum int64 value.
"""
# For performance reasons, we broadcast 'b' to the new array 'b2'
# so that it has the same size as 'arr'.
b2 = np.broadcast_to(b, arr.shape)
if b_mask is not None:
# We do the same broadcasting for b_mask as well.
b2_mask = np.broadcast_to(b_mask, arr.shape)
else:
b2_mask = None

# For elements that are NaN, regardless of their value, we should
# ignore whether they overflow or not when doing the checked add.
if arr_mask is not None and b2_mask is not None:
not_nan = np.logical_not(arr_mask | b2_mask)
elif arr_mask is not None:
not_nan = np.logical_not(arr_mask)
elif b_mask is not None:
# error: Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has
# incompatible type "Optional[ndarray[Any, dtype[bool_]]]";
# expected "Union[_SupportsArray[dtype[Any]], _NestedSequence
# [_SupportsArray[dtype[Any]]], bool, int, float, complex, str
# , bytes, _NestedSequence[Union[bool, int, float, complex, str
# , bytes]]]"
not_nan = np.logical_not(b2_mask) # type: ignore[arg-type]
else:
not_nan = np.empty(arr.shape, dtype=bool)
not_nan.fill(True)

# gh-14324: For each element in 'arr' and its corresponding element
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
# we then check whether its sum with the element in 'arr' exceeds
# np.iinfo(np.int64).max. If so, we have an overflow error. If it
# it is negative, we then check whether its sum with the element in
# 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow
# error as well.
i8max = lib.i8max
i8min = iNaT

mask1 = b2 > 0
mask2 = b2 < 0

if not mask1.any():
to_raise = ((i8min - b2 > arr) & not_nan).any()
elif not mask2.any():
to_raise = ((i8max - b2 < arr) & not_nan).any()
else:
to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or (
(i8min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any()

if to_raise:
raise OverflowError("Overflow in int64 addition")

result = arr + b
if arr_mask is not None or b2_mask is not None:
np.putmask(result, ~not_nan, iNaT)

return result


# ---- #
# take #
# ---- #
Expand Down
20 changes: 6 additions & 14 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
Tick,
Timedelta,
Timestamp,
add_overflowsafe,
astype_overflowsafe,
get_unit_from_dtype,
iNaT,
Expand Down Expand Up @@ -112,7 +113,6 @@
ops,
)
from pandas.core.algorithms import (
checked_add_with_arr,
isin,
map_array,
unique1d,
Expand Down Expand Up @@ -1013,7 +1013,7 @@ def _get_i8_values_and_mask(
self, other
) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
"""
Get the int64 values and b_mask to pass to checked_add_with_arr.
Get the int64 values and b_mask to pass to add_overflowsafe.
"""
if isinstance(other, Period):
i8values = other.ordinal
Expand Down Expand Up @@ -1069,9 +1069,7 @@ def _add_datetimelike_scalar(self, other) -> DatetimeArray:
self = cast("TimedeltaArray", self)

other_i8, o_mask = self._get_i8_values_and_mask(other)
result = checked_add_with_arr(
self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
)
result = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
res_values = result.view(f"M8[{self.unit}]")

dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
Expand Down Expand Up @@ -1134,9 +1132,7 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
raise type(err)(new_message) from err

other_i8, o_mask = self._get_i8_values_and_mask(other)
res_values = checked_add_with_arr(
self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
)
res_values = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
res_m8 = res_values.view(f"timedelta64[{self.unit}]")

new_freq = self._get_arithmetic_result_freq(other)
Expand Down Expand Up @@ -1202,9 +1198,7 @@ def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
self = cast("DatetimeArray | TimedeltaArray", self)

other_i8, o_mask = self._get_i8_values_and_mask(other)
new_values = checked_add_with_arr(
self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
)
new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
res_values = new_values.view(self._ndarray.dtype)

new_freq = self._get_arithmetic_result_freq(other)
Expand Down Expand Up @@ -1272,9 +1266,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
self._check_compatible_with(other)

other_i8, o_mask = self._get_i8_values_and_mask(other)
new_i8_data = checked_add_with_arr(
self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
)
new_i8_data = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
new_data = np.array([self.freq.base * x for x in new_i8_data])

if o_mask is None:
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
NaT,
NaTType,
Timedelta,
add_overflowsafe,
astype_overflowsafe,
dt64arr_to_periodarr as c_dt64arr_to_periodarr,
get_unit_from_dtype,
Expand Down Expand Up @@ -71,7 +72,6 @@
)
from pandas.core.dtypes.missing import isna

import pandas.core.algorithms as algos
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com

Expand Down Expand Up @@ -847,7 +847,7 @@ def _addsub_int_array_or_scalar(
assert op in [operator.add, operator.sub]
if op is operator.sub:
other = -other
res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
res_values = add_overflowsafe(self.asi8, np.asarray(other, dtype="i8"))
return type(self)(res_values, dtype=self.dtype)

def _add_offset(self, other: BaseOffset):
Expand Down Expand Up @@ -912,12 +912,7 @@ def _add_timedelta_arraylike(
"not an integer multiple of the PeriodArray's freq."
) from err

b_mask = np.isnat(delta)

res_values = algos.checked_add_with_arr(
self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
)
np.putmask(res_values, self._isnan | b_mask, iNaT)
res_values = add_overflowsafe(self.asi8, np.asarray(delta.view("i8")))
return type(self)(res_values, dtype=self.dtype)

def _check_timedeltalike_freq_compat(self, other):
Expand Down
51 changes: 0 additions & 51 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1798,57 +1798,6 @@ def test_pct_max_many_rows(self):
assert result == 1


def test_int64_add_overflow():
# see gh-14068
msg = "Overflow in int64 addition"
m = np.iinfo(np.int64).max
n = np.iinfo(np.int64).min

with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([m, m]), m)
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]))
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([n, n]), n)
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([n, n]), np.array([n, n]))
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([m, n]), np.array([n, n]))
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(
np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])
)
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(
np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])
)
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(
np.array([m, m]),
np.array([m, m]),
arr_mask=np.array([False, True]),
b_mask=np.array([False, True]),
)
with pytest.raises(OverflowError, match=msg):
algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m]))

# Check that the nan boolean arrays override whether or not
# the addition overflows. We don't check the result but just
# the fact that an OverflowError is not raised.
algos.checked_add_with_arr(
np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])
)
algos.checked_add_with_arr(
np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])
)
algos.checked_add_with_arr(
np.array([m, m]),
np.array([m, m]),
arr_mask=np.array([True, False]),
b_mask=np.array([False, True]),
)


class TestMode:
def test_no_mode(self):
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
Expand Down
Loading