-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Simplify Timedelta init, standardize overflow errors #47004
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4b58319
e35f7be
3c21c7b
f7ee822
613e365
9bf564b
89ab94e
7332c1c
f31daa8
8811562
975752c
e63b8fd
1d187ca
969e79b
7027687
51274d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import collections | ||
import operator | ||
import warnings | ||
|
||
cimport cython | ||
|
@@ -55,6 +56,7 @@ from pandas._libs.tslibs.np_datetime cimport ( | |
pandas_timedelta_to_timedeltastruct, | ||
pandas_timedeltastruct, | ||
) | ||
from pandas._libs.util cimport INT64_MAX | ||
|
||
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta | ||
|
||
|
@@ -217,12 +219,11 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: | |
+ delta.microseconds | ||
) * 1000 | ||
except OverflowError as err: | ||
raise OutOfBoundsTimedelta(*err.args) from err | ||
|
||
msg = f"{delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" | ||
raise OutOfBoundsTimedelta(msg) from err | ||
raise TypeError(type(delta)) | ||
|
||
|
||
@cython.overflowcheck(True) | ||
cdef object ensure_td64ns(object ts): | ||
""" | ||
Overflow-safe implementation of td64.astype("m8[ns]") | ||
|
@@ -241,24 +242,20 @@ cdef object ensure_td64ns(object ts): | |
str unitstr | ||
|
||
td64_unit = get_datetime64_unit(ts) | ||
if ( | ||
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns | ||
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC | ||
): | ||
unitstr = npy_unit_to_abbrev(td64_unit) | ||
if td64_unit == NPY_DATETIMEUNIT.NPY_FR_ns or td64_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: | ||
return ts | ||
|
||
td64_value = get_timedelta64_value(ts) | ||
unitstr = npy_unit_to_abbrev(td64_unit) | ||
mult = precision_from_unit(unitstr)[0] | ||
|
||
mult = precision_from_unit(unitstr)[0] | ||
with cython.overflowcheck(True): | ||
try: | ||
# NB: cython#1381 this cannot be *= | ||
td64_value = td64_value * mult | ||
td64_value = get_timedelta64_value(ts) * mult | ||
except OverflowError as err: | ||
raise OutOfBoundsTimedelta(ts) from err | ||
msg = f"{str(ts)} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" | ||
raise OutOfBoundsTimedelta(msg) from err | ||
|
||
return np.timedelta64(td64_value, "ns") | ||
|
||
return ts | ||
return np.timedelta64(td64_value, "ns") | ||
|
||
|
||
cdef convert_to_timedelta64(object ts, str unit): | ||
|
@@ -674,8 +671,7 @@ cdef bint _validate_ops_compat(other): | |
|
||
def _op_unary_method(func, name): | ||
def f(self): | ||
new_value = func(self.value) | ||
return _timedelta_from_value_and_reso(new_value, self._reso) | ||
return create_timedelta(func(self.value), "ignore", self._reso) | ||
f.__name__ = name | ||
return f | ||
|
||
|
@@ -724,13 +720,7 @@ def _binary_op_method_timedeltalike(op, name): | |
if self._reso != other._reso: | ||
raise NotImplementedError | ||
|
||
res = op(self.value, other.value) | ||
if res == NPY_NAT: | ||
# e.g. test_implementation_limits | ||
# TODO: more generally could do an overflowcheck in op? | ||
return NaT | ||
|
||
return _timedelta_from_value_and_reso(res, reso=self._reso) | ||
return create_timedelta(op(self.value, other.value), "ignore", self._reso) | ||
|
||
f.__name__ = name | ||
return f | ||
|
@@ -861,7 +851,7 @@ cdef _to_py_int_float(v): | |
|
||
|
||
def _timedelta_unpickle(value, reso): | ||
return _timedelta_from_value_and_reso(value, reso) | ||
return create_timedelta(value, "ignore", reso) | ||
|
||
|
||
cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): | ||
|
@@ -892,6 +882,68 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): | |
return td_base | ||
|
||
|
||
@cython.overflowcheck(True) | ||
cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso): | ||
""" | ||
Timedelta factory. | ||
|
||
Overflow-safe, and allows for the creation of Timedeltas with non-nano resos while | ||
the public API for that gets hashed out (ref: GH#46587). For now, Timedelta.__new__ | ||
just does arg validation and kwarg processing. | ||
|
||
patrickmckenna marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_timedelta_from_value_and_reso faster if value already an int that can be safely | ||
cast to an int64. | ||
|
||
Parameters | ||
---------- | ||
value : Timedelta, timedelta, np.timedelta64, str, int, float | ||
The same value types accepted by Timedelta.__new__ | ||
in_unit : str | ||
Denote the (np) unit of the input, if it's numeric | ||
out_reso: NPY_DATETIMEUNIT | ||
Desired resolution of new Timedelta | ||
|
||
Notes | ||
----- | ||
Pass in_unit="ignore" (or "ns") with a numeric value to just do overflow checking | ||
(and bypass the prior behavior of converting value -> td64[ns] -> int) | ||
""" | ||
cdef: | ||
int64_t out_value | ||
|
||
if isinstance(value, _Timedelta): | ||
return value | ||
|
||
try: | ||
# if unit == "ns", no need to create an m8[ns] just to read the (same) value back | ||
# if unit == "ignore", assume caller wants to invoke an overflow-safe version of | ||
# _timedelta_from_value_and_reso, and that any float rounding is acceptable | ||
if (is_integer_object(value) or is_float_object(value)) and ( | ||
in_unit == "ns" or in_unit == "ignore" | ||
): | ||
if util.is_nan(value): | ||
return NaT | ||
out_value = <int64_t>value | ||
# is_timedelta_64_object may not give correct results w/ some versions? see e.g. | ||
# github.com/pandas-dev/pandas/runs/6397652653?check_suite_focus=true#step:11:435 | ||
elif isinstance(value, np.timedelta64): | ||
out_value = ensure_td64ns(value).view(np.int64) | ||
elif isinstance(value, str): | ||
if value.startswith(("P", "-P")): | ||
out_value = parse_iso_format_string(value) | ||
else: | ||
out_value = parse_timedelta_string(value) | ||
else: | ||
out_value = convert_to_timedelta64(value, in_unit).view(np.int64) | ||
except OverflowError as err: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any way for less of this logic to be within the try/except? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, it's not ideal. Initially, I tried to leave as much conditional processing as possible in A similar option would be to let all the dispatching on arg type happen automatically, and add overflow checking as needed within each variant, e.g. @functools.singledispatch
def value_to_int(value):
...
@value_to_int.register(str)
def _(value: str):
...
def create_timedelta(value):
...
try:
out_value = value_to_int(value)
catch OverflowError:
raise OutOfBoundsTimedelta(msg) from err
... I'm not sure if cython plays well with Definitely open to other ideas though! |
||
msg = f"{value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" | ||
raise OutOfBoundsTimedelta(msg) from err | ||
|
||
if out_value == NPY_NAT: | ||
return NaT | ||
return _timedelta_from_value_and_reso(out_value, out_reso) | ||
|
||
|
||
# Similar to Timestamp/datetime, this is a construction requirement for | ||
# timedeltas that we need to do object instantiation in python. This will | ||
# serve as a C extension type that shadows the Python class, where we do any | ||
|
@@ -1375,7 +1427,7 @@ cdef class _Timedelta(timedelta): | |
@classmethod | ||
def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): | ||
# exposing as classmethod for testing | ||
return _timedelta_from_value_and_reso(value, reso) | ||
return create_timedelta(value, "ignore", reso) | ||
|
||
|
||
# Python front end to C extension type _Timedelta | ||
|
@@ -1438,99 +1490,49 @@ class Timedelta(_Timedelta): | |
We see that either way we get the same result | ||
""" | ||
|
||
_req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", | ||
"milliseconds", "microseconds", "nanoseconds"} | ||
_allowed_kwargs = ( | ||
"weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", | ||
"nanoseconds" | ||
) | ||
|
||
def __new__(cls, object value=_no_input, unit=None, **kwargs): | ||
cdef _Timedelta td_base | ||
cdef: | ||
NPY_DATETIMEUNIT out_reso = NPY_FR_ns | ||
|
||
# process kwargs iff no value passed | ||
if value is _no_input: | ||
if not len(kwargs): | ||
raise ValueError("cannot construct a Timedelta without a " | ||
"value/unit or descriptive keywords " | ||
"(days,seconds....)") | ||
|
||
kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} | ||
|
||
unsupported_kwargs = set(kwargs) | ||
unsupported_kwargs.difference_update(cls._req_any_kwargs_new) | ||
if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): | ||
if not kwargs: | ||
raise ValueError( | ||
"cannot construct a Timedelta without a value/unit " | ||
"or descriptive keywords (days,seconds....)" | ||
) | ||
if not kwargs.keys() <= set(cls._allowed_kwargs): | ||
raise ValueError( | ||
"cannot construct a Timedelta from the passed arguments, " | ||
"allowed keywords are " | ||
"[weeks, days, hours, minutes, seconds, " | ||
"milliseconds, microseconds, nanoseconds]" | ||
f"allowed keywords are {cls._allowed_kwargs}" | ||
) | ||
|
||
# GH43764, convert any input to nanoseconds first and then | ||
# create the timestamp. This ensures that any potential | ||
# nanosecond contributions from kwargs parsed as floats | ||
# are taken into consideration. | ||
seconds = int(( | ||
( | ||
(kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 | ||
+ kwargs.get('hours', 0) | ||
) * 3600 | ||
+ kwargs.get('minutes', 0) * 60 | ||
+ kwargs.get('seconds', 0) | ||
) * 1_000_000_000 | ||
) | ||
|
||
value = np.timedelta64( | ||
int(kwargs.get('nanoseconds', 0)) | ||
+ int(kwargs.get('microseconds', 0) * 1_000) | ||
+ int(kwargs.get('milliseconds', 0) * 1_000_000) | ||
+ seconds | ||
# GH43764, convert any input to nanoseconds first, to ensure any potential | ||
# nanosecond contributions from kwargs parsed as floats are included | ||
ns = ( | ||
_to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000 | ||
+ _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000 | ||
+ _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000 | ||
+ _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000 | ||
+ _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000 | ||
+ _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000 | ||
+ _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000 | ||
+ _to_py_int_float(kwargs.get("nanoseconds", 0)) | ||
) | ||
return create_timedelta(ns, "ns", out_reso) | ||
|
||
if unit in {'Y', 'y', 'M'}: | ||
if isinstance(value, str) and unit is not None: | ||
raise ValueError("unit must not be specified if the value is a str") | ||
elif unit in {"Y", "y", "M"}: | ||
raise ValueError( | ||
"Units 'M', 'Y', and 'y' are no longer supported, as they do not " | ||
"represent unambiguous timedelta values durations." | ||
) | ||
|
||
# GH 30543 if pd.Timedelta already passed, return it | ||
# check that only value is passed | ||
if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: | ||
return value | ||
elif isinstance(value, _Timedelta): | ||
value = value.value | ||
elif isinstance(value, str): | ||
if unit is not None: | ||
raise ValueError("unit must not be specified if the value is a str") | ||
if (len(value) > 0 and value[0] == 'P') or ( | ||
len(value) > 1 and value[:2] == '-P' | ||
): | ||
value = parse_iso_format_string(value) | ||
else: | ||
value = parse_timedelta_string(value) | ||
value = np.timedelta64(value) | ||
elif PyDelta_Check(value): | ||
value = convert_to_timedelta64(value, 'ns') | ||
elif is_timedelta64_object(value): | ||
value = ensure_td64ns(value) | ||
elif is_tick_object(value): | ||
value = np.timedelta64(value.nanos, 'ns') | ||
elif is_integer_object(value) or is_float_object(value): | ||
# unit=None is de-facto 'ns' | ||
unit = parse_timedelta_unit(unit) | ||
value = convert_to_timedelta64(value, unit) | ||
elif checknull_with_nat(value): | ||
return NaT | ||
else: | ||
raise ValueError( | ||
"Value must be Timedelta, string, integer, " | ||
f"float, timedelta or convertible, not {type(value).__name__}" | ||
) | ||
|
||
if is_timedelta64_object(value): | ||
value = value.view('i8') | ||
|
||
# nat | ||
if value == NPY_NAT: | ||
return NaT | ||
|
||
return _timedelta_from_value_and_reso(value, NPY_FR_ns) | ||
return create_timedelta(value, parse_timedelta_unit(unit), out_reso) | ||
|
||
def __setstate__(self, state): | ||
if len(state) == 1: | ||
|
@@ -1607,30 +1609,25 @@ class Timedelta(_Timedelta): | |
# Arithmetic Methods | ||
# TODO: Can some of these be defined in the cython class? | ||
|
||
__neg__ = _op_unary_method(lambda x: -x, '__neg__') | ||
__pos__ = _op_unary_method(lambda x: x, '__pos__') | ||
__abs__ = _op_unary_method(lambda x: abs(x), '__abs__') | ||
__neg__ = _op_unary_method(operator.neg, "__neg__") | ||
__pos__ = _op_unary_method(operator.pos, "__pos__") | ||
__abs__ = _op_unary_method(operator.abs, "__abs__") | ||
|
||
__add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') | ||
__radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') | ||
__sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') | ||
__rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') | ||
__add__ = _binary_op_method_timedeltalike(operator.add, "__add__") | ||
__radd__ = _binary_op_method_timedeltalike(operator.add, "__radd__") | ||
__sub__ = _binary_op_method_timedeltalike(operator.sub, "__sub__") | ||
__rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__") | ||
|
||
def __mul__(self, other): | ||
if is_integer_object(other) or is_float_object(other): | ||
if util.is_nan(other): | ||
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT | ||
return NaT | ||
|
||
return _timedelta_from_value_and_reso( | ||
<int64_t>(other * self.value), | ||
reso=self._reso, | ||
) | ||
|
||
elif is_array(other): | ||
if util.is_nan(other): | ||
# np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT | ||
return NaT | ||
if is_array(other): | ||
# ndarray-like | ||
return other * self.to_timedelta64() | ||
|
||
if is_integer_object(other) or is_float_object(other): | ||
# can't call Timedelta b/c it doesn't (yet) expose reso | ||
return create_timedelta(self.value * other, "ignore", self._reso) | ||
return NotImplemented | ||
|
||
__rmul__ = __mul__ | ||
|
@@ -1825,6 +1822,6 @@ cdef _broadcast_floordiv_td64( | |
|
||
|
||
# resolution in ns | ||
Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) | ||
Timedelta.max = Timedelta(np.iinfo(np.int64).max) | ||
Timedelta.min = Timedelta(NPY_NAT + 1) | ||
Timedelta.max = Timedelta(INT64_MAX) | ||
Timedelta.resolution = Timedelta(nanoseconds=1) |
Uh oh!
There was an error while loading. Please reload this page.