From 4b5831928c50979c4599363fbf67497e7c45f2b7 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 11 May 2022 12:27:16 -0700 Subject: [PATCH 01/10] extract functional updates from #46936 --- pandas/_libs/tslibs/timedeltas.pyx | 239 +++++++++++++---------------- pandas/_libs/tslibs/timestamps.pyx | 22 +-- 2 files changed, 122 insertions(+), 139 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4eb1494c4d56c..59f7ee7d0c420 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ import collections +import operator import warnings cimport cython @@ -55,6 +56,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) +from pandas._libs.util cimport INT64_MAX from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta @@ -216,13 +218,12 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: + delta.seconds * 1_000_000 + delta.microseconds ) * 1000 - except OverflowError as err: - raise OutOfBoundsTimedelta(*err.args) from err - + except OverflowError as ex: + msg = f"{delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" + raise OutOfBoundsTimedelta(msg) from ex raise TypeError(type(delta)) -@cython.overflowcheck(True) cdef object ensure_td64ns(object ts): """ Overflow-safe implementation of td64.astype("m8[ns]") @@ -241,24 +242,20 @@ cdef object ensure_td64ns(object ts): str unitstr td64_unit = get_datetime64_unit(ts) - if ( - td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns - and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC - ): - unitstr = npy_unit_to_abbrev(td64_unit) + if td64_unit == NPY_DATETIMEUNIT.NPY_FR_ns or td64_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + return ts - td64_value = get_timedelta64_value(ts) + unitstr = npy_unit_to_abbrev(td64_unit) + mult = precision_from_unit(unitstr)[0] - mult = precision_from_unit(unitstr)[0] + with cython.overflowcheck(True): try: - # NB: cython#1381 this cannot be *= - td64_value = td64_value * mult - except OverflowError as err: - raise OutOfBoundsTimedelta(ts) from err + td64_value = get_timedelta64_value(ts) * mult + except OverflowError as ex: + msg = f"{ts} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" + raise OutOfBoundsTimedelta(msg) from ex - return np.timedelta64(td64_value, "ns") - - return ts + return np.timedelta64(td64_value, "ns") cdef convert_to_timedelta64(object ts, str unit): @@ -674,8 +671,7 @@ cdef bint _validate_ops_compat(other): def _op_unary_method(func, name): def f(self): - new_value = func(self.value) - return _timedelta_from_value_and_reso(new_value, self._reso) + return create_timedelta(func(self.value), "ignore", self._reso) f.__name__ = name return f @@ -724,13 +720,7 @@ def _binary_op_method_timedeltalike(op, name): if self._reso != other._reso: raise NotImplementedError - res = op(self.value, other.value) - if res == NPY_NAT: - # e.g. test_implementation_limits - # TODO: more generally could do an overflowcheck in op? - return NaT - - return _timedelta_from_value_and_reso(res, reso=self._reso) + return create_timedelta(op(self.value, other.value), "ignore", self._reso) f.__name__ = name return f @@ -861,7 +851,7 @@ cdef _to_py_int_float(v): def _timedelta_unpickle(value, reso): - return _timedelta_from_value_and_reso(value, reso) + return create_timedelta(value, "ignore", reso) cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): @@ -892,6 +882,49 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): return td_base +@cython.overflowcheck(True) +cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso): + """ + Timedelta factory. + + Timedelta.__new__ just does arg validation (at least currently). Also, some internal + functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__ + doesn't yet expose that. + + _timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow. + """ + cdef: + int64_t out_value + + if isinstance(value, _Timedelta): + return value + + try: + # if unit == "ns", no need to create an m8[ns] just to read the (same) value back + # if unit == "ignore", assume caller wants to invoke an overflow-safe version of + # _timedelta_from_value_and_reso, and that any float rounding is acceptable + if (is_integer_object(value) or is_float_object(value)) and (in_unit == "ns" or in_unit == "ignore"): + if util.is_nan(value): + return NaT + out_value = value + elif is_timedelta64_object(value): + out_value = ensure_td64ns(value).view(np.int64) + elif isinstance(value, str): + if value.startswith(("P", "-P")): + out_value = parse_iso_format_string(value) + else: + out_value = parse_timedelta_string(value) + else: + out_value = convert_to_timedelta64(value, in_unit).view(np.int64) + except OverflowError as ex: + msg = f"{value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + if out_value == NPY_NAT: + return NaT + return _timedelta_from_value_and_reso(out_value, out_reso) + + # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any @@ -1375,7 +1408,7 @@ cdef class _Timedelta(timedelta): @classmethod def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): # exposing as classmethod for testing - return _timedelta_from_value_and_reso(value, reso) + return create_timedelta(value, "ignore", reso) # Python front end to C extension type _Timedelta @@ -1438,99 +1471,52 @@ class Timedelta(_Timedelta): We see that either way we get the same result """ - _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", - "milliseconds", "microseconds", "nanoseconds"} + _allowed_kwargs = ( + "weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds" + ) def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base + cdef: + _Timedelta td_base + NPY_DATETIMEUNIT out_reso = NPY_FR_ns + # process kwargs iff no value passed if value is _no_input: - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") - - kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} - - unsupported_kwargs = set(kwargs) - unsupported_kwargs.difference_update(cls._req_any_kwargs_new) - if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): + if not kwargs: + raise ValueError( + "cannot construct a Timedelta without a value/unit " + "or descriptive keywords (days,seconds....)" + ) + if not kwargs.keys() <= set(cls._allowed_kwargs): raise ValueError( "cannot construct a Timedelta from the passed arguments, " - "allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]" + f"allowed keywords are {cls._allowed_kwargs}" ) - - # GH43764, convert any input to nanoseconds first and then - # create the timestamp. This ensures that any potential - # nanosecond contributions from kwargs parsed as floats - # are taken into consideration. - seconds = int(( + # GH43764, convert any input to nanoseconds first, to ensure any potential + # nanosecond contributions from kwargs parsed as floats are included + kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) + ns = sum( ( - (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 - + kwargs.get('hours', 0) - ) * 3600 - + kwargs.get('minutes', 0) * 60 - + kwargs.get('seconds', 0) - ) * 1_000_000_000 - ) - - value = np.timedelta64( - int(kwargs.get('nanoseconds', 0)) - + int(kwargs.get('microseconds', 0) * 1_000) - + int(kwargs.get('milliseconds', 0) * 1_000_000) - + seconds + kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, + kwargs["days"] * 24 * 3600 * 1_000_000_000, + kwargs["hours"] * 3600 * 1_000_000_000, + kwargs["minutes"] * 60 * 1_000_000_000, + kwargs["seconds"] * 1_000_000_000, + kwargs["milliseconds"] * 1_000_000, + kwargs["microseconds"] * 1_000, + kwargs["nanoseconds"], + ) ) + return create_timedelta(ns, "ns", out_reso) - if unit in {'Y', 'y', 'M'}: + if isinstance(value, str) and unit is not None: + raise ValueError("unit must not be specified if the value is a str") + elif unit in {"Y", "y", "M"}: raise ValueError( "Units 'M', 'Y', and 'y' are no longer supported, as they do not " "represent unambiguous timedelta values durations." ) - - # GH 30543 if pd.Timedelta already passed, return it - # check that only value is passed - if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: - return value - elif isinstance(value, _Timedelta): - value = value.value - elif isinstance(value, str): - if unit is not None: - raise ValueError("unit must not be specified if the value is a str") - if (len(value) > 0 and value[0] == 'P') or ( - len(value) > 1 and value[:2] == '-P' - ): - value = parse_iso_format_string(value) - else: - value = parse_timedelta_string(value) - value = np.timedelta64(value) - elif PyDelta_Check(value): - value = convert_to_timedelta64(value, 'ns') - elif is_timedelta64_object(value): - value = ensure_td64ns(value) - elif is_tick_object(value): - value = np.timedelta64(value.nanos, 'ns') - elif is_integer_object(value) or is_float_object(value): - # unit=None is de-facto 'ns' - unit = parse_timedelta_unit(unit) - value = convert_to_timedelta64(value, unit) - elif checknull_with_nat(value): - return NaT - else: - raise ValueError( - "Value must be Timedelta, string, integer, " - f"float, timedelta or convertible, not {type(value).__name__}" - ) - - if is_timedelta64_object(value): - value = value.view('i8') - - # nat - if value == NPY_NAT: - return NaT - - return _timedelta_from_value_and_reso(value, NPY_FR_ns) + return create_timedelta(value, parse_timedelta_unit(unit), out_reso) def __setstate__(self, state): if len(state) == 1: @@ -1607,30 +1593,25 @@ class Timedelta(_Timedelta): # Arithmetic Methods # TODO: Can some of these be defined in the cython class? - __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + __neg__ = _op_unary_method(operator.neg, "__neg__") + __pos__ = _op_unary_method(operator.pos, "__pos__") + __abs__ = _op_unary_method(operator.abs, "__abs__") - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + __add__ = _binary_op_method_timedeltalike(operator.add, "__add__") + __radd__ = _binary_op_method_timedeltalike(operator.add, "__radd__") + __sub__ = _binary_op_method_timedeltalike(operator.sub, "__sub__") + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__") def __mul__(self, other): - if is_integer_object(other) or is_float_object(other): - if util.is_nan(other): - # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT - return NaT - - return _timedelta_from_value_and_reso( - (other * self.value), - reso=self._reso, - ) - - elif is_array(other): + if util.is_nan(other): + # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT + return NaT + if is_array(other): # ndarray-like return other * self.to_timedelta64() - + if is_integer_object(other) or is_float_object(other): + # can't call Timedelta b/c it doesn't (yet) expose reso + return create_timedelta(self.value * other, "ignore", self._reso) return NotImplemented __rmul__ = __mul__ @@ -1825,6 +1806,6 @@ cdef _broadcast_floordiv_td64( # resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) -Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.min = Timedelta(NPY_NAT + 1) +Timedelta.max = Timedelta(INT64_MAX) Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fcc9390a2cccd..b728b68dd7dcf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -90,7 +90,10 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) from pandas._libs.tslibs.offsets cimport ( BaseOffset, @@ -435,14 +438,13 @@ cdef class _Timestamp(ABCTimestamp): # Timedelta try: return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime) as err: - if isinstance(other, _Timestamp): - if both_timestamps: - raise OutOfBoundsDatetime( - "Result is too large for pandas.Timedelta. Convert inputs " - "to datetime.datetime with 'Timestamp.to_pydatetime()' " - "before subtracting." - ) from err + except OutOfBoundsTimedelta as err: + if both_timestamps: + raise OutOfBoundsTimedelta( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." + ) from err # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass @@ -461,7 +463,7 @@ cdef class _Timestamp(ABCTimestamp): if PyDateTime_Check(other): try: return type(self)(other) - self - except (OverflowError, OutOfBoundsDatetime) as err: + except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass From e35f7be8b8486ff29c071bae9f83f6038c29b49f Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 11 May 2022 12:53:16 -0700 Subject: [PATCH 02/10] get current tests passing --- pandas/tests/scalar/conftest.py | 6 ++++++ .../tests/scalar/timedelta/test_arithmetic.py | 7 +++---- .../scalar/timedelta/test_constructors.py | 19 +++++++------------ .../tests/scalar/timedelta/test_timedelta.py | 16 ++++++++-------- .../tests/scalar/timestamp/test_arithmetic.py | 17 ++++++----------- pandas/tests/tools/test_to_datetime.py | 9 ++------- pandas/tests/tools/test_to_timedelta.py | 9 ++------- pandas/tests/tslibs/test_timedeltas.py | 12 ++---------- 8 files changed, 36 insertions(+), 59 deletions(-) create mode 100644 pandas/tests/scalar/conftest.py diff --git a/pandas/tests/scalar/conftest.py b/pandas/tests/scalar/conftest.py new file mode 100644 index 0000000000000..cc80e33c555ad --- /dev/null +++ b/pandas/tests/scalar/conftest.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.fixture(name="td_overflow_msg") +def fixture_td_overflow_msg() -> str: + return R"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 74aa7f045088e..9da65754a5251 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -98,12 +98,11 @@ def test_td_add_datetimelike_scalar(self, op): result = op(td, NaT) assert result is NaT - def test_td_add_timestamp_overflow(self): - msg = "int too (large|big) to convert" - with pytest.raises(OverflowError, match=msg): + def test_td_add_timestamp_overflow(self, td_overflow_msg: str): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - with pytest.raises(OutOfBoundsTimedelta, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timestamp("1700-01-01") + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 7fc7bd3a5a74d..d97c80880f663 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -13,18 +13,15 @@ ) -def test_construct_from_td64_with_unit(): +def test_construct_from_td64_with_unit(td_overflow_msg: str): # ignore the unit, as it may cause silently overflows leading to incorrect # results, and in non-overflow cases is irrelevant GH#46827 obj = np.timedelta64(123456789, "h") - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): - Timedelta(obj, unit="ps") - - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(obj, unit="ns") - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(obj) @@ -203,19 +200,17 @@ def test_td_from_repr_roundtrip(val): assert Timedelta(td._repr_base()) == td -def test_overflow_on_construction(): - msg = "int too (large|big) to convert" - +def test_overflow_on_construction(td_overflow_msg: str): # GH#3374 value = Timedelta("1day").value * 20169940 - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(value) # xref GH#17637 - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(7 * 19999, unit="D") - with pytest.raises(OutOfBoundsTimedelta, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(timedelta(days=13 * 19999)) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index cf7211e82b799..c02f7a1637bfd 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -13,6 +13,7 @@ NaT, iNaT, ) +from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta import pandas as pd from pandas import ( @@ -646,7 +647,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) - def test_implementation_limits(self): + def test_implementation_limits(self, td_overflow_msg: str): min_td = Timedelta(Timedelta.min) max_td = Timedelta(Timedelta.max) @@ -656,23 +657,22 @@ def test_implementation_limits(self): assert max_td.value == lib.i8max # Beyond lower limit, a NAT before the Overflow - assert (min_td - Timedelta(1, "ns")) is NaT + assert (min_td - Timedelta(1, "ns")) is NaT # type: ignore[comparison-overlap] - msg = "int too (large|big) to convert" - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): min_td - Timedelta(2, "ns") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): max_td + Timedelta(1, "ns") # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, "ns") - assert td is NaT + assert td is NaT # type: ignore[comparison-overlap] - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(min_td.value - 2, "ns") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(max_td.value + 1, "ns") def test_total_seconds_precision(self): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index b46962fb82896..c9f4373255c4b 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest from pandas._libs.tslibs import ( - OutOfBoundsDatetime, + OutOfBoundsTimedelta, Timedelta, Timestamp, offsets, @@ -33,17 +33,12 @@ def test_overflow_offset(self): expected = Timestamp("1999/09/23") assert stamp - offset_no_overflow == expected - def test_overflow_offset_raises(self): + def test_overflow_offset_raises(self, td_overflow_msg: str): # xref https://github.com/statsmodels/statsmodels/issues/3374 # ends up multiplying really large numbers which overflow stamp = Timestamp("2017-01-13 00:00:00") offset_overflow = 20169940 * offsets.Day(1) - msg = ( - "the add operation between " - r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " - "will overflow" - ) lmsg = "|".join( ["Python int too large to convert to C long", "int too big to convert"] ) @@ -51,7 +46,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): offset_overflow + stamp with pytest.raises(OverflowError, match=lmsg): @@ -61,12 +56,12 @@ def test_overflow_offset_raises(self): # used to crash, so check for proper overflow exception stamp = Timestamp("2000/1/1") - offset_overflow = to_offset("D") * 100**5 + offset_overflow = offsets.Day() * 100**5 with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): offset_overflow + stamp with pytest.raises(OverflowError, match=lmsg): @@ -78,7 +73,7 @@ def test_overflow_timestamp_raises(self): a = Timestamp("2101-01-01 00:00:00") b = Timestamp("1688-01-01 00:00:00") - with pytest.raises(OutOfBoundsDatetime, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): a - b # but we're OK for timestamp and datetime.datetime diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7597d4345cfce..602969b4ae655 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1839,13 +1839,8 @@ def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here - msg = "|".join( - [ - "Python int too large to convert to C long", - "long too big to convert", - "int too big to convert", - ] - ) + # a fixture exists in tests/scalar; should it be moved to a higher level? + msg = R"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 6c11ec42858c0..fcb2575f2251d 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -109,9 +109,7 @@ def test_to_timedelta_invalid_unit(self, arg): def test_to_timedelta_time(self): # time not supported ATM - msg = ( - "Value must be Timedelta, string, integer, float, timedelta or convertible" - ) + msg = "Invalid type for timedelta scalar" with pytest.raises(ValueError, match=msg): to_timedelta(time(second=1)) assert to_timedelta(time(second=1), errors="coerce") is pd.NaT @@ -264,10 +262,7 @@ def test_to_timedelta_zerodim(self, fixed_now_ts): dt64 = fixed_now_ts.to_datetime64() arg = np.array(dt64) - msg = ( - "Value must be Timedelta, string, integer, float, timedelta " - "or convertible, not datetime64" - ) + msg = "Invalid type for timedelta scalar" with pytest.raises(ValueError, match=msg): to_timedelta(arg) diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py index d9e86d53f2587..d45d79b5e1896 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -1,5 +1,3 @@ -import re - import numpy as np import pytest @@ -65,14 +63,8 @@ def test_huge_nanoseconds_overflow(): "kwargs", [{"Seconds": 1}, {"seconds": 1, "Nanoseconds": 1}, {"Foo": 2}] ) def test_kwarg_assertion(kwargs): - err_message = ( - "cannot construct a Timedelta from the passed arguments, " - "allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]" - ) - - with pytest.raises(ValueError, match=re.escape(err_message)): + msg = "cannot construct a Timedelta from the passed arguments" + with pytest.raises(ValueError, match=msg): Timedelta(**kwargs) From 3c21c7bb173887da3f00a01ed1a94815bbdb72b0 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 11 May 2022 16:51:23 -0700 Subject: [PATCH 03/10] trigger rerun of timed out tests From f7ee822b0dbd74d1b50bd46098c00ec87c971737 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 11 May 2022 18:37:38 -0700 Subject: [PATCH 04/10] is_timedelta64_object behaves differently on older versions? --- pandas/_libs/tslibs/timedeltas.pyx | 4 +++- pandas/tests/scalar/timedelta/test_constructors.py | 7 +++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 59f7ee7d0c420..b2521c11a8624 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -907,7 +907,9 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res if util.is_nan(value): return NaT out_value = value - elif is_timedelta64_object(value): + # is_timedelta_64_object may not give correct results w/ some versions? + # see e.g. https://github.com/pandas-dev/pandas/runs/6397652653?check_suite_focus=true#step:11:435 + elif isinstance(value, np.timedelta64): out_value = ensure_td64ns(value).view(np.int64) elif isinstance(value, str): if value.startswith(("P", "-P")): diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index d97c80880f663..f49a66b310153 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -225,7 +225,7 @@ def test_overflow_on_construction(td_overflow_msg: str): (9223372037, "s", " seconds"), # 44 seconds ], ) -def test_construction_out_of_bounds_td64(val, unit, name): +def test_construction_out_of_bounds_td64(val, unit, name, td_overflow_msg: str): # TODO: parametrize over units just above/below the implementation bounds # once GH#38964 is resolved @@ -233,8 +233,7 @@ def test_construction_out_of_bounds_td64(val, unit, name): td64 = np.timedelta64(val, unit) assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong - msg = str(val) + name - with pytest.raises(OutOfBoundsTimedelta, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(td64) # But just back in bounds and we are OK @@ -243,7 +242,7 @@ def test_construction_out_of_bounds_td64(val, unit, name): td64 *= -1 assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong - with pytest.raises(OutOfBoundsTimedelta, match="-" + msg): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta(td64) # But just back in bounds and we are OK From 9bf564b57340621ce69945edaa0111b37a64fc36 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Fri, 13 May 2022 13:17:05 -0500 Subject: [PATCH 05/10] will this do the trick? --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b2521c11a8624..5bffb62bf3a6e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -252,7 +252,7 @@ cdef object ensure_td64ns(object ts): try: td64_value = get_timedelta64_value(ts) * mult except OverflowError as ex: - msg = f"{ts} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" + msg = f"{str(ts)} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" raise OutOfBoundsTimedelta(msg) from ex return np.timedelta64(td64_value, "ns") From f31daa8838fca9a97524a4b1c5f99014a0278a35 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 16 May 2022 13:44:02 -0700 Subject: [PATCH 06/10] remove defaultdict --- pandas/_libs/tslibs/timedeltas.pyx | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 5bffb62bf3a6e..11881262d5e05 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1479,7 +1479,6 @@ class Timedelta(_Timedelta): def __new__(cls, object value=_no_input, unit=None, **kwargs): cdef: - _Timedelta td_base NPY_DATETIMEUNIT out_reso = NPY_FR_ns # process kwargs iff no value passed @@ -1496,17 +1495,17 @@ class Timedelta(_Timedelta): ) # GH43764, convert any input to nanoseconds first, to ensure any potential # nanosecond contributions from kwargs parsed as floats are included - kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) + # kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) ns = sum( ( - kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, - kwargs["days"] * 24 * 3600 * 1_000_000_000, - kwargs["hours"] * 3600 * 1_000_000_000, - kwargs["minutes"] * 60 * 1_000_000_000, - kwargs["seconds"] * 1_000_000_000, - kwargs["milliseconds"] * 1_000_000, - kwargs["microseconds"] * 1_000, - kwargs["nanoseconds"], + _to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000, + _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000, + _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000, + _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000, + _to_py_int_float(kwargs.get("nanoseconds", 0)), ) ) return create_timedelta(ns, "ns", out_reso) From 88115628552e2ea376285034e2728db1766672ba Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 16 May 2022 13:56:58 -0700 Subject: [PATCH 07/10] use existing convention of as err, not as ex --- pandas/_libs/tslibs/timedeltas.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 11881262d5e05..685bfbd7e35d5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -218,9 +218,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: + delta.seconds * 1_000_000 + delta.microseconds ) * 1000 - except OverflowError as ex: + except OverflowError as err: msg = f"{delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" - raise OutOfBoundsTimedelta(msg) from ex + raise OutOfBoundsTimedelta(msg) from err raise TypeError(type(delta)) @@ -251,9 +251,9 @@ cdef object ensure_td64ns(object ts): with cython.overflowcheck(True): try: td64_value = get_timedelta64_value(ts) * mult - except OverflowError as ex: + except OverflowError as err: msg = f"{str(ts)} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" - raise OutOfBoundsTimedelta(msg) from ex + raise OutOfBoundsTimedelta(msg) from err return np.timedelta64(td64_value, "ns") @@ -918,9 +918,9 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res out_value = parse_timedelta_string(value) else: out_value = convert_to_timedelta64(value, in_unit).view(np.int64) - except OverflowError as ex: + except OverflowError as err: msg = f"{value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" - raise OutOfBoundsTimedelta(msg) from ex + raise OutOfBoundsTimedelta(msg) from err if out_value == NPY_NAT: return NaT From e63b8fd6726edb30ef70a513ed83605342911939 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Thu, 19 May 2022 09:57:46 -0700 Subject: [PATCH 08/10] update docstrings, wrap long lines --- pandas/_libs/tslibs/timedeltas.pyx | 57 +++++++++++++++++++----------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 685bfbd7e35d5..6d5c7704e6015 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -887,11 +887,26 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res """ Timedelta factory. - Timedelta.__new__ just does arg validation (at least currently). Also, some internal - functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__ - doesn't yet expose that. + Overflow-safe, and allows for the creation of Timedeltas with non-nano resos while + the public API for that gets hashed out (ref: GH#46587). For now, Timedelta.__new__ + just does arg validation and kwarg processing. - _timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow. + _timedelta_from_value_and_reso faster if value already an int that can be safely + cast to an int64. + + Parameters + ---------- + value : Timedelta, timedelta, np.timedelta64, str, int, float + The same value types accepted by Timedelta.__new__ + in_unit : str + Denote the (np) unit of the input, if it's numeric + out_reso: NPY_DATETIMEUNIT + Desired resolution of new Timedelta + + Notes + ----- + Pass in_unit="ignore" (or "ns") with a numeric value to just do overflow checking + (and bypass the prior behavior of converting value -> td64[ns] -> int) """ cdef: int64_t out_value @@ -903,12 +918,14 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res # if unit == "ns", no need to create an m8[ns] just to read the (same) value back # if unit == "ignore", assume caller wants to invoke an overflow-safe version of # _timedelta_from_value_and_reso, and that any float rounding is acceptable - if (is_integer_object(value) or is_float_object(value)) and (in_unit == "ns" or in_unit == "ignore"): + if (is_integer_object(value) or is_float_object(value)) and ( + in_unit == "ns" or in_unit == "ignore" + ): if util.is_nan(value): return NaT out_value = value - # is_timedelta_64_object may not give correct results w/ some versions? - # see e.g. https://github.com/pandas-dev/pandas/runs/6397652653?check_suite_focus=true#step:11:435 + # is_timedelta_64_object may not give correct results w/ some versions? see e.g. + # github.com/pandas-dev/pandas/runs/6397652653?check_suite_focus=true#step:11:435 elif isinstance(value, np.timedelta64): out_value = ensure_td64ns(value).view(np.int64) elif isinstance(value, str): @@ -1474,7 +1491,8 @@ class Timedelta(_Timedelta): """ _allowed_kwargs = ( - "weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds" + "weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", + "nanoseconds" ) def __new__(cls, object value=_no_input, unit=None, **kwargs): @@ -1495,19 +1513,16 @@ class Timedelta(_Timedelta): ) # GH43764, convert any input to nanoseconds first, to ensure any potential # nanosecond contributions from kwargs parsed as floats are included - # kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) - ns = sum( - ( - _to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000, - _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000, - _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000, - _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000, - _to_py_int_float(kwargs.get("nanoseconds", 0)), - ) - ) + ns = sum(( + _to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000, + _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000, + _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000, + _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000, + _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000, + _to_py_int_float(kwargs.get("nanoseconds", 0)), + )) return create_timedelta(ns, "ns", out_reso) if isinstance(value, str) and unit is not None: From 702768773b6d30672e588694c7dee8fcd5a5e108 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 23 May 2022 17:52:32 -0700 Subject: [PATCH 09/10] avoid creating tuple --- pandas/_libs/tslibs/timedeltas.pyx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6d5c7704e6015..3ce75d09f78e8 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1513,16 +1513,16 @@ class Timedelta(_Timedelta): ) # GH43764, convert any input to nanoseconds first, to ensure any potential # nanosecond contributions from kwargs parsed as floats are included - ns = sum(( - _to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000, - _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000, - _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000, - _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000, - _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000, - _to_py_int_float(kwargs.get("nanoseconds", 0)), - )) + ns = ( + _to_py_int_float(kwargs.get("weeks", 0)) * 7 * 24 * 3600 * 1_000_000_000 + + _to_py_int_float(kwargs.get("days", 0)) * 24 * 3600 * 1_000_000_000 + + _to_py_int_float(kwargs.get("hours", 0)) * 3600 * 1_000_000_000 + + _to_py_int_float(kwargs.get("minutes", 0)) * 60 * 1_000_000_000 + + _to_py_int_float(kwargs.get("seconds", 0)) * 1_000_000_000 + + _to_py_int_float(kwargs.get("milliseconds", 0)) * 1_000_000 + + _to_py_int_float(kwargs.get("microseconds", 0)) * 1_000 + + _to_py_int_float(kwargs.get("nanoseconds", 0)) + ) return create_timedelta(ns, "ns", out_reso) if isinstance(value, str) and unit is not None: From 51274d23429771e264b577b6dd45b845990c9dd0 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 24 May 2022 11:46:39 -0700 Subject: [PATCH 10/10] remove fixture --- pandas/tests/scalar/conftest.py | 6 ----- .../tests/scalar/timedelta/test_arithmetic.py | 10 +++++--- .../scalar/timedelta/test_constructors.py | 24 +++++++++++-------- .../tests/scalar/timedelta/test_timedelta.py | 18 ++++++++------ .../tests/scalar/timestamp/test_arithmetic.py | 10 +++++--- 5 files changed, 39 insertions(+), 29 deletions(-) delete mode 100644 pandas/tests/scalar/conftest.py diff --git a/pandas/tests/scalar/conftest.py b/pandas/tests/scalar/conftest.py deleted file mode 100644 index cc80e33c555ad..0000000000000 --- a/pandas/tests/scalar/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -import pytest - - -@pytest.fixture(name="td_overflow_msg") -def fixture_td_overflow_msg() -> str: - return R"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 9da65754a5251..955dc86285a20 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -22,6 +22,10 @@ import pandas._testing as tm from pandas.core import ops +TD_OVERFLOW_MSG = ( + r"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" +) + class TestTimedeltaAdditionSubtraction: """ @@ -98,11 +102,11 @@ def test_td_add_datetimelike_scalar(self, op): result = op(td, NaT) assert result is NaT - def test_td_add_timestamp_overflow(self, td_overflow_msg: str): - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + def test_td_add_timestamp_overflow(self): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timestamp("1700-01-01") + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index f49a66b310153..92937854679d0 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -12,16 +12,20 @@ to_timedelta, ) +TD_OVERFLOW_MSG = ( + r"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" +) + -def test_construct_from_td64_with_unit(td_overflow_msg: str): +def test_construct_from_td64_with_unit(): # ignore the unit, as it may cause silently overflows leading to incorrect # results, and in non-overflow cases is irrelevant GH#46827 obj = np.timedelta64(123456789, "h") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(obj, unit="ns") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(obj) @@ -200,17 +204,17 @@ def test_td_from_repr_roundtrip(val): assert Timedelta(td._repr_base()) == td -def test_overflow_on_construction(td_overflow_msg: str): +def test_overflow_on_construction(): # GH#3374 value = Timedelta("1day").value * 20169940 - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(value) # xref GH#17637 - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(7 * 19999, unit="D") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(timedelta(days=13 * 19999)) @@ -225,7 +229,7 @@ def test_overflow_on_construction(td_overflow_msg: str): (9223372037, "s", " seconds"), # 44 seconds ], ) -def test_construction_out_of_bounds_td64(val, unit, name, td_overflow_msg: str): +def test_construction_out_of_bounds_td64(val, unit, name): # TODO: parametrize over units just above/below the implementation bounds # once GH#38964 is resolved @@ -233,7 +237,7 @@ def test_construction_out_of_bounds_td64(val, unit, name, td_overflow_msg: str): td64 = np.timedelta64(val, unit) assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(td64) # But just back in bounds and we are OK @@ -242,7 +246,7 @@ def test_construction_out_of_bounds_td64(val, unit, name, td_overflow_msg: str): td64 *= -1 assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(td64) # But just back in bounds and we are OK diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index c02f7a1637bfd..87550022b414f 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -24,6 +24,10 @@ ) import pandas._testing as tm +TD_OVERFLOW_MSG = ( + r"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" +) + class TestNonNano: @pytest.fixture(params=[7, 8, 9]) @@ -647,7 +651,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) - def test_implementation_limits(self, td_overflow_msg: str): + def test_implementation_limits(self): min_td = Timedelta(Timedelta.min) max_td = Timedelta(Timedelta.max) @@ -657,22 +661,22 @@ def test_implementation_limits(self, td_overflow_msg: str): assert max_td.value == lib.i8max # Beyond lower limit, a NAT before the Overflow - assert (min_td - Timedelta(1, "ns")) is NaT # type: ignore[comparison-overlap] + assert (min_td - Timedelta(1, "ns")) is NaT - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): min_td - Timedelta(2, "ns") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): max_td + Timedelta(1, "ns") # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, "ns") - assert td is NaT # type: ignore[comparison-overlap] + assert td is NaT - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(min_td.value - 2, "ns") - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): Timedelta(max_td.value + 1, "ns") def test_total_seconds_precision(self): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index c9f4373255c4b..5475b0d9042e1 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -17,6 +17,10 @@ import pandas._testing as tm +TD_OVERFLOW_MSG = ( + r"outside allowed range \[-9223372036854775807ns, 9223372036854775807ns\]" +) + class TestTimestampArithmetic: def test_overflow_offset(self): @@ -33,7 +37,7 @@ def test_overflow_offset(self): expected = Timestamp("1999/09/23") assert stamp - offset_no_overflow == expected - def test_overflow_offset_raises(self, td_overflow_msg: str): + def test_overflow_offset_raises(self): # xref https://github.com/statsmodels/statsmodels/issues/3374 # ends up multiplying really large numbers which overflow @@ -46,7 +50,7 @@ def test_overflow_offset_raises(self, td_overflow_msg: str): with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): offset_overflow + stamp with pytest.raises(OverflowError, match=lmsg): @@ -61,7 +65,7 @@ def test_overflow_offset_raises(self, td_overflow_msg: str): with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + with pytest.raises(OutOfBoundsTimedelta, match=TD_OVERFLOW_MSG): offset_overflow + stamp with pytest.raises(OverflowError, match=lmsg):