diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 71af2ff650a01..7199a23876b8e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -436,6 +436,7 @@ Other API changes The ``auth_local_webserver = False`` option is planned to stop working in October 2022. (:issue:`46312`) - :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`) +- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`) - .. --------------------------------------------------------------------------- @@ -736,6 +737,7 @@ Timedelta ^^^^^^^^^ - Bug in :func:`astype_nansafe` astype("timedelta64[ns]") fails when np.nan is included (:issue:`45798`) - Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`) +- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`) - Time Zones diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 28a6480f368d9..d8a069245f9d7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -316,6 +316,8 @@ cdef convert_to_timedelta64(object ts, str unit): Return an ns based int64 """ + # Caller is responsible for checking unit not in ["Y", "y", "M"] + if checknull_with_nat(ts): return np.timedelta64(NPY_NAT, "ns") elif isinstance(ts, _Timedelta): @@ -329,17 +331,9 @@ cdef convert_to_timedelta64(object ts, str unit): if ts == NPY_NAT: return np.timedelta64(NPY_NAT, "ns") else: - if unit in ["Y", "M", "W"]: - ts = np.timedelta64(ts, unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts, "ns") + ts = _maybe_cast_from_unit(ts, unit) elif is_float_object(ts): - if unit in ["Y", "M", "W"]: - ts = np.timedelta64(int(ts), unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts, "ns") + ts = _maybe_cast_from_unit(ts, unit) elif isinstance(ts, str): if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): ts = parse_iso_format_string(ts) @@ -356,6 +350,20 @@ cdef convert_to_timedelta64(object ts, str unit): return ts.astype("timedelta64[ns]") +cdef _maybe_cast_from_unit(ts, str unit): + # caller is responsible for checking + # assert unit not in ["Y", "y", "M"] + try: + ts = cast_from_unit(ts, unit) + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot cast {ts} from {unit} to 'ns' without overflow." + ) from err + + ts = np.timedelta64(ts, "ns") + return ts + + @cython.boundscheck(False) @cython.wraparound(False) def array_to_timedelta64( @@ -370,6 +378,8 @@ def array_to_timedelta64( ------- np.ndarray[timedelta64ns] """ + # Caller is responsible for checking + assert unit not in ["Y", "y", "M"] cdef: Py_ssize_t i, n = values.size @@ -652,24 +662,20 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): cdef: str n - try: - unit = ''.join(unit) - - if unit in ["M", "Y", "y"]: - warnings.warn( - "Units 'M', 'Y' and 'y' do not represent unambiguous " - "timedelta values and will be removed in a future version.", - FutureWarning, - stacklevel=2, - ) + unit = ''.join(unit) + if unit in ["M", "Y", "y"]: + warnings.warn( + "Units 'M', 'Y' and 'y' do not represent unambiguous " + "timedelta values and will be removed in a future version.", + FutureWarning, + stacklevel=3, + ) - if unit == 'M': - # To parse ISO 8601 string, 'M' should be treated as minute, - # not month - unit = 'm' - unit = parse_timedelta_unit(unit) - except KeyError: - raise ValueError(f"invalid abbreviation: {unit}") + if unit == 'M': + # To parse ISO 8601 string, 'M' should be treated as minute, + # not month + unit = 'm' + unit = parse_timedelta_unit(unit) n = ''.join(number) + '.' + ''.join(frac) return cast_from_unit(float(n), unit) @@ -696,7 +702,7 @@ cpdef inline str parse_timedelta_unit(str unit): return unit try: return timedelta_abbrevs[unit.lower()] - except (KeyError, AttributeError): + except KeyError: raise ValueError(f"invalid unit abbreviation: {unit}") # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 67aae23f7fdd1..c25a8687ba33e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -90,7 +90,10 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) from pandas._libs.tslibs.offsets cimport ( BaseOffset, @@ -455,7 +458,7 @@ cdef class _Timestamp(ABCTimestamp): # Timedelta try: return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime) as err: + except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: if isinstance(other, _Timestamp): if both_timestamps: raise OutOfBoundsDatetime( diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 793bddee7f3cc..36e8e44e2034f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -270,6 +270,8 @@ def _from_sequence_not_strict( if dtype: _validate_td64_dtype(dtype) + assert unit not in ["Y", "y", "M"] # caller is responsible for checking + explicit_none = freq is None freq = freq if freq is not lib.no_default else None @@ -923,6 +925,8 @@ def sequence_to_td64ns( errors to be ignored; they are caught and subsequently ignored at a higher level. """ + assert unit not in ["Y", "y", "M"] # caller is responsible for checking + inferred_freq = None if unit is not None: unit = parse_timedelta_unit(unit) @@ -954,7 +958,7 @@ def sequence_to_td64ns( # Convert whatever we have into timedelta64[ns] dtype if is_object_dtype(data.dtype) or is_string_dtype(data.dtype): # no need to make a copy, need to convert if string-dtyped - data = objects_to_td64ns(data, unit=unit, errors=errors) + data = _objects_to_td64ns(data, unit=unit, errors=errors) copy = False elif is_integer_dtype(data.dtype): @@ -1032,7 +1036,7 @@ def ints_to_td64ns(data, unit="ns"): return data, copy_made -def objects_to_td64ns(data, unit=None, errors="raise"): +def _objects_to_td64ns(data, unit=None, errors="raise"): """ Convert a object-dtyped or string-dtyped array into an timedelta64[ns]-dtyped array. diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index c9e096bb3678e..614245ec7a93e 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -99,8 +99,8 @@ def test_td_add_datetimelike_scalar(self, op): assert result is NaT def test_td_add_timestamp_overflow(self): - msg = "int too (large|big) to convert" - with pytest.raises(OverflowError, match=msg): + msg = "Cannot cast 259987 from D to 'ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow" diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 3a12fa031545b..5b2438ec30f3a 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -13,6 +13,15 @@ ) +def test_construct_with_weeks_unit_overflow(): + # GH#47268 don't silently wrap around + with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): + Timedelta(1000000000000000000, unit="W") + + with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): + Timedelta(1000000000000000000.0, unit="W") + + def test_construct_from_td64_with_unit(): # ignore the unit, as it may cause silently overflows leading to incorrect # results, and in non-overflow cases is irrelevant GH#46827 @@ -204,15 +213,15 @@ def test_td_from_repr_roundtrip(val): def test_overflow_on_construction(): - msg = "int too (large|big) to convert" - # GH#3374 value = Timedelta("1day").value * 20169940 - with pytest.raises(OverflowError, match=msg): + msg = "Cannot cast 1742682816000000000000 from ns to 'ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(value) # xref GH#17637 - with pytest.raises(OverflowError, match=msg): + msg = "Cannot cast 139993 from D to 'ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(7 * 19999, unit="D") msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 5ef69075c5e2f..90c090d816c9d 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -744,10 +744,12 @@ def test_implementation_limits(self): td = Timedelta(min_td.value - 1, "ns") assert td is NaT - with pytest.raises(OverflowError, match=msg): + msg = "Cannot cast -9223372036854775809 from ns to 'ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(min_td.value - 2, "ns") - with pytest.raises(OverflowError, match=msg): + msg = "Cannot cast 9223372036854775808 from ns to 'ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(max_td.value + 1, "ns") def test_total_seconds_precision(self):