diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3b24310014ff8..dd2bee58b1720 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -118,12 +118,12 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Timedelta ^^^^^^^^^ -- +- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`) - Timezones diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 242e7159d29b5..2f1fb7fc44b2f 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -704,7 +704,7 @@ cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value + int64_t res, value, remainder with cython.overflowcheck(True): for i in range(n): @@ -732,6 +732,34 @@ cdef ndarray[int64_t] _roundup_int64(values, int64_t unit): return _floor_int64(values + unit // 2, unit) +cdef ndarray[int64_t] _round_nearest_int64(const int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value, half, remainder, quotient + + half = unit // 2 + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + + if value == NPY_NAT: + res = NPY_NAT + else: + quotient, remainder = divmod(value, unit) + if remainder > half: + res = value + (unit - remainder) + elif remainder == half and quotient % 2: + res = value + (unit - remainder) + else: + res = value - remainder + + result[i] = res + + return result + + def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: """ Applies rounding mode at given frequency @@ -762,13 +790,7 @@ def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: # for odd unit there is no need of a tie break if unit % 2: return _rounddown_int64(values, unit) - quotient, remainder = np.divmod(values, unit) - mask = np.logical_or( - remainder > (unit // 2), - np.logical_and(remainder == (unit // 2), quotient % 2) - ) - quotient[mask] += 1 - return quotient * unit + return _round_nearest_int64(values, unit) # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2a62b3193cfab..d68fea7d08b2a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1824,7 +1824,12 @@ class Timedelta(_Timedelta): unit = delta_to_nanoseconds(to_offset(freq), self._creso) arr = np.array([self._value], dtype="i8") - result = round_nsint64(arr, mode, unit)[0] + try: + result = round_nsint64(arr, mode, unit)[0] + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot round {self} to freq={freq} without overflow" + ) from err return Timedelta._from_value_and_reso(result, self._creso) def round(self, freq): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 33e8344c79d6c..59fdad40dc9c6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1692,7 +1692,13 @@ class Timestamp(_Timestamp): value = np.array([value], dtype=np.int64) # Will only ever contain 1 element for timestamp - r = round_nsint64(value, mode, nanos)[0] + try: + r = round_nsint64(value, mode, nanos)[0] + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Cannot round {self} to freq={freq} without overflow" + ) from err + result = Timestamp._from_value_and_reso(r, self._creso, None) if self.tz is not None: result = result.tz_localize( diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 47ee81c506a87..7046055b5c0d5 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -698,56 +698,103 @@ def test_round_implementation_bounds(self): expected = Timedelta.max - Timedelta(854775807) assert result == expected - with pytest.raises(OverflowError, match="value too large"): + msg = ( + r"Cannot round -106752 days \+00:12:43.145224193 to freq=s without overflow" + ) + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.min.floor("s") + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta.min.round("s") - # the second message here shows up in windows builds - msg = "|".join( - ["Python int too large to convert to C long", "int too big to convert"] - ) - with pytest.raises(OverflowError, match=msg): + msg = "Cannot round 106751 days 23:47:16.854775807 to freq=s without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.max.ceil("s") + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta.max.round("s") - @pytest.mark.xfail(reason="Failing on builds", strict=False) @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) @pytest.mark.parametrize( "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] ) def test_round_sanity(self, val, method): - val = np.int64(val) - td = Timedelta(val) + cls = Timedelta + err_cls = OutOfBoundsTimedelta - assert method(td, "ns") == td + val = np.int64(val) + td = cls(val) + + def checker(ts, nanos, unit): + # First check that we do raise in cases where we should + if nanos == 1: + pass + else: + div, mod = divmod(ts._value, nanos) + diff = int(nanos - mod) + lb = ts._value - mod + assert lb <= ts._value # i.e. no overflows with python ints + ub = ts._value + diff + assert ub > ts._value # i.e. no overflows with python ints + + msg = "without overflow" + if mod == 0: + # We should never be raising in this + pass + elif method is cls.ceil: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + elif method is cls.floor: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if mod >= diff: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + + res = method(ts, unit) + + td = res - ts + diff = abs(td._value) + assert diff < nanos + assert res._value % nanos == 0 + + if method is cls.round: + assert diff <= nanos / 2 + elif method is cls.floor: + assert res <= ts + elif method is cls.ceil: + assert res >= ts + + nanos = 1 + checker(td, nanos, "ns") - res = method(td, "us") nanos = 1000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "us") - res = method(td, "ms") nanos = 1_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "ms") - res = method(td, "s") nanos = 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "s") - res = method(td, "min") nanos = 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "min") - res = method(td, "h") nanos = 60 * 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "h") - res = method(td, "D") nanos = 24 * 60 * 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "D") @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) def test_round_non_nano(self, unit): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index d37fb927bdd96..5a082fb91a03e 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -294,71 +294,103 @@ def test_round_implementation_bounds(self): expected = Timestamp.max - Timedelta(854775807) assert result == expected - with pytest.raises(OverflowError, match="value too large"): + msg = "Cannot round 1677-09-21 00:12:43.145224193 to freq=" + with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.min.floor("s") - # the second message here shows up in windows builds - msg = "|".join( - ["Python int too large to convert to C long", "int too big to convert"] - ) - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.min.round("s") + + msg = "Cannot round 2262-04-11 23:47:16.854775807 to freq=" + with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.max.ceil("s") - @pytest.mark.xfail(reason="Failing on builds", strict=False) + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.max.round("s") + @given(val=st.integers(iNaT + 1, lib.i8max)) @pytest.mark.parametrize( "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] ) def test_round_sanity(self, val, method): - val = np.int64(val) - ts = Timestamp(val) + cls = Timestamp + err_cls = OutOfBoundsDatetime - def checker(res, ts, nanos): - if method is Timestamp.round: - diff = np.abs((res - ts)._value) + val = np.int64(val) + ts = cls(val) + + def checker(ts, nanos, unit): + # First check that we do raise in cases where we should + if nanos == 1: + pass + else: + div, mod = divmod(ts._value, nanos) + diff = int(nanos - mod) + lb = ts._value - mod + assert lb <= ts._value # i.e. no overflows with python ints + ub = ts._value + diff + assert ub > ts._value # i.e. no overflows with python ints + + msg = "without overflow" + if mod == 0: + # We should never be raising in this + pass + elif method is cls.ceil: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + elif method is cls.floor: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if mod >= diff: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + + res = method(ts, unit) + + td = res - ts + diff = abs(td._value) + assert diff < nanos + assert res._value % nanos == 0 + + if method is cls.round: assert diff <= nanos / 2 - elif method is Timestamp.floor: + elif method is cls.floor: assert res <= ts - elif method is Timestamp.ceil: + elif method is cls.ceil: assert res >= ts - assert method(ts, "ns") == ts + nanos = 1 + checker(ts, nanos, "ns") - res = method(ts, "us") nanos = 1000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "us") - res = method(ts, "ms") nanos = 1_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "ms") - res = method(ts, "s") nanos = 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "s") - res = method(ts, "min") nanos = 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "min") - res = method(ts, "h") nanos = 60 * 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "h") - res = method(ts, "D") nanos = 24 * 60 * 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "D") # -------------------------------------------------------------- # Timestamp.replace