From e0e6c340a62f76554e5806b50baa839e649477b4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Feb 2023 17:09:49 -0800 Subject: [PATCH 1/3] BUG: Timestamp.round overflows --- doc/source/whatsnew/v2.0.0.rst | 3 + pandas/_libs/tslibs/fields.pyx | 38 ++++-- pandas/_libs/tslibs/timedeltas.pyx | 7 +- pandas/_libs/tslibs/timestamps.pyx | 8 +- .../tests/scalar/timedelta/test_timedelta.py | 103 +++++++++++----- .../tests/scalar/timestamp/test_unary_ops.py | 110 +++++++++++------- 6 files changed, 192 insertions(+), 77 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ac7d30310be9e..b63ca1bbcd599 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1205,6 +1205,8 @@ Datetimelike - Bug in :func:`to_datetime` with both ``unit`` and ``origin`` specified returning incorrect results (:issue:`42624`) - Bug in :meth:`GroupBy.quantile` with datetime or timedelta dtypes giving incorrect results for groups containing ``NaT`` (:issue:`51373`) - Bug in :meth:`Groupby.quantile` incorrectly raising with :class:`PeriodDtype` or :class:`DatetimeTZDtype` (:issue:`51373`) +- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` + - Timedelta @@ -1212,6 +1214,7 @@ Timedelta - Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`) - Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`) - Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`) +- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` - Timezones diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 242e7159d29b5..2f1fb7fc44b2f 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -704,7 +704,7 @@ cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value + int64_t res, value, remainder with cython.overflowcheck(True): for i in range(n): @@ -732,6 +732,34 @@ cdef ndarray[int64_t] _roundup_int64(values, int64_t unit): return _floor_int64(values + unit // 2, unit) +cdef ndarray[int64_t] _round_nearest_int64(const int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value, half, remainder, quotient + + half = unit // 2 + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + + if value == NPY_NAT: + res = NPY_NAT + else: + quotient, remainder = divmod(value, unit) + if remainder > half: + res = value + (unit - remainder) + elif remainder == half and quotient % 2: + res = value + (unit - remainder) + else: + res = value - remainder + + result[i] = res + + return result + + def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: """ Applies rounding mode at given frequency @@ -762,13 +790,7 @@ def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray: # for odd unit there is no need of a tie break if unit % 2: return _rounddown_int64(values, unit) - quotient, remainder = np.divmod(values, unit) - mask = np.logical_or( - remainder > (unit // 2), - np.logical_and(remainder == (unit // 2), quotient % 2) - ) - quotient[mask] += 1 - return quotient * unit + return _round_nearest_int64(values, unit) # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a8c5474f0f532..2eee49a869ef0 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1823,7 +1823,12 @@ class Timedelta(_Timedelta): unit = delta_to_nanoseconds(to_offset(freq), self._creso) arr = np.array([self._value], dtype="i8") - result = round_nsint64(arr, mode, unit)[0] + try: + result = round_nsint64(arr, mode, unit)[0] + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot round {self} to freq={freq} without overflow" + ) from err return Timedelta._from_value_and_reso(result, self._creso) def round(self, freq): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 33e8344c79d6c..59fdad40dc9c6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1692,7 +1692,13 @@ class Timestamp(_Timestamp): value = np.array([value], dtype=np.int64) # Will only ever contain 1 element for timestamp - r = round_nsint64(value, mode, nanos)[0] + try: + r = round_nsint64(value, mode, nanos)[0] + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Cannot round {self} to freq={freq} without overflow" + ) from err + result = Timestamp._from_value_and_reso(r, self._creso, None) if self.tz is not None: result = result.tz_localize( diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 47ee81c506a87..7046055b5c0d5 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -698,56 +698,103 @@ def test_round_implementation_bounds(self): expected = Timedelta.max - Timedelta(854775807) assert result == expected - with pytest.raises(OverflowError, match="value too large"): + msg = ( + r"Cannot round -106752 days \+00:12:43.145224193 to freq=s without overflow" + ) + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.min.floor("s") + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta.min.round("s") - # the second message here shows up in windows builds - msg = "|".join( - ["Python int too large to convert to C long", "int too big to convert"] - ) - with pytest.raises(OverflowError, match=msg): + msg = "Cannot round 106751 days 23:47:16.854775807 to freq=s without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta.max.ceil("s") + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta.max.round("s") - @pytest.mark.xfail(reason="Failing on builds", strict=False) @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max)) @pytest.mark.parametrize( "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] ) def test_round_sanity(self, val, method): - val = np.int64(val) - td = Timedelta(val) + cls = Timedelta + err_cls = OutOfBoundsTimedelta - assert method(td, "ns") == td + val = np.int64(val) + td = cls(val) + + def checker(ts, nanos, unit): + # First check that we do raise in cases where we should + if nanos == 1: + pass + else: + div, mod = divmod(ts._value, nanos) + diff = int(nanos - mod) + lb = ts._value - mod + assert lb <= ts._value # i.e. no overflows with python ints + ub = ts._value + diff + assert ub > ts._value # i.e. no overflows with python ints + + msg = "without overflow" + if mod == 0: + # We should never be raising in this + pass + elif method is cls.ceil: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + elif method is cls.floor: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if mod >= diff: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + + res = method(ts, unit) + + td = res - ts + diff = abs(td._value) + assert diff < nanos + assert res._value % nanos == 0 + + if method is cls.round: + assert diff <= nanos / 2 + elif method is cls.floor: + assert res <= ts + elif method is cls.ceil: + assert res >= ts + + nanos = 1 + checker(td, nanos, "ns") - res = method(td, "us") nanos = 1000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "us") - res = method(td, "ms") nanos = 1_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "ms") - res = method(td, "s") nanos = 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "s") - res = method(td, "min") nanos = 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "min") - res = method(td, "h") nanos = 60 * 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "h") - res = method(td, "D") nanos = 24 * 60 * 60 * 1_000_000_000 - assert np.abs((res - td).value) < nanos - assert res.value % nanos == 0 + checker(td, nanos, "D") @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) def test_round_non_nano(self, unit): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index d37fb927bdd96..5a082fb91a03e 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -294,71 +294,103 @@ def test_round_implementation_bounds(self): expected = Timestamp.max - Timedelta(854775807) assert result == expected - with pytest.raises(OverflowError, match="value too large"): + msg = "Cannot round 1677-09-21 00:12:43.145224193 to freq=" + with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.min.floor("s") - # the second message here shows up in windows builds - msg = "|".join( - ["Python int too large to convert to C long", "int too big to convert"] - ) - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.min.round("s") + + msg = "Cannot round 2262-04-11 23:47:16.854775807 to freq=" + with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.max.ceil("s") - @pytest.mark.xfail(reason="Failing on builds", strict=False) + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp.max.round("s") + @given(val=st.integers(iNaT + 1, lib.i8max)) @pytest.mark.parametrize( "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] ) def test_round_sanity(self, val, method): - val = np.int64(val) - ts = Timestamp(val) + cls = Timestamp + err_cls = OutOfBoundsDatetime - def checker(res, ts, nanos): - if method is Timestamp.round: - diff = np.abs((res - ts)._value) + val = np.int64(val) + ts = cls(val) + + def checker(ts, nanos, unit): + # First check that we do raise in cases where we should + if nanos == 1: + pass + else: + div, mod = divmod(ts._value, nanos) + diff = int(nanos - mod) + lb = ts._value - mod + assert lb <= ts._value # i.e. no overflows with python ints + ub = ts._value + diff + assert ub > ts._value # i.e. no overflows with python ints + + msg = "without overflow" + if mod == 0: + # We should never be raising in this + pass + elif method is cls.ceil: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + elif method is cls.floor: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if mod >= diff: + if ub > cls.max._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + else: + if lb < cls.min._value: + with pytest.raises(err_cls, match=msg): + method(ts, unit) + return + + res = method(ts, unit) + + td = res - ts + diff = abs(td._value) + assert diff < nanos + assert res._value % nanos == 0 + + if method is cls.round: assert diff <= nanos / 2 - elif method is Timestamp.floor: + elif method is cls.floor: assert res <= ts - elif method is Timestamp.ceil: + elif method is cls.ceil: assert res >= ts - assert method(ts, "ns") == ts + nanos = 1 + checker(ts, nanos, "ns") - res = method(ts, "us") nanos = 1000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "us") - res = method(ts, "ms") nanos = 1_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "ms") - res = method(ts, "s") nanos = 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "s") - res = method(ts, "min") nanos = 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "min") - res = method(ts, "h") nanos = 60 * 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "h") - res = method(ts, "D") nanos = 24 * 60 * 60 * 1_000_000_000 - assert np.abs((res - ts)._value) < nanos - assert res._value % nanos == 0 - checker(res, ts, nanos) + checker(ts, nanos, "D") # -------------------------------------------------------------- # Timestamp.replace From ff7cb75745a5ecca9cca2310a8192bc22b3a0e6f Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Feb 2023 17:10:53 -0800 Subject: [PATCH 2/3] GH ref --- doc/source/whatsnew/v2.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b63ca1bbcd599..322f88c58566e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1205,7 +1205,7 @@ Datetimelike - Bug in :func:`to_datetime` with both ``unit`` and ``origin`` specified returning incorrect results (:issue:`42624`) - Bug in :meth:`GroupBy.quantile` with datetime or timedelta dtypes giving incorrect results for groups containing ``NaT`` (:issue:`51373`) - Bug in :meth:`Groupby.quantile` incorrectly raising with :class:`PeriodDtype` or :class:`DatetimeTZDtype` (:issue:`51373`) -- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` +- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - @@ -1214,7 +1214,7 @@ Timedelta - Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`) - Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`) - Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`) -- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` +- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`) - Timezones From fef3189225ffb812b80d8f3628de0575cb54965e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Feb 2023 13:40:33 -0800 Subject: [PATCH 3/3] move whatsnew to 2.1.0 --- doc/source/whatsnew/v2.0.0.rst | 2 -- doc/source/whatsnew/v2.1.0.rst | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f0fc6f9703f7b..f303cadcb8507 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1217,14 +1217,12 @@ Datetimelike - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` when converting an object-dtype object containing timezone-aware datetimes or strings to ``datetime64[ns]`` incorrectly localizing as UTC instead of raising ``TypeError`` (:issue:`50140`) - Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` with datetime or timedelta dtypes giving incorrect results for groups containing ``NaT`` (:issue:`51373`) - Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` incorrectly raising with :class:`PeriodDtype` or :class:`DatetimeTZDtype` (:issue:`51373`) -- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) Timedelta ^^^^^^^^^ - Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`) - Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`) - Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`) -- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`) Timezones ^^^^^^^^^ diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3b24310014ff8..dd2bee58b1720 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -118,12 +118,12 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Timedelta ^^^^^^^^^ -- +- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`) - Timezones