BUG: Timestamp.round overflows (#51494)

jbrockmendel · web-flow · commit efe896c0813a · 2023-02-24T18:13:02.000-08:00
* BUG: Timestamp.round overflows

* GH ref

* move whatsnew to 2.1.0
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -118,12 +118,12 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
--
+- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
 -
 
 Timedelta
 ^^^^^^^^^
--
+- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`)
 -
 
 Timezones
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -704,7 +704,7 @@ cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit):
     cdef:
         Py_ssize_t i, n = len(values)
         ndarray[int64_t] result = np.empty(n, dtype="i8")
-        int64_t res, value
+        int64_t res, value, remainder
 
     with cython.overflowcheck(True):
         for i in range(n):
@@ -732,6 +732,34 @@ cdef ndarray[int64_t] _roundup_int64(values, int64_t unit):
     return _floor_int64(values + unit // 2, unit)
 
 
+cdef ndarray[int64_t] _round_nearest_int64(const int64_t[:] values, int64_t unit):
+    cdef:
+        Py_ssize_t i, n = len(values)
+        ndarray[int64_t] result = np.empty(n, dtype="i8")
+        int64_t res, value, half, remainder, quotient
+
+    half = unit // 2
+
+    with cython.overflowcheck(True):
+        for i in range(n):
+            value = values[i]
+
+            if value == NPY_NAT:
+                res = NPY_NAT
+            else:
+                quotient, remainder = divmod(value, unit)
+                if remainder > half:
+                    res = value + (unit - remainder)
+                elif remainder == half and quotient % 2:
+                    res = value + (unit - remainder)
+                else:
+                    res = value - remainder
+
+            result[i] = res
+
+    return result
+
+
 def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
     """
     Applies rounding mode at given frequency
@@ -762,13 +790,7 @@ def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
         # for odd unit there is no need of a tie break
         if unit % 2:
             return _rounddown_int64(values, unit)
-        quotient, remainder = np.divmod(values, unit)
-        mask = np.logical_or(
-            remainder > (unit // 2),
-            np.logical_and(remainder == (unit // 2), quotient % 2)
-        )
-        quotient[mask] += 1
-        return quotient * unit
+        return _round_nearest_int64(values, unit)
 
     # if/elif above should catch all rounding modes defined in enum 'RoundTo':
     # if flow of control arrives here, it is a bug
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1824,7 +1824,12 @@ class Timedelta(_Timedelta):
         unit = delta_to_nanoseconds(to_offset(freq), self._creso)
 
         arr = np.array([self._value], dtype="i8")
-        result = round_nsint64(arr, mode, unit)[0]
+        try:
+            result = round_nsint64(arr, mode, unit)[0]
+        except OverflowError as err:
+            raise OutOfBoundsTimedelta(
+                f"Cannot round {self} to freq={freq} without overflow"
+            ) from err
         return Timedelta._from_value_and_reso(result, self._creso)
 
     def round(self, freq):
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -1692,7 +1692,13 @@ class Timestamp(_Timestamp):
         value = np.array([value], dtype=np.int64)
 
         # Will only ever contain 1 element for timestamp
-        r = round_nsint64(value, mode, nanos)[0]
+        try:
+            r = round_nsint64(value, mode, nanos)[0]
+        except OverflowError as err:
+            raise OutOfBoundsDatetime(
+                f"Cannot round {self} to freq={freq} without overflow"
+            ) from err
+
         result = Timestamp._from_value_and_reso(r, self._creso, None)
         if self.tz is not None:
             result = result.tz_localize(
diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -698,56 +698,103 @@ def test_round_implementation_bounds(self):
         expected = Timedelta.max - Timedelta(854775807)
         assert result == expected
 
-        with pytest.raises(OverflowError, match="value too large"):
+        msg = (
+            r"Cannot round -106752 days \+00:12:43.145224193 to freq=s without overflow"
+        )
+        with pytest.raises(OutOfBoundsTimedelta, match=msg):
             Timedelta.min.floor("s")
+        with pytest.raises(OutOfBoundsTimedelta, match=msg):
+            Timedelta.min.round("s")
 
-        # the second message here shows up in windows builds
-        msg = "|".join(
-            ["Python int too large to convert to C long", "int too big to convert"]
-        )
-        with pytest.raises(OverflowError, match=msg):
+        msg = "Cannot round 106751 days 23:47:16.854775807 to freq=s without overflow"
+        with pytest.raises(OutOfBoundsTimedelta, match=msg):
             Timedelta.max.ceil("s")
+        with pytest.raises(OutOfBoundsTimedelta, match=msg):
+            Timedelta.max.round("s")
 
-    @pytest.mark.xfail(reason="Failing on builds", strict=False)
     @given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max))
     @pytest.mark.parametrize(
         "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil]
     )
     def test_round_sanity(self, val, method):
-        val = np.int64(val)
-        td = Timedelta(val)
+        cls = Timedelta
+        err_cls = OutOfBoundsTimedelta
 
-        assert method(td, "ns") == td
+        val = np.int64(val)
+        td = cls(val)
+
+        def checker(ts, nanos, unit):
+            # First check that we do raise in cases where we should
+            if nanos == 1:
+                pass
+            else:
+                div, mod = divmod(ts._value, nanos)
+                diff = int(nanos - mod)
+                lb = ts._value - mod
+                assert lb <= ts._value  # i.e. no overflows with python ints
+                ub = ts._value + diff
+                assert ub > ts._value  # i.e. no overflows with python ints
+
+                msg = "without overflow"
+                if mod == 0:
+                    # We should never be raising in this
+                    pass
+                elif method is cls.ceil:
+                    if ub > cls.max._value:
+                        with pytest.raises(err_cls, match=msg):
+                            method(ts, unit)
+                        return
+                elif method is cls.floor:
+                    if lb < cls.min._value:
+                        with pytest.raises(err_cls, match=msg):
+                            method(ts, unit)
+                        return
+                else:
+                    if mod >= diff:
+                        if ub > cls.max._value:
+                            with pytest.raises(err_cls, match=msg):
+                                method(ts, unit)
+                            return
+                    else:
+                        if lb < cls.min._value:
+                            with pytest.raises(err_cls, match=msg):
+                                method(ts, unit)
+                            return
+
+            res = method(ts, unit)
+
+            td = res - ts
+            diff = abs(td._value)
+            assert diff < nanos
+            assert res._value % nanos == 0
+
+            if method is cls.round:
+                assert diff <= nanos / 2
+            elif method is cls.floor:
+                assert res <= ts
+            elif method is cls.ceil:
+                assert res >= ts
+
+        nanos = 1
+        checker(td, nanos, "ns")
 
-        res = method(td, "us")
         nanos = 1000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "us")
 
-        res = method(td, "ms")
         nanos = 1_000_000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "ms")
 
-        res = method(td, "s")
         nanos = 1_000_000_000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "s")
 
-        res = method(td, "min")
         nanos = 60 * 1_000_000_000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "min")
 
-        res = method(td, "h")
         nanos = 60 * 60 * 1_000_000_000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "h")
 
-        res = method(td, "D")
         nanos = 24 * 60 * 60 * 1_000_000_000
-        assert np.abs((res - td).value) < nanos
-        assert res.value % nanos == 0
+        checker(td, nanos, "D")
 
     @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
     def test_round_non_nano(self, unit):
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py
@@ -294,71 +294,103 @@ def test_round_implementation_bounds(self):
         expected = Timestamp.max - Timedelta(854775807)
         assert result == expected
 
-        with pytest.raises(OverflowError, match="value too large"):
+        msg = "Cannot round 1677-09-21 00:12:43.145224193 to freq=<Second>"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
             Timestamp.min.floor("s")
 
-        # the second message here shows up in windows builds
-        msg = "|".join(
-            ["Python int too large to convert to C long", "int too big to convert"]
-        )
-        with pytest.raises(OverflowError, match=msg):
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            Timestamp.min.round("s")
+
+        msg = "Cannot round 2262-04-11 23:47:16.854775807 to freq=<Second>"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
             Timestamp.max.ceil("s")
 
-    @pytest.mark.xfail(reason="Failing on builds", strict=False)
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            Timestamp.max.round("s")
+
     @given(val=st.integers(iNaT + 1, lib.i8max))
     @pytest.mark.parametrize(
         "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
     )
     def test_round_sanity(self, val, method):
-        val = np.int64(val)
-        ts = Timestamp(val)
+        cls = Timestamp
+        err_cls = OutOfBoundsDatetime
 
-        def checker(res, ts, nanos):
-            if method is Timestamp.round:
-                diff = np.abs((res - ts)._value)
+        val = np.int64(val)
+        ts = cls(val)
+
+        def checker(ts, nanos, unit):
+            # First check that we do raise in cases where we should
+            if nanos == 1:
+                pass
+            else:
+                div, mod = divmod(ts._value, nanos)
+                diff = int(nanos - mod)
+                lb = ts._value - mod
+                assert lb <= ts._value  # i.e. no overflows with python ints
+                ub = ts._value + diff
+                assert ub > ts._value  # i.e. no overflows with python ints
+
+                msg = "without overflow"
+                if mod == 0:
+                    # We should never be raising in this
+                    pass
+                elif method is cls.ceil:
+                    if ub > cls.max._value:
+                        with pytest.raises(err_cls, match=msg):
+                            method(ts, unit)
+                        return
+                elif method is cls.floor:
+                    if lb < cls.min._value:
+                        with pytest.raises(err_cls, match=msg):
+                            method(ts, unit)
+                        return
+                else:
+                    if mod >= diff:
+                        if ub > cls.max._value:
+                            with pytest.raises(err_cls, match=msg):
+                                method(ts, unit)
+                            return
+                    else:
+                        if lb < cls.min._value:
+                            with pytest.raises(err_cls, match=msg):
+                                method(ts, unit)
+                            return
+
+            res = method(ts, unit)
+
+            td = res - ts
+            diff = abs(td._value)
+            assert diff < nanos
+            assert res._value % nanos == 0
+
+            if method is cls.round:
                 assert diff <= nanos / 2
-            elif method is Timestamp.floor:
+            elif method is cls.floor:
                 assert res <= ts
-            elif method is Timestamp.ceil:
+            elif method is cls.ceil:
                 assert res >= ts
 
-        assert method(ts, "ns") == ts
+        nanos = 1
+        checker(ts, nanos, "ns")
 
-        res = method(ts, "us")
         nanos = 1000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "us")
 
-        res = method(ts, "ms")
         nanos = 1_000_000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "ms")
 
-        res = method(ts, "s")
         nanos = 1_000_000_000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "s")
 
-        res = method(ts, "min")
         nanos = 60 * 1_000_000_000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "min")
 
-        res = method(ts, "h")
         nanos = 60 * 60 * 1_000_000_000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "h")
 
-        res = method(ts, "D")
         nanos = 24 * 60 * 60 * 1_000_000_000
-        assert np.abs((res - ts)._value) < nanos
-        assert res._value % nanos == 0
-        checker(res, ts, nanos)
+        checker(ts, nanos, "D")
 
     # --------------------------------------------------------------
     # Timestamp.replace

Original file line number	Diff line number	Diff line change
`@@ -118,12 +118,12 @@ Categorical`
`118`	`118`
`119`	`119`	`Datetimelike`
`120`	`120`	`^^^^^^^^^^^^`
`121`		`--`
	`121`	+- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
`122`	`122`	`-`
`123`	`123`
`124`	`124`	`Timedelta`
`125`	`125`	`^^^^^^^^^`
`126`		`--`
	`126`	+- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`)
`127`	`127`	`-`
`128`	`128`
`129`	`129`	`Timezones`