BUG: Timestamp.round floating point error (#39244)

jbrockmendel · web-flow · commit fb6a334c22f1 · 2021-01-21T12:36:49.000-05:00
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -234,6 +234,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
 - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
 - Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
+- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
 - Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
 
 Timedelta
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -8,6 +8,8 @@ shadows the python class, where we do any heavy lifting.
 """
 import warnings
 
+cimport cython
+
 import numpy as np
 
 cimport numpy as cnp
@@ -153,32 +155,69 @@ class RoundTo:
         return 4
 
 
-cdef inline _floor_int64(values, unit):
-    return values - np.remainder(values, unit)
+cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
+    cdef:
+        Py_ssize_t i, n = len(values)
+        ndarray[int64_t] result = np.empty(n, dtype="i8")
+        int64_t res, value
+
+    with cython.overflowcheck(True):
+        for i in range(n):
+            value = values[i]
+            if value == NPY_NAT:
+                res = NPY_NAT
+            else:
+                res = value - value % unit
+            result[i] = res
+
+    return result
+
+
+cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
+    cdef:
+        Py_ssize_t i, n = len(values)
+        ndarray[int64_t] result = np.empty(n, dtype="i8")
+        int64_t res, value
 
-cdef inline _ceil_int64(values, unit):
-    return values + np.remainder(-values, unit)
+    with cython.overflowcheck(True):
+        for i in range(n):
+            value = values[i]
 
-cdef inline _rounddown_int64(values, unit):
+            if value == NPY_NAT:
+                res = NPY_NAT
+            else:
+                remainder = value % unit
+                if remainder == 0:
+                    res = value
+                else:
+                    res = value + (unit - remainder)
+
+            result[i] = res
+
+    return result
+
+
+cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
     return _ceil_int64(values - unit//2, unit)
 
-cdef inline _roundup_int64(values, unit):
+
+cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
     return _floor_int64(values + unit//2, unit)
 
 
-def round_nsint64(values, mode, freq):
+def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray:
     """
     Applies rounding mode at given frequency
 
     Parameters
     ----------
-    values : :obj:`ndarray`
+    values : np.ndarray[int64_t]
     mode : instance of `RoundTo` enumeration
     freq : str, obj
 
     Returns
     -------
-    :obj:`ndarray`
+    np.ndarray[int64_t]
     """
 
     unit = to_offset(freq).nanos
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py
@@ -1,11 +1,12 @@
 from datetime import datetime
 
 from dateutil.tz import gettz
+import numpy as np
 import pytest
 import pytz
 from pytz import utc
 
-from pandas._libs.tslibs import NaT, Timestamp, conversion, to_offset
+from pandas._libs.tslibs import NaT, Timedelta, Timestamp, conversion, to_offset
 from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
 import pandas.util._test_decorators as td
 
@@ -247,6 +248,82 @@ def test_round_int64(self, timestamp, freq):
             # round half to even
             assert result.value // unit % 2 == 0, "round half to even error"
 
+    def test_round_implementation_bounds(self):
+        # See also: analogous test for Timedelta
+        result = Timestamp.min.ceil("s")
+        expected = Timestamp(1677, 9, 21, 0, 12, 44)
+        assert result == expected
+
+        result = Timestamp.max.floor("s")
+        expected = Timestamp.max - Timedelta(854775807)
+        assert result == expected
+
+        with pytest.raises(OverflowError, match="value too large"):
+            Timestamp.min.floor("s")
+
+        # the second message here shows up in Windows builds
+        msg = "|".join(
+            ["Python int too large to convert to C long", "int too big to convert"]
+        )
+        with pytest.raises(OverflowError, match=msg):
+            Timestamp.max.ceil("s")
+
+    @pytest.mark.parametrize("n", range(100))
+    @pytest.mark.parametrize(
+        "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
+    )
+    def test_round_sanity(self, method, n):
+        iinfo = np.iinfo(np.int64)
+        val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64)
+        ts = Timestamp(val)
+
+        def checker(res, ts, nanos):
+            if method is Timestamp.round:
+                diff = np.abs((res - ts).value)
+                assert diff <= nanos / 2
+            elif method is Timestamp.floor:
+                assert res <= ts
+            elif method is Timestamp.ceil:
+                assert res >= ts
+
+        assert method(ts, "ns") == ts
+
+        res = method(ts, "us")
+        nanos = 1000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
+        res = method(ts, "ms")
+        nanos = 1_000_000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
+        res = method(ts, "s")
+        nanos = 1_000_000_000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
+        res = method(ts, "min")
+        nanos = 60 * 1_000_000_000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
+        res = method(ts, "h")
+        nanos = 60 * 60 * 1_000_000_000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
+        res = method(ts, "D")
+        nanos = 24 * 60 * 60 * 1_000_000_000
+        assert np.abs((res - ts).value) < nanos
+        assert res.value % nanos == 0
+        checker(res, ts, nanos)
+
     # --------------------------------------------------------------
     # Timestamp.replace