From 819f84f043441b93db260f9c6f948530f99de3c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 14:39:18 -0800 Subject: [PATCH 1/3] BUG: Timestamp.round floating point error --- pandas/_libs/tslibs/timestamps.pyx | 57 +++++++++++--- .../tests/scalar/timestamp/test_unary_ops.py | 76 ++++++++++++++++++- 2 files changed, 123 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index df4677a242758..81b63a7e3ff8a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -8,6 +8,8 @@ shadows the python class, where we do any heavy lifting. """ import warnings +cimport cython + import numpy as np cimport numpy as cnp @@ -153,32 +155,69 @@ class RoundTo: return 4 -cdef inline _floor_int64(values, unit): - return values - np.remainder(values, unit) +cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + if value == NPY_NAT: + res = NPY_NAT + else: + res = value - value % unit + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value -cdef inline _ceil_int64(values, unit): - return values + np.remainder(-values, unit) + with cython.overflowcheck(True): + for i in range(n): + value = values[i] -cdef inline _rounddown_int64(values, unit): + if value == NPY_NAT: + res = NPY_NAT + else: + remainder = value % unit + if remainder == 0: + res = value + else: + res = value + (unit - remainder) + + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): return _ceil_int64(values - unit//2, unit) -cdef inline _roundup_int64(values, unit): + +cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): return _floor_int64(values + unit//2, unit) -def round_nsint64(values, mode, freq): +def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray: """ Applies rounding mode at given frequency Parameters ---------- - values : :obj:`ndarray` + values : np.ndarray[int64_t]` mode : instance of `RoundTo` enumeration freq : str, obj Returns ------- - :obj:`ndarray` + np.ndarray[int64_t] """ unit = to_offset(freq).nanos diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 88f99a6784ba1..49d7fee0e4e7b 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -1,11 +1,12 @@ from datetime import datetime from dateutil.tz import gettz +import numpy as np import pytest import pytz from pytz import utc -from pandas._libs.tslibs import NaT, Timestamp, conversion, to_offset +from pandas._libs.tslibs import NaT, Timedelta, Timestamp, conversion, to_offset from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG import pandas.util._test_decorators as td @@ -247,6 +248,79 @@ def test_round_int64(self, timestamp, freq): # round half to even assert result.value // unit % 2 == 0, "round half to even error" + def test_round_implementation_bounds(self): + # See also: analogous test for Timedelta + result = Timestamp.min.ceil("s") + expected = Timestamp(1677, 9, 21, 0, 12, 44) + assert result == expected + + result = Timestamp.max.floor("s") + expected = Timestamp.max - Timedelta(854775807) + assert result == expected + + with pytest.raises(OverflowError, match="value too large"): + Timestamp.min.floor("s") + + msg = "Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): + Timestamp.max.ceil("s") + + @pytest.mark.parametrize("n", range(100)) + @pytest.mark.parametrize( + "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] + ) + def test_round_sanity(self, method, n): + iinfo = np.iinfo(np.int64) + val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + ts = Timestamp(val) + + def checker(res, ts, nanos): + if method is Timestamp.round: + diff = np.abs((res - ts).value) + assert diff <= nanos / 2 + elif method is Timestamp.floor: + assert res <= ts + elif method is Timestamp.ceil: + assert res >= ts + + assert method(ts, "ns") == ts + + res = method(ts, "us") + nanos = 1000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "ms") + nanos = 1_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "s") + nanos = 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "min") + nanos = 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "h") + nanos = 60 * 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + + res = method(ts, "D") + nanos = 24 * 60 * 60 * 1_000_000_000 + assert np.abs((res - ts).value) < nanos + assert res.value % nanos == 0 + checker(res, ts, nanos) + # -------------------------------------------------------------- # Timestamp.replace From 0f7b0d1121a7938312e665be112d6b2c15eb161c Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 17 Jan 2021 16:16:45 -0800 Subject: [PATCH 2/3] windows compat --- pandas/tests/scalar/timestamp/test_unary_ops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 49d7fee0e4e7b..4278d185ea7dd 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -261,7 +261,10 @@ def test_round_implementation_bounds(self): with pytest.raises(OverflowError, match="value too large"): Timestamp.min.floor("s") - msg = "Python int too large to convert to C long" + # the second message here shows up in windows builds + msg = "|".join( + ["Python int too large to convert to C long", "int too big to convert"] + ) with pytest.raises(OverflowError, match=msg): Timestamp.max.ceil("s") From 57549cd99aaebb0fddf7c38f1363f4e45db25548 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 20 Jan 2021 14:16:26 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b061c4040096e..c79343be409d8 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -233,6 +233,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) - Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`) +- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`) Timedelta ^^^^^^^^^