Skip to content

Commit fb6a334

Browse files
authored
BUG: Timestamp.round floating point error (#39244)
1 parent b7184b9 commit fb6a334

File tree

3 files changed

+127
-10
lines changed

3 files changed

+127
-10
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ Datetimelike
234234
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
235235
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
236236
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
237+
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
237238
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
238239

239240
Timedelta

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ shadows the python class, where we do any heavy lifting.
88
"""
99
import warnings
1010

11+
cimport cython
12+
1113
import numpy as np
1214

1315
cimport numpy as cnp
@@ -153,32 +155,69 @@ class RoundTo:
153155
return 4
154156

155157

156-
cdef inline _floor_int64(values, unit):
157-
return values - np.remainder(values, unit)
158+
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
    """
    Round every element of ``values`` down to a multiple of ``unit``.

    Parameters
    ----------
    values : int64_t[:]
        Integer values to round; entries equal to ``NPY_NAT`` are copied
        through unchanged.
    unit : int64_t
        Rounding granularity, in the same integer units as ``values``.

    Returns
    -------
    ndarray[int64_t]
        Newly allocated array with the same length as ``values``.

    Notes
    -----
    The arithmetic runs under ``cython.overflowcheck(True)``, so a result
    that does not fit in int64 is reported as an error rather than wrapping.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        ndarray[int64_t] out = np.empty(count, dtype="i8")
        int64_t val

    with cython.overflowcheck(True):
        for idx in range(count):
            val = values[idx]
            if val != NPY_NAT:
                # Subtracting the remainder snaps down to the previous
                # multiple of ``unit``.
                # NOTE(review): for negative values this relies on
                # Python-style modulo (cdivision disabled) -- confirm.
                val = val - val % unit
            out[idx] = val

    return out
174+
175+
176+
cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
    """
    Round every element of ``values`` up to a multiple of ``unit``.

    Parameters
    ----------
    values : int64_t[:]
        Integer values to round; entries equal to ``NPY_NAT`` are copied
        through unchanged.
    unit : int64_t
        Rounding granularity, in the same integer units as ``values``.

    Returns
    -------
    ndarray[int64_t]
        Newly allocated array with the same length as ``values``. Values
        that are already an exact multiple of ``unit`` are returned as-is.

    Notes
    -----
    The loop runs under ``cython.overflowcheck(True)``.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        ndarray[int64_t] out = np.empty(count, dtype="i8")
        int64_t val, rounded

    with cython.overflowcheck(True):
        for idx in range(count):
            val = values[idx]

            if val == NPY_NAT:
                out[idx] = NPY_NAT
                continue

            # NOTE(review): ``remainder`` is not declared in the cdef block
            # above, so it appears to be an untyped (Python object) local.
            # The addition below would then be Python-int arithmetic and an
            # out-of-range result would surface when it is converted back
            # to int64 -- confirm whether it should be typed ``int64_t``.
            remainder = val % unit
            if remainder != 0:
                # Step forward to the next multiple of ``unit``.
                rounded = val + (unit - remainder)
            else:
                rounded = val

            out[idx] = rounded

    return out
198+
199+
200+
cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
    """
    Round ``values`` to the nearest multiple of ``unit``, sending a value
    that lies exactly halfway between two multiples to the lower one.

    Implemented as a ceil of ``values`` shifted down by ``unit // 2``.
    """
    # NOTE(review): the shift is applied before ``_ceil_int64`` sees the data,
    # i.e. outside its overflow-checked loop and before its NPY_NAT check.
    # Presumably callers handle NaT entries and near-bound values themselves;
    # verify against the caller.
    shifted = values - unit // 2
    return _ceil_int64(shifted, unit)
164202

165-
cdef inline _roundup_int64(values, unit):
203+
204+
cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
    """
    Round ``values`` to the nearest multiple of ``unit``, sending a value
    that lies exactly halfway between two multiples to the upper one.

    Implemented as a floor of ``values`` shifted up by ``unit // 2``.
    """
    # NOTE(review): the shift is applied before ``_floor_int64`` sees the
    # data, i.e. outside its overflow-checked loop and before its NPY_NAT
    # check. Presumably callers handle NaT entries and near-bound values
    # themselves; verify against the caller.
    shifted = values + unit // 2
    return _floor_int64(shifted, unit)
167206

168207

169-
def round_nsint64(values, mode, freq):
208+
def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray:
170209
"""
171210
Applies rounding mode at given frequency
172211

173212
Parameters
174213
----------
175-
values : :obj:`ndarray`
214+
values : np.ndarray[int64_t]
176215
mode : instance of `RoundTo` enumeration
177216
freq : str, obj
178217

179218
Returns
180219
-------
181-
:obj:`ndarray`
220+
np.ndarray[int64_t]
182221
"""
183222

184223
unit = to_offset(freq).nanos

pandas/tests/scalar/timestamp/test_unary_ops.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from datetime import datetime
22

33
from dateutil.tz import gettz
4+
import numpy as np
45
import pytest
56
import pytz
67
from pytz import utc
78

8-
from pandas._libs.tslibs import NaT, Timestamp, conversion, to_offset
9+
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, conversion, to_offset
910
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
1011
import pandas.util._test_decorators as td
1112

@@ -247,6 +248,82 @@ def test_round_int64(self, timestamp, freq):
247248
# round half to even
248249
assert result.value // unit % 2 == 0, "round half to even error"
249250

251+
def test_round_implementation_bounds(self):
    """Check ceil/floor at ``Timestamp.min`` and ``Timestamp.max``.

    Rounding toward the interior of the representable range must succeed;
    rounding away from it must raise ``OverflowError``.
    """
    # See also: analogous test for Timedelta

    # Rounding inward from either bound yields a valid Timestamp.
    assert Timestamp.min.ceil("s") == Timestamp(1677, 9, 21, 0, 12, 44)
    assert Timestamp.max.floor("s") == Timestamp.max - Timedelta(854775807)

    # Rounding outward would leave the representable range.
    with pytest.raises(OverflowError, match="value too large"):
        Timestamp.min.floor("s")

    # the second message here shows up in windows builds
    overflow_messages = [
        "Python int too large to convert to C long",
        "int too big to convert",
    ]
    with pytest.raises(OverflowError, match="|".join(overflow_messages)):
        Timestamp.max.ceil("s")
270+
271+
@pytest.mark.parametrize("n", range(100))
@pytest.mark.parametrize(
    "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
)
def test_round_sanity(self, method, n):
    """Property-style check of round/floor/ceil on a random Timestamp.

    ``n`` is unused by the body; parametrizing over it just repeats the
    test with a fresh random value each time.
    """
    int64_info = np.iinfo(np.int64)
    # The lower bound skips the minimum int64 value
    # (presumably because it is the NaT sentinel -- confirm).
    raw = np.random.randint(int64_info.min + 1, int64_info.max, dtype=np.int64)
    ts = Timestamp(raw)

    def check_direction(res, nanos):
        # Method-specific invariant: round stays within half a unit,
        # floor never moves forward, ceil never moves backward.
        if method is Timestamp.round:
            diff = np.abs((res - ts).value)
            assert diff <= nanos / 2
        elif method is Timestamp.floor:
            assert res <= ts
        elif method is Timestamp.ceil:
            assert res >= ts

    # Nanosecond resolution is the identity.
    assert method(ts, "ns") == ts

    # (frequency alias, length of that frequency in nanoseconds)
    nanos_by_freq = [
        ("us", 1000),
        ("ms", 1_000_000),
        ("s", 1_000_000_000),
        ("min", 60 * 1_000_000_000),
        ("h", 60 * 60 * 1_000_000_000),
        ("D", 24 * 60 * 60 * 1_000_000_000),
    ]
    for freq, nanos in nanos_by_freq:
        res = method(ts, freq)
        # The result moved by less than one unit and landed on a multiple.
        assert np.abs((res - ts).value) < nanos
        assert res.value % nanos == 0
        check_direction(res, nanos)
326+
250327
# --------------------------------------------------------------
251328
# Timestamp.replace
252329

0 commit comments

Comments
 (0)