Skip to content

Commit efe896c

Browse files
authored
BUG: Timestamp.round overflows (#51494)
* BUG: Timestamp.round overflows
* GH ref
* move whatsnew to 2.1.0
1 parent 21b019f commit efe896c

File tree

6 files changed

+191
-79
lines changed

6 files changed

+191
-79
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,12 @@ Categorical
118118

119119
Datetimelike
120120
^^^^^^^^^^^^
121-
-
121+
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
122122
-
123123

124124
Timedelta
125125
^^^^^^^^^
126-
-
126+
- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`)
127127
-
128128

129129
Timezones

pandas/_libs/tslibs/fields.pyx

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit):
704704
cdef:
705705
Py_ssize_t i, n = len(values)
706706
ndarray[int64_t] result = np.empty(n, dtype="i8")
707-
int64_t res, value
707+
int64_t res, value, remainder
708708

709709
with cython.overflowcheck(True):
710710
for i in range(n):
@@ -732,6 +732,34 @@ cdef ndarray[int64_t] _roundup_int64(values, int64_t unit):
732732
return _floor_int64(values + unit // 2, unit)
733733

734734

735+
cdef ndarray[int64_t] _round_nearest_int64(const int64_t[:] values, int64_t unit):
736+
cdef:
737+
Py_ssize_t i, n = len(values)
738+
ndarray[int64_t] result = np.empty(n, dtype="i8")
739+
int64_t res, value, half, remainder, quotient
740+
741+
half = unit // 2
742+
743+
with cython.overflowcheck(True):
744+
for i in range(n):
745+
value = values[i]
746+
747+
if value == NPY_NAT:
748+
res = NPY_NAT
749+
else:
750+
quotient, remainder = divmod(value, unit)
751+
if remainder > half:
752+
res = value + (unit - remainder)
753+
elif remainder == half and quotient % 2:
754+
res = value + (unit - remainder)
755+
else:
756+
res = value - remainder
757+
758+
result[i] = res
759+
760+
return result
761+
762+
735763
def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
736764
"""
737765
Applies rounding mode at given frequency
@@ -762,13 +790,7 @@ def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
762790
# for odd unit there is no need of a tie break
763791
if unit % 2:
764792
return _rounddown_int64(values, unit)
765-
quotient, remainder = np.divmod(values, unit)
766-
mask = np.logical_or(
767-
remainder > (unit // 2),
768-
np.logical_and(remainder == (unit // 2), quotient % 2)
769-
)
770-
quotient[mask] += 1
771-
return quotient * unit
793+
return _round_nearest_int64(values, unit)
772794

773795
# if/elif above should catch all rounding modes defined in enum 'RoundTo':
774796
# if flow of control arrives here, it is a bug

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,12 @@ class Timedelta(_Timedelta):
18241824
unit = delta_to_nanoseconds(to_offset(freq), self._creso)
18251825

18261826
arr = np.array([self._value], dtype="i8")
1827-
result = round_nsint64(arr, mode, unit)[0]
1827+
try:
1828+
result = round_nsint64(arr, mode, unit)[0]
1829+
except OverflowError as err:
1830+
raise OutOfBoundsTimedelta(
1831+
f"Cannot round {self} to freq={freq} without overflow"
1832+
) from err
18281833
return Timedelta._from_value_and_reso(result, self._creso)
18291834

18301835
def round(self, freq):

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,13 @@ class Timestamp(_Timestamp):
16921692
value = np.array([value], dtype=np.int64)
16931693

16941694
# Will only ever contain 1 element for timestamp
1695-
r = round_nsint64(value, mode, nanos)[0]
1695+
try:
1696+
r = round_nsint64(value, mode, nanos)[0]
1697+
except OverflowError as err:
1698+
raise OutOfBoundsDatetime(
1699+
f"Cannot round {self} to freq={freq} without overflow"
1700+
) from err
1701+
16961702
result = Timestamp._from_value_and_reso(r, self._creso, None)
16971703
if self.tz is not None:
16981704
result = result.tz_localize(

pandas/tests/scalar/timedelta/test_timedelta.py

Lines changed: 75 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -698,56 +698,103 @@ def test_round_implementation_bounds(self):
698698
expected = Timedelta.max - Timedelta(854775807)
699699
assert result == expected
700700

701-
with pytest.raises(OverflowError, match="value too large"):
701+
msg = (
702+
r"Cannot round -106752 days \+00:12:43.145224193 to freq=s without overflow"
703+
)
704+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
702705
Timedelta.min.floor("s")
706+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
707+
Timedelta.min.round("s")
703708

704-
# the second message here shows up in windows builds
705-
msg = "|".join(
706-
["Python int too large to convert to C long", "int too big to convert"]
707-
)
708-
with pytest.raises(OverflowError, match=msg):
709+
msg = "Cannot round 106751 days 23:47:16.854775807 to freq=s without overflow"
710+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
709711
Timedelta.max.ceil("s")
712+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
713+
Timedelta.max.round("s")
710714

711-
@pytest.mark.xfail(reason="Failing on builds", strict=False)
712715
@given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max))
713716
@pytest.mark.parametrize(
714717
"method", [Timedelta.round, Timedelta.floor, Timedelta.ceil]
715718
)
716719
def test_round_sanity(self, val, method):
717-
val = np.int64(val)
718-
td = Timedelta(val)
720+
cls = Timedelta
721+
err_cls = OutOfBoundsTimedelta
719722

720-
assert method(td, "ns") == td
723+
val = np.int64(val)
724+
td = cls(val)
725+
726+
def checker(ts, nanos, unit):
727+
# First check that we do raise in cases where we should
728+
if nanos == 1:
729+
pass
730+
else:
731+
div, mod = divmod(ts._value, nanos)
732+
diff = int(nanos - mod)
733+
lb = ts._value - mod
734+
assert lb <= ts._value # i.e. no overflows with python ints
735+
ub = ts._value + diff
736+
assert ub > ts._value # i.e. no overflows with python ints
737+
738+
msg = "without overflow"
739+
if mod == 0:
740+
# We should never be raising in this case
741+
pass
742+
elif method is cls.ceil:
743+
if ub > cls.max._value:
744+
with pytest.raises(err_cls, match=msg):
745+
method(ts, unit)
746+
return
747+
elif method is cls.floor:
748+
if lb < cls.min._value:
749+
with pytest.raises(err_cls, match=msg):
750+
method(ts, unit)
751+
return
752+
else:
753+
if mod >= diff:
754+
if ub > cls.max._value:
755+
with pytest.raises(err_cls, match=msg):
756+
method(ts, unit)
757+
return
758+
else:
759+
if lb < cls.min._value:
760+
with pytest.raises(err_cls, match=msg):
761+
method(ts, unit)
762+
return
763+
764+
res = method(ts, unit)
765+
766+
td = res - ts
767+
diff = abs(td._value)
768+
assert diff < nanos
769+
assert res._value % nanos == 0
770+
771+
if method is cls.round:
772+
assert diff <= nanos / 2
773+
elif method is cls.floor:
774+
assert res <= ts
775+
elif method is cls.ceil:
776+
assert res >= ts
777+
778+
nanos = 1
779+
checker(td, nanos, "ns")
721780

722-
res = method(td, "us")
723781
nanos = 1000
724-
assert np.abs((res - td).value) < nanos
725-
assert res.value % nanos == 0
782+
checker(td, nanos, "us")
726783

727-
res = method(td, "ms")
728784
nanos = 1_000_000
729-
assert np.abs((res - td).value) < nanos
730-
assert res.value % nanos == 0
785+
checker(td, nanos, "ms")
731786

732-
res = method(td, "s")
733787
nanos = 1_000_000_000
734-
assert np.abs((res - td).value) < nanos
735-
assert res.value % nanos == 0
788+
checker(td, nanos, "s")
736789

737-
res = method(td, "min")
738790
nanos = 60 * 1_000_000_000
739-
assert np.abs((res - td).value) < nanos
740-
assert res.value % nanos == 0
791+
checker(td, nanos, "min")
741792

742-
res = method(td, "h")
743793
nanos = 60 * 60 * 1_000_000_000
744-
assert np.abs((res - td).value) < nanos
745-
assert res.value % nanos == 0
794+
checker(td, nanos, "h")
746795

747-
res = method(td, "D")
748796
nanos = 24 * 60 * 60 * 1_000_000_000
749-
assert np.abs((res - td).value) < nanos
750-
assert res.value % nanos == 0
797+
checker(td, nanos, "D")
751798

752799
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
753800
def test_round_non_nano(self, unit):

pandas/tests/scalar/timestamp/test_unary_ops.py

Lines changed: 71 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -294,71 +294,103 @@ def test_round_implementation_bounds(self):
294294
expected = Timestamp.max - Timedelta(854775807)
295295
assert result == expected
296296

297-
with pytest.raises(OverflowError, match="value too large"):
297+
msg = "Cannot round 1677-09-21 00:12:43.145224193 to freq=<Second>"
298+
with pytest.raises(OutOfBoundsDatetime, match=msg):
298299
Timestamp.min.floor("s")
299300

300-
# the second message here shows up in windows builds
301-
msg = "|".join(
302-
["Python int too large to convert to C long", "int too big to convert"]
303-
)
304-
with pytest.raises(OverflowError, match=msg):
301+
with pytest.raises(OutOfBoundsDatetime, match=msg):
302+
Timestamp.min.round("s")
303+
304+
msg = "Cannot round 2262-04-11 23:47:16.854775807 to freq=<Second>"
305+
with pytest.raises(OutOfBoundsDatetime, match=msg):
305306
Timestamp.max.ceil("s")
306307

307-
@pytest.mark.xfail(reason="Failing on builds", strict=False)
308+
with pytest.raises(OutOfBoundsDatetime, match=msg):
309+
Timestamp.max.round("s")
310+
308311
@given(val=st.integers(iNaT + 1, lib.i8max))
309312
@pytest.mark.parametrize(
310313
"method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
311314
)
312315
def test_round_sanity(self, val, method):
313-
val = np.int64(val)
314-
ts = Timestamp(val)
316+
cls = Timestamp
317+
err_cls = OutOfBoundsDatetime
315318

316-
def checker(res, ts, nanos):
317-
if method is Timestamp.round:
318-
diff = np.abs((res - ts)._value)
319+
val = np.int64(val)
320+
ts = cls(val)
321+
322+
def checker(ts, nanos, unit):
323+
# First check that we do raise in cases where we should
324+
if nanos == 1:
325+
pass
326+
else:
327+
div, mod = divmod(ts._value, nanos)
328+
diff = int(nanos - mod)
329+
lb = ts._value - mod
330+
assert lb <= ts._value # i.e. no overflows with python ints
331+
ub = ts._value + diff
332+
assert ub > ts._value # i.e. no overflows with python ints
333+
334+
msg = "without overflow"
335+
if mod == 0:
336+
# We should never be raising in this case
337+
pass
338+
elif method is cls.ceil:
339+
if ub > cls.max._value:
340+
with pytest.raises(err_cls, match=msg):
341+
method(ts, unit)
342+
return
343+
elif method is cls.floor:
344+
if lb < cls.min._value:
345+
with pytest.raises(err_cls, match=msg):
346+
method(ts, unit)
347+
return
348+
else:
349+
if mod >= diff:
350+
if ub > cls.max._value:
351+
with pytest.raises(err_cls, match=msg):
352+
method(ts, unit)
353+
return
354+
else:
355+
if lb < cls.min._value:
356+
with pytest.raises(err_cls, match=msg):
357+
method(ts, unit)
358+
return
359+
360+
res = method(ts, unit)
361+
362+
td = res - ts
363+
diff = abs(td._value)
364+
assert diff < nanos
365+
assert res._value % nanos == 0
366+
367+
if method is cls.round:
319368
assert diff <= nanos / 2
320-
elif method is Timestamp.floor:
369+
elif method is cls.floor:
321370
assert res <= ts
322-
elif method is Timestamp.ceil:
371+
elif method is cls.ceil:
323372
assert res >= ts
324373

325-
assert method(ts, "ns") == ts
374+
nanos = 1
375+
checker(ts, nanos, "ns")
326376

327-
res = method(ts, "us")
328377
nanos = 1000
329-
assert np.abs((res - ts)._value) < nanos
330-
assert res._value % nanos == 0
331-
checker(res, ts, nanos)
378+
checker(ts, nanos, "us")
332379

333-
res = method(ts, "ms")
334380
nanos = 1_000_000
335-
assert np.abs((res - ts)._value) < nanos
336-
assert res._value % nanos == 0
337-
checker(res, ts, nanos)
381+
checker(ts, nanos, "ms")
338382

339-
res = method(ts, "s")
340383
nanos = 1_000_000_000
341-
assert np.abs((res - ts)._value) < nanos
342-
assert res._value % nanos == 0
343-
checker(res, ts, nanos)
384+
checker(ts, nanos, "s")
344385

345-
res = method(ts, "min")
346386
nanos = 60 * 1_000_000_000
347-
assert np.abs((res - ts)._value) < nanos
348-
assert res._value % nanos == 0
349-
checker(res, ts, nanos)
387+
checker(ts, nanos, "min")
350388

351-
res = method(ts, "h")
352389
nanos = 60 * 60 * 1_000_000_000
353-
assert np.abs((res - ts)._value) < nanos
354-
assert res._value % nanos == 0
355-
checker(res, ts, nanos)
390+
checker(ts, nanos, "h")
356391

357-
res = method(ts, "D")
358392
nanos = 24 * 60 * 60 * 1_000_000_000
359-
assert np.abs((res - ts)._value) < nanos
360-
assert res._value % nanos == 0
361-
checker(res, ts, nanos)
393+
checker(ts, nanos, "D")
362394

363395
# --------------------------------------------------------------
364396
# Timestamp.replace

0 commit comments

Comments
 (0)