Skip to content

BUG: Timestamp.round overflows #51494

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ Categorical

Datetimelike
^^^^^^^^^^^^
-
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
-

Timedelta
^^^^^^^^^
-
- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`)
-

Timezones
Expand Down
38 changes: 30 additions & 8 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] result = np.empty(n, dtype="i8")
int64_t res, value
int64_t res, value, remainder

with cython.overflowcheck(True):
for i in range(n):
Expand Down Expand Up @@ -732,6 +732,34 @@ cdef ndarray[int64_t] _roundup_int64(values, int64_t unit):
return _floor_int64(values + unit // 2, unit)


cdef ndarray[int64_t] _round_nearest_int64(const int64_t[:] values, int64_t unit):
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] result = np.empty(n, dtype="i8")
int64_t res, value, half, remainder, quotient

half = unit // 2

with cython.overflowcheck(True):
for i in range(n):
value = values[i]

if value == NPY_NAT:
res = NPY_NAT
else:
quotient, remainder = divmod(value, unit)
if remainder > half:
res = value + (unit - remainder)
elif remainder == half and quotient % 2:
res = value + (unit - remainder)
else:
res = value - remainder

result[i] = res

return result


def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
"""
Applies rounding mode at given frequency
Expand Down Expand Up @@ -762,13 +790,7 @@ def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
# for odd unit there is no need of a tie break
if unit % 2:
return _rounddown_int64(values, unit)
quotient, remainder = np.divmod(values, unit)
mask = np.logical_or(
remainder > (unit // 2),
np.logical_and(remainder == (unit // 2), quotient % 2)
)
quotient[mask] += 1
return quotient * unit
return _round_nearest_int64(values, unit)

# if/elif above should catch all rounding modes defined in enum 'RoundTo':
# if flow of control arrives here, it is a bug
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,12 @@ class Timedelta(_Timedelta):
unit = delta_to_nanoseconds(to_offset(freq), self._creso)

arr = np.array([self._value], dtype="i8")
result = round_nsint64(arr, mode, unit)[0]
try:
result = round_nsint64(arr, mode, unit)[0]
except OverflowError as err:
raise OutOfBoundsTimedelta(
f"Cannot round {self} to freq={freq} without overflow"
) from err
return Timedelta._from_value_and_reso(result, self._creso)

def round(self, freq):
Expand Down
8 changes: 7 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1692,7 +1692,13 @@ class Timestamp(_Timestamp):
value = np.array([value], dtype=np.int64)

# Will only ever contain 1 element for timestamp
r = round_nsint64(value, mode, nanos)[0]
try:
r = round_nsint64(value, mode, nanos)[0]
except OverflowError as err:
raise OutOfBoundsDatetime(
f"Cannot round {self} to freq={freq} without overflow"
) from err

result = Timestamp._from_value_and_reso(r, self._creso, None)
if self.tz is not None:
result = result.tz_localize(
Expand Down
103 changes: 75 additions & 28 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,56 +698,103 @@ def test_round_implementation_bounds(self):
expected = Timedelta.max - Timedelta(854775807)
assert result == expected

with pytest.raises(OverflowError, match="value too large"):
msg = (
r"Cannot round -106752 days \+00:12:43.145224193 to freq=s without overflow"
)
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta.min.floor("s")
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta.min.round("s")

# the second message here shows up in windows builds
msg = "|".join(
["Python int too large to convert to C long", "int too big to convert"]
)
with pytest.raises(OverflowError, match=msg):
msg = "Cannot round 106751 days 23:47:16.854775807 to freq=s without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta.max.ceil("s")
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta.max.round("s")

@pytest.mark.xfail(reason="Failing on builds", strict=False)
@given(val=st.integers(min_value=iNaT + 1, max_value=lib.i8max))
@pytest.mark.parametrize(
"method", [Timedelta.round, Timedelta.floor, Timedelta.ceil]
)
def test_round_sanity(self, val, method):
val = np.int64(val)
td = Timedelta(val)
cls = Timedelta
err_cls = OutOfBoundsTimedelta

assert method(td, "ns") == td
val = np.int64(val)
td = cls(val)

def checker(ts, nanos, unit):
# First check that we do raise in cases where we should
if nanos == 1:
pass
else:
div, mod = divmod(ts._value, nanos)
diff = int(nanos - mod)
lb = ts._value - mod
assert lb <= ts._value # i.e. no overflows with python ints
ub = ts._value + diff
assert ub > ts._value # i.e. no overflows with python ints

msg = "without overflow"
if mod == 0:
# We should never be raising in this
pass
elif method is cls.ceil:
if ub > cls.max._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
elif method is cls.floor:
if lb < cls.min._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
else:
if mod >= diff:
if ub > cls.max._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
else:
if lb < cls.min._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return

res = method(ts, unit)

td = res - ts
diff = abs(td._value)
assert diff < nanos
assert res._value % nanos == 0

if method is cls.round:
assert diff <= nanos / 2
elif method is cls.floor:
assert res <= ts
elif method is cls.ceil:
assert res >= ts

nanos = 1
checker(td, nanos, "ns")

res = method(td, "us")
nanos = 1000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "us")

res = method(td, "ms")
nanos = 1_000_000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "ms")

res = method(td, "s")
nanos = 1_000_000_000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "s")

res = method(td, "min")
nanos = 60 * 1_000_000_000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "min")

res = method(td, "h")
nanos = 60 * 60 * 1_000_000_000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "h")

res = method(td, "D")
nanos = 24 * 60 * 60 * 1_000_000_000
assert np.abs((res - td).value) < nanos
assert res.value % nanos == 0
checker(td, nanos, "D")

@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_round_non_nano(self, unit):
Expand Down
110 changes: 71 additions & 39 deletions pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,71 +294,103 @@ def test_round_implementation_bounds(self):
expected = Timestamp.max - Timedelta(854775807)
assert result == expected

with pytest.raises(OverflowError, match="value too large"):
msg = "Cannot round 1677-09-21 00:12:43.145224193 to freq=<Second>"
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp.min.floor("s")

# the second message here shows up in windows builds
msg = "|".join(
["Python int too large to convert to C long", "int too big to convert"]
)
with pytest.raises(OverflowError, match=msg):
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp.min.round("s")

msg = "Cannot round 2262-04-11 23:47:16.854775807 to freq=<Second>"
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp.max.ceil("s")

@pytest.mark.xfail(reason="Failing on builds", strict=False)
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp.max.round("s")

@given(val=st.integers(iNaT + 1, lib.i8max))
@pytest.mark.parametrize(
"method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
)
def test_round_sanity(self, val, method):
val = np.int64(val)
ts = Timestamp(val)
cls = Timestamp
err_cls = OutOfBoundsDatetime

def checker(res, ts, nanos):
if method is Timestamp.round:
diff = np.abs((res - ts)._value)
val = np.int64(val)
ts = cls(val)

def checker(ts, nanos, unit):
# First check that we do raise in cases where we should
if nanos == 1:
pass
else:
div, mod = divmod(ts._value, nanos)
diff = int(nanos - mod)
lb = ts._value - mod
assert lb <= ts._value # i.e. no overflows with python ints
ub = ts._value + diff
assert ub > ts._value # i.e. no overflows with python ints

msg = "without overflow"
if mod == 0:
# We should never be raising in this
pass
elif method is cls.ceil:
if ub > cls.max._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
elif method is cls.floor:
if lb < cls.min._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
else:
if mod >= diff:
if ub > cls.max._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return
else:
if lb < cls.min._value:
with pytest.raises(err_cls, match=msg):
method(ts, unit)
return

res = method(ts, unit)

td = res - ts
diff = abs(td._value)
assert diff < nanos
assert res._value % nanos == 0

if method is cls.round:
assert diff <= nanos / 2
elif method is Timestamp.floor:
elif method is cls.floor:
assert res <= ts
elif method is Timestamp.ceil:
elif method is cls.ceil:
assert res >= ts

assert method(ts, "ns") == ts
nanos = 1
checker(ts, nanos, "ns")

res = method(ts, "us")
nanos = 1000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "us")

res = method(ts, "ms")
nanos = 1_000_000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "ms")

res = method(ts, "s")
nanos = 1_000_000_000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "s")

res = method(ts, "min")
nanos = 60 * 1_000_000_000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "min")

res = method(ts, "h")
nanos = 60 * 60 * 1_000_000_000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "h")

res = method(ts, "D")
nanos = 24 * 60 * 60 * 1_000_000_000
assert np.abs((res - ts)._value) < nanos
assert res._value % nanos == 0
checker(res, ts, nanos)
checker(ts, nanos, "D")

# --------------------------------------------------------------
# Timestamp.replace
Expand Down