Skip to content

BUG: Ensure to_datetime raises errors for out-of-bounds scalar inputs #60744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ Datetimelike
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
- Bug in :meth:`to_datetime` not raising ``OutOfBoundsDatetime`` for out-of-bounds scalar inputs when ``unit`` is passed (:issue:`60744`)

Timedelta
^^^^^^^^^
Expand Down
36 changes: 35 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,9 +481,18 @@ def _array_strptime_with_fallback(

def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
"""
to_datetime specalized to the case where a 'unit' is passed.
to_datetime specialized to the case where a 'unit' is passed.

Note: This function currently treats values at the upper bound differently
from values at the lower bound.
For upper bound, it raises OutOfBoundsDatetime.
For lower bound, it returns NaT.
"""
arg = extract_array(arg, extract_numpy=True)
# Fix GH#60677
# Ensure scalar and array-like both become arrays
# (so both paths use the same code).
arg = np.atleast_1d(arg)

# GH#30050 pass an ndarray to tslib.array_to_datetime
# because it expects an ndarray argument
Expand All @@ -496,6 +505,31 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
if arg.dtype.kind in "iu":
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
# Fix GH#60677
# ------------------------------------------------
# A) **Check for uint64 values above int64 max**
# so we don't accidentally wrap around to -1, etc.
# ------------------------------------------------
if arg.dtype.kind == "u": # unsigned
above_max = arg > np.iinfo(np.int64).max
if above_max.any():
if errors == "raise":
raise OutOfBoundsDatetime(
"Cannot convert uint64 values above"
f"{np.iinfo(np.int64).max}"
"to a 64-bit signed datetime64[ns]."
)
else:
# For errors != "raise" (e.g. "coerce" or "ignore"),
# we can replace out-of-range entries with NaN (-> NaT),
# then switch to the fallback object path:
arg = arg.astype(object)
arg[above_max] = np.nan
return _to_datetime_with_unit(arg, unit, name, utc, errors)

# ------------------------------------------------
# B) Proceed with normal numeric -> datetime logic
# ------------------------------------------------
arr = arg.astype(f"datetime64[{unit}]", copy=False)
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3689,3 +3689,30 @@ def test_to_datetime_wrapped_datetime64_ps():
["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
)
tm.assert_index_equal(result, expected)


def test_to_datetime_scalar_out_of_bounds():
    """Check that scalar and list-like out-of-bounds inputs are handled alike.

    With ``unit="ns"``, the uint64 maximum must raise OutOfBoundsDatetime,
    the int64 minimum must come back as NaT, and an in-range integer must
    produce a Timestamp.
    """
    too_large = np.iinfo("uint64").max
    too_small = np.iinfo("int64").min

    # The uint64 maximum must raise OutOfBoundsDatetime, first when passed
    # as a bare scalar and then when wrapped in a list.
    for out_of_bounds in (too_large, [too_large]):
        with pytest.raises(OutOfBoundsDatetime):
            to_datetime(out_of_bounds, unit="ns")

    # The int64 minimum is expected to convert to NaT instead of raising:
    # the scalar call returns NaT itself ...
    scalar_result = to_datetime(too_small, unit="ns")
    assert scalar_result is NaT

    # ... and the list call returns a container whose only element is NaT.
    list_result = to_datetime([too_small], unit="ns")
    assert list_result[0] is NaT

    # A reasonable nanosecond timestamp converts without error and the
    # scalar call yields a Timestamp.
    in_range_ns = 1_700_000_000_000_000_000
    assert isinstance(to_datetime(in_range_ns, unit="ns"), Timestamp)
Loading