diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a7f63d75a047e..35594b23710e5 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -640,6 +640,7 @@ Datetimelike
 - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
 - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
+- Bug in :meth:`to_datetime` not raising for out-of-bounds scalar inputs when ``unit`` is passed (:issue:`60744`)

 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 30487de7bafd5..e9d5baf0f3a2b 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -481,9 +481,18 @@ def _array_strptime_with_fallback(

 def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
     """
-    to_datetime specalized to the case where a 'unit' is passed.
+    to_datetime specialized to the case where a 'unit' is passed.
+
+    Note: This function currently treats values at the upper bound differently
+    from values at the lower bound.
+    For the upper bound (uint64 above int64 max), it raises OutOfBoundsDatetime.
+    For the lower bound (int64 min), it returns NaT.
     """
     arg = extract_array(arg, extract_numpy=True)
+    # GH#60677
+    # Promote scalars to 1-d arrays so that scalar and array-like
+    # inputs go through the same code path below.
+    arg = np.atleast_1d(arg)

     # GH#30050 pass an ndarray to tslib.array_to_datetime
     # because it expects an ndarray argument
@@ -496,6 +505,31 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
     if arg.dtype.kind in "iu":
         # Note we can't do "f" here because that could induce unwanted
         # rounding GH#14156, GH#20445
+        # GH#60677
+        # ------------------------------------------------
+        # A) Check for unsigned values above int64 max
+        # so we don't accidentally wrap around to -1, etc.
+        # ------------------------------------------------
+        if arg.dtype.kind == "u":  # unsigned
+            above_max = arg > np.iinfo(np.int64).max
+            if above_max.any():
+                if errors == "raise":
+                    raise OutOfBoundsDatetime(
+                        "Cannot convert uint64 values above "
+                        f"{np.iinfo(np.int64).max} "
+                        "to a 64-bit signed datetime64[ns]."
+                    )
+                else:
+                    # For errors != "raise" (e.g. "coerce" or "ignore"),
+                    # we can replace out-of-range entries with NaN (-> NaT),
+                    # then switch to the fallback object path:
+                    arg = arg.astype(object)
+                    arg[above_max] = np.nan
+                    return _to_datetime_with_unit(arg, unit, name, utc, errors)
+
+        # ------------------------------------------------
+        # B) Proceed with normal numeric -> datetime logic
+        # ------------------------------------------------
         arr = arg.astype(f"datetime64[{unit}]", copy=False)
         try:
             arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 74b051aec71a4..7901ac955b711 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -3689,3 +3689,30 @@ def test_to_datetime_wrapped_datetime64_ps():
         ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
     )
     tm.assert_index_equal(result, expected)
+
+
+def test_to_datetime_scalar_out_of_bounds():
+    """Ensure pd.to_datetime raises an error for out-of-bounds scalar values."""
+    uint64_max = np.iinfo("uint64").max
+    int64_min = np.iinfo("int64").min
+
+    # Expect OutOfBoundsDatetime when passing uint64_max as a scalar
+    with pytest.raises(OutOfBoundsDatetime):
+        to_datetime(uint64_max, unit="ns")
+
+    # Expect the same behavior when passing it as a list
+    with pytest.raises(OutOfBoundsDatetime):
+        to_datetime([uint64_max], unit="ns")
+
+    # Expect NaT when passing int64_min as a scalar
+    value = to_datetime(int64_min, unit="ns")
+    assert value is NaT
+
+    # Expect the same behavior when passing it as a list
+    value = to_datetime([int64_min], unit="ns")
+    assert value[0] is NaT
+
+    # Test a valid value (should not raise an error)
+    valid_timestamp = 1_700_000_000_000_000_000  # A reasonable nanosecond timestamp
+    result = to_datetime(valid_timestamp, unit="ns")
+    assert isinstance(result, Timestamp)