Skip to content

Commit cb710ee

Browse files
committed
ENH: Improve error handling for out-of-bounds uint64 values in _to_datetime_with_unit
1 parent 4a2ad59 commit cb710ee

File tree

2 files changed

+43
-37
lines changed

2 files changed

+43
-37
lines changed

pandas/core/tools/datetimes.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
from pandas.core.construction import extract_array
7676
from pandas.core.indexes.base import Index
7777
from pandas.core.indexes.datetimes import DatetimeIndex
78+
7879
if TYPE_CHECKING:
7980
from collections.abc import (
8081
Callable,
@@ -478,72 +479,78 @@ def _array_strptime_with_fallback(
478479
return Index(result, dtype=result.dtype, name=name)
479480

480481

481-
482-
483-
484482
def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeIndex:
485483
"""
486484
to_datetime specialized to the case where a 'unit' is passed.
487-
Fixes a bug where scalar out-of-bounds values were not raising
488-
an error consistently.
489485
"""
490-
import pdb; pdb.set_trace()
491-
492-
# Ensure we handle both array-likes and scalars the same way.
493-
# extract_array can return a scalar if 'arg' is scalar-like;
494-
# so we force everything into at least 1D shape.
495486
arg = extract_array(arg, extract_numpy=True)
487+
# Fix GH#60677
488+
# Ensure scalar and array-like both become arrays
489+
# (so both paths use the same code).
496490
arg = np.atleast_1d(arg)
497491

498492
# GH#30050 pass an ndarray to tslib.array_to_datetime
499493
# because it expects an ndarray argument
500494
if isinstance(arg, IntegerArray):
501-
# For IntegerArray, we can directly convert
502495
arr = arg.astype(f"datetime64[{unit}]")
503496
tz_parsed = None
504-
505497
else:
506-
# Now we have a guaranteed ndarray
507498
arg = np.asarray(arg)
508499

509500
if arg.dtype.kind in "iu":
510501
# Note we can't do "f" here because that could induce unwanted
511-
# rounding GH#14156, GH#20445
502+
# rounding GH#14156, GH#20445
503+
# Fix GH#60677
504+
# ------------------------------------------------
505+
# A) **Check for uint64 values above int64 max**
506+
# so we don't accidentally wrap around to -1, etc.
507+
# ------------------------------------------------
508+
if arg.dtype.kind == "u": # unsigned
509+
above_max = arg > np.iinfo(np.int64).max
510+
if above_max.any():
511+
if errors == "raise":
512+
raise OutOfBoundsDatetime(
513+
"Cannot convert uint64 values above "
514+
f"{np.iinfo(np.int64).max} "
515+
"to a 64-bit signed datetime64[ns]."
516+
)
517+
else:
518+
# For errors != "raise" (e.g. "coerce" or "ignore"),
519+
# we can replace out-of-range entries with NaN (-> NaT),
520+
# then switch to the fallback object path:
521+
arg = arg.astype(object)
522+
arg[above_max] = np.nan
523+
return _to_datetime_with_unit(arg, unit, name, utc, errors)
524+
525+
# ------------------------------------------------
526+
# B) Proceed with normal numeric -> datetime logic
527+
# ------------------------------------------------
512528
arr = arg.astype(f"datetime64[{unit}]", copy=False)
513529
try:
514530
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
515531
except OutOfBoundsDatetime:
516532
if errors == "raise":
517533
raise
518-
# errors != "raise" => coerce to object and retry
519534
arg = arg.astype(object)
520535
return _to_datetime_with_unit(arg, unit, name, utc, errors)
521536
tz_parsed = None
522537

523538
elif arg.dtype.kind == "f":
524-
# Floating dtypes
525539
with np.errstate(over="raise"):
526540
try:
527541
arr = cast_from_unit_vectorized(arg, unit=unit)
528542
except OutOfBoundsDatetime as err:
529543
if errors != "raise":
530-
# coerce to object and retry
531544
return _to_datetime_with_unit(
532-
arg.astype(object),
533-
unit,
534-
name,
535-
utc,
536-
errors,
545+
arg.astype(object), unit, name, utc, errors
537546
)
538547
raise OutOfBoundsDatetime(
539548
f"cannot convert input with unit '{unit}'"
540549
) from err
541550

542551
arr = arr.view("M8[ns]")
543552
tz_parsed = None
544-
545553
else:
546-
# Fallback: treat as object dtype
547554
arg = arg.astype(object, copy=False)
548555
arr, tz_parsed = tslib.array_to_datetime(
549556
arg,
@@ -553,22 +560,21 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeI
553560
creso=NpyDatetimeUnit.NPY_FR_ns.value,
554561
)
555562

556-
# Construct a DatetimeIndex from the array
557563
result = DatetimeIndex(arr, name=name)
558564

559-
# May need to localize result to parsed tz or convert to UTC if requested
565+
# GH#23758: We may still need to localize the result with tz
566+
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
567+
# result will be naive but in UTC
560568
result = result.tz_localize("UTC").tz_convert(tz_parsed)
561569

562570
if utc:
563571
if result.tz is None:
564572
result = result.tz_localize("utc")
565573
else:
566574
result = result.tz_convert("utc")
567-
568575
return result
569576

570577

571-
572578
def _adjust_to_origin(arg, origin, unit):
573579
"""
574580
Helper function for to_datetime.

pandas/tests/tools/test_to_datetime.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""test to_datetime"""
22

33
import calendar
4-
import locale
54
from collections import deque
65
from datetime import (
76
date,
@@ -10,12 +9,12 @@
109
timezone,
1110
)
1211
from decimal import Decimal
12+
import locale
1313
import zoneinfo
1414

15+
from dateutil.parser import parse
1516
import numpy as np
16-
import pandas as pd
1717
import pytest
18-
from dateutil.parser import parse
1918

2019
from pandas._libs import tslib
2120
from pandas._libs.tslibs import (
@@ -30,6 +29,8 @@
3029
import pandas.util._test_decorators as td
3130

3231
from pandas.core.dtypes.common import is_datetime64_ns_dtype
32+
33+
import pandas as pd
3334
from pandas import (
3435
DataFrame,
3536
DatetimeIndex,
@@ -3688,22 +3689,21 @@ def test_to_datetime_wrapped_datetime64_ps():
36883689
["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
36893690
)
36903691
tm.assert_index_equal(result, expected)
3691-
3692+
36923693

36933694
def test_to_datetime_scalar_out_of_bounds():
36943695
"""Ensure pd.to_datetime raises an error for out-of-bounds scalar values."""
36953696
uint64_max = np.iinfo("uint64").max
36963697

36973698
# Expect an OutOfBoundsDatetime when passing uint64_max as a scalar
36983699
with pytest.raises(OutOfBoundsDatetime):
3699-
pd.to_datetime(uint64_max, unit="ns")
3700+
to_datetime(uint64_max, unit="ns")
37003701

37013702
# Expect the same behavior when passing it as a list
37023703
with pytest.raises(OutOfBoundsDatetime):
3703-
pd.to_datetime([uint64_max], unit="ns")
3704+
to_datetime([uint64_max], unit="ns")
37043705

37053706
# Test a valid value (should not raise an error)
37063707
valid_timestamp = 1_700_000_000_000_000_000 # A reasonable nanosecond timestamp
3707-
result = pd.to_datetime(valid_timestamp, unit="ns")
3708-
assert isinstance(result, pd.Timestamp)
3709-
3708+
result = to_datetime(valid_timestamp, unit="ns")
3709+
assert isinstance(result, Timestamp)

0 commit comments

Comments
 (0)