Skip to content

Commit e37ff77

Browse files
authored
ENH: all-NaT -> infer second resolution (#56103)
1 parent 196e907 commit e37ff77

File tree

4 files changed: 52 additions and 20 deletions.

pandas/_libs/tslib.pyx

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -642,11 +642,16 @@ cpdef array_to_datetime(
642642
utc=utc,
643643
creso=state.creso,
644644
)
645-
646-
# Otherwise we can use the single reso that we encountered and avoid
647-
# a second pass.
648-
abbrev = npy_unit_to_abbrev(state.creso)
649-
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
645+
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
646+
# i.e. we never encountered anything non-NaT, default to "s". This
647+
# ensures that insert and concat-like operations with NaT
648+
# do not upcast units
649+
result = iresult.view("M8[s]").reshape(result.shape)
650+
else:
651+
# Otherwise we can use the single reso that we encountered and avoid
652+
# a second pass.
653+
abbrev = npy_unit_to_abbrev(state.creso)
654+
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
650655
return result, tz_out
651656

652657

@@ -823,14 +828,16 @@ def array_to_datetime_with_tz(
823828
# We encountered mismatched resolutions, need to re-parse with
824829
# the correct one.
825830
return array_to_datetime_with_tz(values, tz=tz, creso=creso)
826-
827-
# Otherwise we can use the single reso that we encountered and avoid
828-
# a second pass.
829-
abbrev = npy_unit_to_abbrev(creso)
830-
result = result.view(f"M8[{abbrev}]")
831-
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
832-
# We didn't find any non-NaT to infer from, default to "ns"
833-
result = result.view("M8[ns]")
831+
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
832+
# i.e. we never encountered anything non-NaT, default to "s". This
833+
# ensures that insert and concat-like operations with NaT
834+
# do not upcast units
835+
result = result.view("M8[s]")
836+
else:
837+
# Otherwise we can use the single reso that we encountered and avoid
838+
# a second pass.
839+
abbrev = npy_unit_to_abbrev(creso)
840+
result = result.view(f"M8[{abbrev}]")
834841
else:
835842
abbrev = npy_unit_to_abbrev(creso)
836843
result = result.view(f"M8[{abbrev}]")

pandas/_libs/tslibs/strptime.pyx

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -489,10 +489,16 @@ def array_strptime(
489489
creso=state.creso,
490490
)
491491

492-
# Otherwise we can use the single reso that we encountered and avoid
493-
# a second pass.
494-
abbrev = npy_unit_to_abbrev(state.creso)
495-
result = iresult.base.view(f"M8[{abbrev}]")
492+
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
493+
# i.e. we never encountered anything non-NaT, default to "s". This
494+
# ensures that insert and concat-like operations with NaT
495+
# do not upcast units
496+
result = iresult.base.view("M8[s]")
497+
else:
498+
# Otherwise we can use the single reso that we encountered and avoid
499+
# a second pass.
500+
abbrev = npy_unit_to_abbrev(state.creso)
501+
result = iresult.base.view(f"M8[{abbrev}]")
496502
return result, result_timezone.base
497503

498504

pandas/tests/tslibs/test_array_to_datetime.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@
2626
class TestArrayToDatetimeResolutionInference:
2727
# TODO: tests that include tzs, ints
2828

29+
def test_infer_all_nat(self):
30+
arr = np.array([NaT, np.nan], dtype=object)
31+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
32+
assert tz is None
33+
assert result.dtype == "M8[s]"
34+
2935
def test_infer_homogeoneous_datetimes(self):
3036
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
3137
arr = np.array([dt, dt, dt], dtype=object)
@@ -120,11 +126,11 @@ def test_array_to_datetime_with_tz_resolution_all_nat(self):
120126
tz = tzoffset("custom", 3600)
121127
vals = np.array(["NaT"], dtype=object)
122128
res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer)
123-
assert res.dtype == "M8[ns]"
129+
assert res.dtype == "M8[s]"
124130

125131
vals2 = np.array([NaT, NaT], dtype=object)
126132
res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer)
127-
assert res2.dtype == "M8[ns]"
133+
assert res2.dtype == "M8[s]"
128134

129135

130136
@pytest.mark.parametrize(

pandas/tests/tslibs/test_strptime.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,26 @@
99
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
1010
from pandas._libs.tslibs.strptime import array_strptime
1111

12-
from pandas import Timestamp
12+
from pandas import (
13+
NaT,
14+
Timestamp,
15+
)
1316
import pandas._testing as tm
1417

1518
creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value
1619

1720

1821
class TestArrayStrptimeResolutionInference:
22+
def test_array_strptime_resolution_all_nat(self):
23+
arr = np.array([NaT, np.nan], dtype=object)
24+
25+
fmt = "%Y-%m-%d %H:%M:%S"
26+
res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer)
27+
assert res.dtype == "M8[s]"
28+
29+
res, _ = array_strptime(arr, fmt=fmt, utc=True, creso=creso_infer)
30+
assert res.dtype == "M8[s]"
31+
1932
@pytest.mark.parametrize("tz", [None, timezone.utc])
2033
def test_array_strptime_resolution_inference_homogeneous_strings(self, tz):
2134
dt = datetime(2016, 1, 2, 3, 4, 5, 678900, tzinfo=tz)

0 commit comments

Comments
 (0)