diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8736b9d5ec8d6..da4eeaeb3b692 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -642,11 +642,16 @@ cpdef array_to_datetime( utc=utc, creso=state.creso, ) - - # Otherwise we can use the single reso that we encountered and avoid - # a second pass. - abbrev = npy_unit_to_abbrev(state.creso) - result = iresult.view(f"M8[{abbrev}]").reshape(result.shape) + elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # i.e. we never encountered anything non-NaT, default to "s". This + # ensures that insert and concat-like operations with NaT + # do not upcast units + result = iresult.view("M8[s]").reshape(result.shape) + else: + # Otherwise we can use the single reso that we encountered and avoid + # a second pass. + abbrev = npy_unit_to_abbrev(state.creso) + result = iresult.view(f"M8[{abbrev}]").reshape(result.shape) return result, tz_out @@ -823,14 +828,16 @@ def array_to_datetime_with_tz( # We encountered mismatched resolutions, need to re-parse with # the correct one. return array_to_datetime_with_tz(values, tz=tz, creso=creso) - - # Otherwise we can use the single reso that we encountered and avoid - # a second pass. - abbrev = npy_unit_to_abbrev(creso) - result = result.view(f"M8[{abbrev}]") - elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - # We didn't find any non-NaT to infer from, default to "ns" - result = result.view("M8[ns]") + elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # i.e. we never encountered anything non-NaT, default to "s". This + # ensures that insert and concat-like operations with NaT + # do not upcast units + result = result.view("M8[s]") + else: + # Otherwise we can use the single reso that we encountered and avoid + # a second pass. + abbrev = npy_unit_to_abbrev(creso) + result = result.view(f"M8[{abbrev}]") else: abbrev = npy_unit_to_abbrev(creso) result = result.view(f"M8[{abbrev}]") diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d8926d14ae7e5..381575c439dcc 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -489,10 +489,16 @@ def array_strptime( creso=state.creso, ) - # Otherwise we can use the single reso that we encountered and avoid - # a second pass. - abbrev = npy_unit_to_abbrev(state.creso) - result = iresult.base.view(f"M8[{abbrev}]") + elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + # i.e. we never encountered anything non-NaT, default to "s". This + # ensures that insert and concat-like operations with NaT + # do not upcast units + result = iresult.base.view("M8[s]") + else: + # Otherwise we can use the single reso that we encountered and avoid + # a second pass. + abbrev = npy_unit_to_abbrev(state.creso) + result = iresult.base.view(f"M8[{abbrev}]") return result, result_timezone.base diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 15e34c68c4d2f..632d3b4cc3c84 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -26,6 +26,12 @@ class TestArrayToDatetimeResolutionInference: # TODO: tests that include tzs, ints + def test_infer_all_nat(self): + arr = np.array([NaT, np.nan], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + assert result.dtype == "M8[s]" + def test_infer_homogeoneous_datetimes(self): dt = datetime(2023, 10, 27, 18, 3, 5, 678000) arr = np.array([dt, dt, dt], dtype=object) @@ -120,11 +126,11 @@ def test_array_to_datetime_with_tz_resolution_all_nat(self): tz = tzoffset("custom", 3600) vals = np.array(["NaT"], dtype=object) res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer) - assert res.dtype == "M8[ns]" + assert res.dtype == "M8[s]" vals2 = np.array([NaT, NaT], dtype=object) res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer) - assert res2.dtype == "M8[ns]" + assert res2.dtype == "M8[s]" @pytest.mark.parametrize( diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index ce45bdd10b8e8..d726006b03f6d 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -9,13 +9,26 @@ from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas._libs.tslibs.strptime import array_strptime -from pandas import Timestamp +from pandas import ( + NaT, + Timestamp, +) import pandas._testing as tm creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value class TestArrayStrptimeResolutionInference: + def test_array_strptime_resolution_all_nat(self): + arr = np.array([NaT, np.nan], dtype=object) + + fmt = "%Y-%m-%d %H:%M:%S" + res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer) + assert res.dtype == "M8[s]" + + res, _ = array_strptime(arr, fmt=fmt, utc=True, creso=creso_infer) + assert res.dtype == "M8[s]" + @pytest.mark.parametrize("tz", [None, timezone.utc]) def test_array_strptime_resolution_inference_homogeneous_strings(self, tz): dt = datetime(2016, 1, 2, 3, 4, 5, 678900, tzinfo=tz)