Skip to content

ENH: all-NaT -> infer second resolution #56103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -642,11 +642,16 @@ cpdef array_to_datetime(
utc=utc,
creso=state.creso,
)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = iresult.view("M8[s]").reshape(result.shape)
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
return result, tz_out


Expand Down Expand Up @@ -823,14 +828,16 @@ def array_to_datetime_with_tz(
# We encountered mismatched resolutions, need to re-parse with
# the correct one.
return array_to_datetime_with_tz(values, tz=tz, creso=creso)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# We didn't find any non-NaT to infer from, default to "ns"
result = result.view("M8[ns]")
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = result.view("M8[s]")
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
else:
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
Expand Down
14 changes: 10 additions & 4 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,16 @@ def array_strptime(
creso=state.creso,
)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.base.view(f"M8[{abbrev}]")
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = iresult.base.view("M8[s]")
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.base.view(f"M8[{abbrev}]")
return result, result_timezone.base


Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
class TestArrayToDatetimeResolutionInference:
# TODO: tests that include tzs, ints

def test_infer_all_nat(self):
arr = np.array([NaT, np.nan], dtype=object)
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
assert tz is None
assert result.dtype == "M8[s]"

def test_infer_homogeoneous_datetimes(self):
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
arr = np.array([dt, dt, dt], dtype=object)
Expand Down Expand Up @@ -120,11 +126,11 @@ def test_array_to_datetime_with_tz_resolution_all_nat(self):
tz = tzoffset("custom", 3600)
vals = np.array(["NaT"], dtype=object)
res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer)
assert res.dtype == "M8[ns]"
assert res.dtype == "M8[s]"

vals2 = np.array([NaT, NaT], dtype=object)
res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer)
assert res2.dtype == "M8[ns]"
assert res2.dtype == "M8[s]"


@pytest.mark.parametrize(
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/tslibs/test_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,26 @@
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas._libs.tslibs.strptime import array_strptime

from pandas import Timestamp
from pandas import (
NaT,
Timestamp,
)
import pandas._testing as tm

creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value


class TestArrayStrptimeResolutionInference:
def test_array_strptime_resolution_all_nat(self):
arr = np.array([NaT, np.nan], dtype=object)

fmt = "%Y-%m-%d %H:%M:%S"
res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer)
assert res.dtype == "M8[s]"

res, _ = array_strptime(arr, fmt=fmt, utc=True, creso=creso_infer)
assert res.dtype == "M8[s]"

@pytest.mark.parametrize("tz", [None, timezone.utc])
def test_array_strptime_resolution_inference_homogeneous_strings(self, tz):
dt = datetime(2016, 1, 2, 3, 4, 5, 678900, tzinfo=tz)
Expand Down