Skip to content

Commit 5a599c1

Browse files
authored
ENH: allow non-nano in DatetimeArray, TimedeltaArray._simple_new (#46901)
1 parent 44dcef4 commit 5a599c1

File tree

10 files changed

+90
-10
lines changed

10 files changed

+90
-10
lines changed

pandas/_libs/tslibs/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"Tick",
2626
"BaseOffset",
2727
"tz_compare",
28+
"is_unitless",
2829
]
2930

3031
from pandas._libs.tslibs import dtypes
@@ -39,6 +40,7 @@
3940
from pandas._libs.tslibs.np_datetime import (
4041
OutOfBoundsDatetime,
4142
OutOfBoundsTimedelta,
43+
is_unitless,
4244
)
4345
from pandas._libs.tslibs.offsets import (
4446
BaseOffset,

pandas/_libs/tslibs/np_datetime.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
99
def astype_overflowsafe(
1010
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
1111
) -> np.ndarray: ...
12+
def is_unitless(dtype: np.dtype) -> bool: ...

pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,18 @@ def py_get_unit_from_dtype(dtype):
9292
return get_unit_from_dtype(dtype)
9393

9494

95+
def is_unitless(dtype: cnp.dtype) -> bool:
96+
"""
97+
Check if a datetime64 or timedelta64 dtype has no attached unit.
98+
"""
99+
if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]:
100+
raise ValueError("is_unitless dtype must be datetime64 or timedelta64")
101+
cdef:
102+
NPY_DATETIMEUNIT unit = get_unit_from_dtype(dtype)
103+
104+
return unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
105+
106+
95107
# ----------------------------------------------------------------------
96108
# Comparison
97109

pandas/_libs/tslibs/timedeltas.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ class Timedelta(timedelta):
8888
# GH 46171
8989
# While Timedelta can return pd.NaT, having the constructor return
9090
# a Union with NaTType makes things awkward for users of pandas
91+
@classmethod
92+
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
9193
@property
9294
def days(self) -> int: ...
9395
@property

pandas/core/arrays/datetimes.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
iNaT,
3232
ints_to_pydatetime,
3333
is_date_array_normalized,
34+
is_unitless,
3435
normalize_i8_timestamps,
3536
timezones,
3637
to_offset,
@@ -335,7 +336,12 @@ def _simple_new( # type: ignore[override]
335336
cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=DT64NS_DTYPE
336337
) -> DatetimeArray:
337338
assert isinstance(values, np.ndarray)
338-
assert values.dtype == DT64NS_DTYPE
339+
assert dtype.kind == "M"
340+
if isinstance(dtype, np.dtype):
341+
# TODO: once non-nano DatetimeTZDtype is implemented, require that
342+
# dtype's reso match values's reso
343+
assert dtype == values.dtype
344+
assert not is_unitless(dtype)
339345

340346
result = super()._simple_new(values, dtype)
341347
result._freq = freq
@@ -761,7 +767,7 @@ def _add_offset(self, offset) -> DatetimeArray:
761767
else:
762768
values = self
763769
result = offset._apply_array(values).view("M8[ns]")
764-
result = DatetimeArray._simple_new(result)
770+
result = DatetimeArray._simple_new(result, dtype=result.dtype)
765771
result = result.tz_localize(self.tz)
766772

767773
except NotImplementedError:

pandas/core/arrays/timedeltas.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
precision_from_unit,
3030
)
3131
from pandas._libs.tslibs.fields import get_timedelta_field
32+
from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype
3233
from pandas._libs.tslibs.timedeltas import (
3334
array_to_timedelta64,
3435
ints_to_pytimedelta,
@@ -40,6 +41,7 @@
4041
npt,
4142
)
4243
from pandas.compat.numpy import function as nv
44+
from pandas.util._decorators import cache_readonly
4345
from pandas.util._validators import validate_endpoints
4446

4547
from pandas.core.dtypes.astype import astype_td64_unit_conversion
@@ -154,8 +156,15 @@ class TimedeltaArray(dtl.TimelikeOps):
154156
# Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray)
155157
# operates pointwise.
156158

159+
@cache_readonly
160+
def _reso(self):
161+
return py_get_unit_from_dtype(self.dtype)
162+
157163
def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType:
158-
return Timedelta(x, unit="ns")
164+
y = x.view("i8")
165+
if y == NaT.value:
166+
return NaT
167+
return Timedelta._from_value_and_reso(y, reso=self._reso)
159168

160169
@property
161170
# error: Return type "dtype" of "dtype" incompatible with return type
@@ -174,7 +183,7 @@ def dtype(self) -> np.dtype: # type: ignore[override]
174183
-------
175184
numpy.dtype
176185
"""
177-
return TD64NS_DTYPE
186+
return self._ndarray.dtype
178187

179188
# ----------------------------------------------------------------
180189
# Constructors
@@ -244,11 +253,13 @@ def __init__(
244253
def _simple_new( # type: ignore[override]
245254
cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE
246255
) -> TimedeltaArray:
247-
assert dtype == TD64NS_DTYPE, dtype
256+
# Require td64 dtype, not unit-less, matching values.dtype
257+
assert isinstance(dtype, np.dtype) and dtype.kind == "m"
258+
assert not tslibs.is_unitless(dtype)
248259
assert isinstance(values, np.ndarray), type(values)
249-
assert values.dtype == TD64NS_DTYPE
260+
assert dtype == values.dtype
250261

251-
result = super()._simple_new(values=values, dtype=TD64NS_DTYPE)
262+
result = super()._simple_new(values=values, dtype=dtype)
252263
result._freq = freq
253264
return result
254265

@@ -262,7 +273,7 @@ def _from_sequence(
262273
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
263274
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
264275

265-
return cls._simple_new(data, freq=freq)
276+
return cls._simple_new(data, dtype=data.dtype, freq=freq)
266277

267278
@classmethod
268279
def _from_sequence_not_strict(
@@ -286,7 +297,7 @@ def _from_sequence_not_strict(
286297
if explicit_none:
287298
freq = None
288299

289-
result = cls._simple_new(data, freq=freq)
300+
result = cls._simple_new(data, dtype=data.dtype, freq=freq)
290301

291302
if inferred_freq is None and freq is not None:
292303
# this condition precludes `freq_infer`
@@ -330,7 +341,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
330341
if not right_closed:
331342
index = index[:-1]
332343

333-
return cls._simple_new(index.view("m8[ns]"), freq=freq)
344+
td64values = index.view("m8[ns]")
345+
return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq)
334346

335347
# ----------------------------------------------------------------
336348
# DatetimeLike Interface

pandas/tests/arrays/test_datetimes.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,17 @@
1111
from pandas.core.arrays import DatetimeArray
1212

1313

14+
class TestNonNano:
15+
@pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)])
16+
@pytest.mark.xfail(reason="_box_func is not yet patched to get reso right")
17+
def test_non_nano(self, unit, reso):
18+
arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
19+
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
20+
21+
assert dta.dtype == arr.dtype
22+
assert dta[0]._reso == reso
23+
24+
1425
class TestDatetimeArrayComparisons:
1526
# TODO: merge this into tests/arithmetic/test_datetime64 once it is
1627
# sufficiently robust

pandas/tests/arrays/test_timedeltas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@
77
from pandas.core.arrays import TimedeltaArray
88

99

10+
class TestNonNano:
11+
@pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)])
12+
def test_non_nano(self, unit, reso):
13+
arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]")
14+
tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype)
15+
16+
assert tda.dtype == arr.dtype
17+
assert tda[0]._reso == reso
18+
19+
1020
class TestTimedeltaArray:
1121
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
1222
def test_astype_int(self, dtype):

pandas/tests/tslibs/test_api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_namespace():
5050
"tz_convert_from_utc_single",
5151
"to_offset",
5252
"tz_compare",
53+
"is_unitless",
5354
]
5455

5556
expected = set(submodules + api)

pandas/tests/tslibs/test_np_datetime.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,36 @@
44
from pandas._libs.tslibs.np_datetime import (
55
OutOfBoundsDatetime,
66
astype_overflowsafe,
7+
is_unitless,
78
py_get_unit_from_dtype,
89
py_td64_to_tdstruct,
910
)
1011

1112
import pandas._testing as tm
1213

1314

15+
def test_is_unitless():
16+
dtype = np.dtype("M8[ns]")
17+
assert not is_unitless(dtype)
18+
19+
dtype = np.dtype("datetime64")
20+
assert is_unitless(dtype)
21+
22+
dtype = np.dtype("m8[ns]")
23+
assert not is_unitless(dtype)
24+
25+
dtype = np.dtype("timedelta64")
26+
assert is_unitless(dtype)
27+
28+
msg = "dtype must be datetime64 or timedelta64"
29+
with pytest.raises(ValueError, match=msg):
30+
is_unitless(np.dtype(np.int64))
31+
32+
msg = "Argument 'dtype' has incorrect type"
33+
with pytest.raises(TypeError, match=msg):
34+
is_unitless("foo")
35+
36+
1437
def test_get_unit_from_dtype():
1538
# datetime64
1639
assert py_get_unit_from_dtype(np.dtype("M8[Y]")) == 0

0 commit comments

Comments
 (0)