Skip to content

ENH: allow non-nano in DatetimeArray, TimedeltaArray._simple_new #46901

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"Tick",
"BaseOffset",
"tz_compare",
"is_unitless",
]

from pandas._libs.tslibs import dtypes
Expand All @@ -39,6 +40,7 @@
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
is_unitless,
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
def astype_overflowsafe(
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
) -> np.ndarray: ...
def is_unitless(dtype: np.dtype) -> bool: ...
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,18 @@ def py_get_unit_from_dtype(dtype):
return get_unit_from_dtype(dtype)


def is_unitless(dtype: cnp.dtype) -> bool:
    """
    Report whether a datetime64/timedelta64 dtype is generic, i.e. has no unit.

    Raises
    ------
    ValueError
        If ``dtype`` is neither a datetime64 nor a timedelta64 dtype.
    """
    cdef:
        NPY_DATETIMEUNIT reso

    # Validate the dtype kind before asking for its unit.
    if dtype.type_num != cnp.NPY_DATETIME and dtype.type_num != cnp.NPY_TIMEDELTA:
        raise ValueError("is_unitless dtype must be datetime64 or timedelta64")

    reso = get_unit_from_dtype(dtype)
    return reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC


# ----------------------------------------------------------------------
# Comparison

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ class Timedelta(timedelta):
# GH 46171
# While Timedelta can return pd.NaT, having the constructor return
# a Union with NaTType makes things awkward for users of pandas
@classmethod
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
@property
def days(self) -> int: ...
@property
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
iNaT,
ints_to_pydatetime,
is_date_array_normalized,
is_unitless,
normalize_i8_timestamps,
timezones,
to_offset,
Expand Down Expand Up @@ -335,7 +336,12 @@ def _simple_new( # type: ignore[override]
cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=DT64NS_DTYPE
) -> DatetimeArray:
assert isinstance(values, np.ndarray)
assert values.dtype == DT64NS_DTYPE
assert dtype.kind == "M"
if isinstance(dtype, np.dtype):
# TODO: once non-nano DatetimeTZDtype is implemented, require that
# dtype's reso match the reso of values
assert dtype == values.dtype
assert not is_unitless(dtype)

result = super()._simple_new(values, dtype)
result._freq = freq
Expand Down Expand Up @@ -761,7 +767,7 @@ def _add_offset(self, offset) -> DatetimeArray:
else:
values = self
result = offset._apply_array(values).view("M8[ns]")
result = DatetimeArray._simple_new(result)
result = DatetimeArray._simple_new(result, dtype=result.dtype)
result = result.tz_localize(self.tz)

except NotImplementedError:
Expand Down
28 changes: 20 additions & 8 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
precision_from_unit,
)
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64,
ints_to_pytimedelta,
Expand All @@ -40,6 +41,7 @@
npt,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_endpoints

from pandas.core.dtypes.astype import astype_td64_unit_conversion
Expand Down Expand Up @@ -154,8 +156,15 @@ class TimedeltaArray(dtl.TimelikeOps):
# Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray)
# operates pointwise.

@cache_readonly
def _reso(self):
    """
    Unit code of this array's dtype, as reported by py_get_unit_from_dtype.
    """
    own_dtype = self.dtype
    return py_get_unit_from_dtype(own_dtype)

def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType:
return Timedelta(x, unit="ns")
y = x.view("i8")
if y == NaT.value:
return NaT
return Timedelta._from_value_and_reso(y, reso=self._reso)

@property
# error: Return type "dtype" of "dtype" incompatible with return type
Expand All @@ -174,7 +183,7 @@ def dtype(self) -> np.dtype: # type: ignore[override]
-------
numpy.dtype
"""
return TD64NS_DTYPE
return self._ndarray.dtype

# ----------------------------------------------------------------
# Constructors
Expand Down Expand Up @@ -244,11 +253,13 @@ def __init__(
def _simple_new( # type: ignore[override]
cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE
) -> TimedeltaArray:
assert dtype == TD64NS_DTYPE, dtype
# Require td64 dtype, not unit-less, matching values.dtype
assert isinstance(dtype, np.dtype) and dtype.kind == "m"
assert not tslibs.is_unitless(dtype)
assert isinstance(values, np.ndarray), type(values)
assert values.dtype == TD64NS_DTYPE
assert dtype == values.dtype

result = super()._simple_new(values=values, dtype=TD64NS_DTYPE)
result = super()._simple_new(values=values, dtype=dtype)
result._freq = freq
return result

Expand All @@ -262,7 +273,7 @@ def _from_sequence(
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)

return cls._simple_new(data, freq=freq)
return cls._simple_new(data, dtype=data.dtype, freq=freq)

@classmethod
def _from_sequence_not_strict(
Expand All @@ -286,7 +297,7 @@ def _from_sequence_not_strict(
if explicit_none:
freq = None

result = cls._simple_new(data, freq=freq)
result = cls._simple_new(data, dtype=data.dtype, freq=freq)

if inferred_freq is None and freq is not None:
# this condition precludes `freq_infer`
Expand Down Expand Up @@ -330,7 +341,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
if not right_closed:
index = index[:-1]

return cls._simple_new(index.view("m8[ns]"), freq=freq)
td64values = index.view("m8[ns]")
return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq)

# ----------------------------------------------------------------
# DatetimeLike Interface
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@
from pandas.core.arrays import DatetimeArray


class TestNonNano:
    """Checks for DatetimeArray backed by non-nanosecond datetime64 values."""

    @pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)])
    @pytest.mark.xfail(reason="_box_func is not yet patched to get reso right")
    def test_non_nano(self, unit, reso):
        # Reinterpret plain int64 values as datetime64 with the requested unit.
        i8values = np.arange(5, dtype=np.int64)
        values = i8values.view(f"M8[{unit}]")

        dta = DatetimeArray._simple_new(values, dtype=values.dtype)

        # The array keeps the dtype it was given ...
        assert dta.dtype == values.dtype
        # ... and a boxed element is expected to report the matching reso code.
        first = dta[0]
        assert first._reso == reso


class TestDatetimeArrayComparisons:
# TODO: merge this into tests/arithmetic/test_datetime64 once it is
# sufficiently robust
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
from pandas.core.arrays import TimedeltaArray


class TestNonNano:
    """Checks for TimedeltaArray backed by non-nanosecond timedelta64 values."""

    @pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)])
    def test_non_nano(self, unit, reso):
        # Reinterpret plain int64 values as timedelta64 with the requested unit.
        i8values = np.arange(5, dtype=np.int64)
        values = i8values.view(f"m8[{unit}]")

        tda = TimedeltaArray._simple_new(values, dtype=values.dtype)

        # The array keeps the dtype it was given ...
        assert tda.dtype == values.dtype
        # ... and a boxed element reports the matching reso code.
        first = tda[0]
        assert first._reso == reso


class TestTimedeltaArray:
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/tslibs/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_namespace():
"tz_convert_from_utc_single",
"to_offset",
"tz_compare",
"is_unitless",
]

expected = set(submodules + api)
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/tslibs/test_np_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,36 @@
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
astype_overflowsafe,
is_unitless,
py_get_unit_from_dtype,
py_td64_to_tdstruct,
)

import pandas._testing as tm


def test_is_unitless():
    # A dtype with an explicit unit is never unitless.
    for spec in ("M8[ns]", "m8[ns]"):
        assert not is_unitless(np.dtype(spec))

    # The generic datetime64/timedelta64 dtypes carry no unit.
    for spec in ("datetime64", "timedelta64"):
        assert is_unitless(np.dtype(spec))

    # A dtype that is neither datetime64 nor timedelta64 must raise ValueError.
    with pytest.raises(
        ValueError, match="dtype must be datetime64 or timedelta64"
    ):
        is_unitless(np.dtype(np.int64))

    # A non-dtype argument must raise TypeError.
    with pytest.raises(TypeError, match="Argument 'dtype' has incorrect type"):
        is_unitless("foo")


def test_get_unit_from_dtype():
# datetime64
assert py_get_unit_from_dtype(np.dtype("M8[Y]")) == 0
Expand Down