Skip to content

ENH: implement Timestamp _as_reso, _as_unit #47333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timestamps.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ cdef class _Timestamp(ABCTimestamp):
cpdef void _set_freq(self, freq)
cdef _warn_on_field_deprecation(_Timestamp self, freq, str field)
cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op)
cdef _Timestamp _as_reso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*)
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timestamps.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,4 @@ class Timestamp(datetime):
def days_in_month(self) -> int: ...
@property
def daysinmonth(self) -> int: ...
def _as_unit(self, unit: str, round_ok: bool = ...) -> Timestamp: ...
38 changes: 38 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ from pandas._libs.tslibs.conversion cimport (
maybe_localize_tso,
)
from pandas._libs.tslibs.dtypes cimport (
get_conversion_factor,
npy_unit_to_abbrev,
periods_per_day,
periods_per_second,
Expand Down Expand Up @@ -86,6 +87,7 @@ from pandas._libs.tslibs.np_datetime cimport (
dt64_to_dtstruct,
get_datetime64_unit,
get_datetime64_value,
get_unit_from_dtype,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pydatetime_to_dt64,
Expand Down Expand Up @@ -1000,6 +1002,42 @@ cdef class _Timestamp(ABCTimestamp):
# -----------------------------------------------------------------
# Conversion Methods

# TODO: share with _Timedelta?
# Convert this Timestamp to the resolution ``reso``.
#
# Parameters
#   reso     : NPY_DATETIMEUNIT, the target resolution.
#   round_ok : bint, default True. When False, converting to a coarser
#              unit raises instead of discarding a nonzero remainder.
#
# Returns
#   ``self`` unchanged when ``reso`` already matches ``self._reso``;
#   otherwise a new object built via ``_from_value_and_reso`` that keeps
#   this object's ``tzinfo``.
#
# Raises
#   ValueError    : the conversion would lose precision and ``round_ok``
#                   is False.
#   OverflowError : the overflow-checked multiplication to a finer unit
#                   does not fit in int64 (callers are expected to
#                   re-raise this as OutOfBoundsDatetime).
#
# cdivision(False) requests Python-style (floor) semantics for integer
# division/modulo inside this function.
@cython.cdivision(False)
cdef _Timestamp _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
    cdef:
        int64_t value, mult, div, mod

    # Nothing to convert: hand back the same object.
    if reso == self._reso:
        return self

    if reso < self._reso:
        # e.g. ns -> us
        # Going to a coarser unit: divide by the number of current-unit
        # ticks per target-unit tick.
        mult = get_conversion_factor(reso, self._reso)
        div, mod = divmod(self.value, mult)
        if mod > 0 and not round_ok:
            raise ValueError("Cannot losslessly convert units")

        # Note that when mod > 0, we follow np.datetime64 in always
        # rounding down.
        value = div
    else:
        # Going to a finer unit: multiply, with overflow detection enabled.
        mult = get_conversion_factor(self._reso, reso)
        with cython.overflowcheck(True):
            # Note: caller is responsible for re-raising as OutOfBoundsDatetime
            value = self.value * mult
    return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo)

def _as_unit(self, str unit, bint round_ok=True):
    """
    Return this Timestamp converted to the resolution named by ``unit``.

    Parameters
    ----------
    unit : str
        Unit abbreviation, interpolated into a ``M8[<unit>]`` dtype string.
    round_ok : bool, default True
        Forwarded to ``_as_reso``; when False a lossy conversion raises
        ValueError.

    Raises
    ------
    OutOfBoundsDatetime
        If ``_as_reso`` raises OverflowError during the conversion.
    """
    # Resolve the unit string to its resolution code through the
    # corresponding datetime64 dtype.
    target_reso = get_unit_from_dtype(np.dtype(f"M8[{unit}]"))
    try:
        converted = self._as_reso(target_reso, round_ok=round_ok)
    except OverflowError as err:
        raise OutOfBoundsDatetime(
            f"Cannot cast {self} to unit='{unit}' without overflow."
        ) from err
    return converted

@property
def asm8(self) -> np.datetime64:
"""
Expand Down
13 changes: 7 additions & 6 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
NaT,
iNaT,
)
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.errors import OutOfBoundsTimedelta

import pandas as pd
Expand All @@ -33,23 +34,23 @@ def test_as_unit(self):

res = td._as_unit("us")
assert res.value == td.value // 1000
assert res._reso == td._reso - 1
assert res._reso == NpyDatetimeUnit.NPY_FR_us.value

rt = res._as_unit("ns")
assert rt.value == td.value
assert rt._reso == td._reso

res = td._as_unit("ms")
assert res.value == td.value // 1_000_000
assert res._reso == td._reso - 2
assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value

rt = res._as_unit("ns")
assert rt.value == td.value
assert rt._reso == td._reso

res = td._as_unit("s")
assert res.value == td.value // 1_000_000_000
assert res._reso == td._reso - 3
assert res._reso == NpyDatetimeUnit.NPY_FR_s.value

rt = res._as_unit("ns")
assert rt.value == td.value
Expand All @@ -58,15 +59,15 @@ def test_as_unit(self):
def test_as_unit_overflows(self):
# microsecond that would be just out of bounds for nano
us = 9223372800000000
td = Timedelta._from_value_and_reso(us, 9)
td = Timedelta._from_value_and_reso(us, NpyDatetimeUnit.NPY_FR_us.value)

msg = "Cannot cast 106752 days 00:00:00 to unit='ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
td._as_unit("ns")

res = td._as_unit("ms")
assert res.value == us // 1000
assert res._reso == 8
assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value

def test_as_unit_rounding(self):
td = Timedelta(microseconds=1500)
Expand All @@ -75,7 +76,7 @@ def test_as_unit_rounding(self):
expected = Timedelta(milliseconds=1)
assert res == expected

assert res._reso == 8
assert res._reso == NpyDatetimeUnit.NPY_FR_ms.value
assert res.value == 1

with pytest.raises(ValueError, match="Cannot losslessly convert units"):
Expand Down
81 changes: 81 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
utc,
)

from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas._libs.tslibs.timezones import (
dateutil_gettz as gettz,
get_timezone,
)
from pandas.errors import OutOfBoundsDatetime
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -850,3 +852,82 @@ def test_timestamp(self, dt64, ts):
def test_to_period(self, dt64, ts):
alt = Timestamp(dt64)
assert ts.to_period("D") == alt.to_period("D")


class TestAsUnit:
    """Exercise ``Timestamp._as_unit`` across resolutions."""

    def test_as_unit(self):
        # Round-trip a nanosecond Timestamp through each coarser unit.
        ts = Timestamp("1970-01-01")

        # Asking for the unit we already have returns the same object.
        assert ts._as_unit("ns") is ts

        cases = [
            ("us", 1000, NpyDatetimeUnit.NPY_FR_us),
            ("ms", 1_000_000, NpyDatetimeUnit.NPY_FR_ms),
            ("s", 1_000_000_000, NpyDatetimeUnit.NPY_FR_s),
        ]
        for unit, ns_per_tick, npy_unit in cases:
            coarse = ts._as_unit(unit)
            assert coarse.value == ts.value // ns_per_tick
            assert coarse._reso == npy_unit.value

            # Converting back to nanoseconds restores value and resolution.
            back = coarse._as_unit("ns")
            assert back.value == ts.value
            assert back._reso == ts._reso

    def test_as_unit_overflows(self):
        # A microsecond count that would fall just outside the
        # nanosecond-representable range.
        us_value = 9223372800000000
        us_reso = NpyDatetimeUnit.NPY_FR_us.value
        ts = Timestamp._from_value_and_reso(us_value, us_reso, None)

        with pytest.raises(
            OutOfBoundsDatetime,
            match="Cannot cast 2262-04-12 00:00:00 to unit='ns' without overflow",
        ):
            ts._as_unit("ns")

        # Going to a coarser unit is still fine.
        as_ms = ts._as_unit("ms")
        assert as_ms.value == us_value // 1000
        assert as_ms._reso == NpyDatetimeUnit.NPY_FR_ms.value

    def test_as_unit_rounding(self):
        original = Timestamp(1_500_000)  # i.e. 1500 microseconds
        as_ms = original._as_unit("ms")

        # The sub-millisecond remainder is dropped, leaving 1 millisecond.
        one_ms = Timestamp(1_000_000)
        assert as_ms == one_ms

        assert as_ms._reso == NpyDatetimeUnit.NPY_FR_ms.value
        assert as_ms.value == 1

        # With rounding disallowed, the lossy conversion must raise.
        with pytest.raises(ValueError, match="Cannot losslessly convert units"):
            original._as_unit("ms", round_ok=False)

    def test_as_unit_non_nano(self):
        # Neither the source nor the target resolution is nanoseconds.
        ms_ts = Timestamp("1970-01-02")._as_unit("ms")
        assert ms_ts.year == 1970
        assert ms_ts.month == 1
        assert ms_ts.day == 2
        assert (
            ms_ts.hour
            == ms_ts.minute
            == ms_ts.second
            == ms_ts.microsecond
            == ms_ts.nanosecond
            == 0
        )

        sec_ts = ms_ts._as_unit("s")
        assert sec_ts.value == 24 * 3600
        assert sec_ts.year == 1970
        assert sec_ts.month == 1
        assert sec_ts.day == 2
        assert (
            sec_ts.hour
            == sec_ts.minute
            == sec_ts.second
            == sec_ts.microsecond
            == sec_ts.nanosecond
            == 0
        )