diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index 222e770ef03d5..922fe8c4a4dce 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -4,6 +4,7 @@ class OutOfBoundsDatetime(ValueError): ... # only exposed for testing def py_get_unit_from_dtype(dtype: np.dtype): ... +def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ... def astype_overflowsafe( arr: np.ndarray, dtype: np.dtype, copy: bool = ... ) -> np.ndarray: ... diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index f7f7a29671d48..2a0b4ceeb98f8 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -189,6 +189,14 @@ cdef inline void td64_to_tdstruct(int64_t td64, return +# just exposed for testing at the moment +def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit): + cdef: + pandas_timedeltastruct tds + pandas_timedelta_to_timedeltastruct(td64, unit, &tds) + return tds # <- returned as a dict to python + + cdef inline int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts): """ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 69c073d223e67..f5400cf8da4df 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -712,7 +712,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, npy_int64 sfrac; npy_int64 ifrac; int sign; - npy_int64 DAY_NS = 86400000000000LL; + npy_int64 per_day; + npy_int64 per_sec; /* Initialize the output to all zeros */ memset(out, 0, sizeof(pandas_timedeltastruct)); @@ -720,11 +721,14 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, switch (base) { case NPY_FR_ns: + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + // put frac in seconds - if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0) - frac = td / (1000LL * 1000LL * 1000LL) - 1; + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; else - frac = td / (1000LL * 1000LL * 1000LL); + frac = td / per_sec; if (frac < 0) { sign = -1; @@ -768,12 +772,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * (1000LL * 1000LL * 1000LL); + + out->sec) * per_sec; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * DAY_NS + sfrac); + ifrac = td - (out->days * per_day + sfrac); if (ifrac != 0) { out->ms = ifrac / (1000LL * 1000LL); @@ -786,10 +790,222 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, out->us = 0; out->ns = 0; } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400000LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } break; default: @@ -797,6 +1013,10 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, "NumPy timedelta metadata is corrupted with " "invalid base unit"); } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; } diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py index 8fff63eaeac05..336c7d30d5f77 100644 --- a/pandas/tests/tslibs/test_np_datetime.py +++ b/pandas/tests/tslibs/test_np_datetime.py @@ -5,6 +5,7 @@ OutOfBoundsDatetime, astype_overflowsafe, py_get_unit_from_dtype, + py_td64_to_tdstruct, ) import pandas._testing as tm @@ -44,6 +45,71 @@ def test_get_unit_from_dtype(): assert py_get_unit_from_dtype(np.dtype("m8[as]")) == 13 +def test_td64_to_tdstruct(): + val = 12454636234 # arbitrary value + + res1 = py_td64_to_tdstruct(val, 10) # ns + exp1 = { + "days": 0, + "hrs": 0, + "min": 0, + "sec": 12, + "ms": 454, + "us": 636, + "ns": 234, + "seconds": 12, + "microseconds": 454636, + "nanoseconds": 234, + } + assert res1 == exp1 + + res2 = py_td64_to_tdstruct(val, 9) # us + exp2 = { + "days": 0, + "hrs": 3, + "min": 27, + "sec": 34, + "ms": 636, + "us": 234, + "ns": 0, + "seconds": 12454, + "microseconds": 636234, + "nanoseconds": 0, + } + assert res2 == exp2 + + res3 = py_td64_to_tdstruct(val, 8) # ms + exp3 = { + "days": 144, + "hrs": 3, + "min": 37, + "sec": 16, + "ms": 234, + "us": 0, + "ns": 0, + "seconds": 13036, + "microseconds": 234000, + "nanoseconds": 0, + } + assert res3 == exp3 + + # Note this out of bounds for nanosecond Timedelta + res4 = py_td64_to_tdstruct(val, 7) # s + exp4 = { + "days": 144150, + "hrs": 21, + "min": 10, + "sec": 34, + "ms": 0, + "us": 0, + "ns": 0, + "seconds": 76234, + "microseconds": 0, + "nanoseconds": 0, + } + assert res4 == exp4 + + class TestAstypeOverflowSafe: def test_pass_non_dt64_array(self): # check that we raise, not segfault