From 18f05c6c69ae3f74a4dcc82fdbf3b5018c401a3a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Mar 2022 16:52:16 -0700 Subject: [PATCH 1/2] ENH: implement pandas_timedelta_to_timedeltastruct for other resos --- pandas/_libs/tslibs/np_datetime.pyx | 8 + .../_libs/tslibs/src/datetime/np_datetime.c | 236 +++++++++++++++++- pandas/tests/tslibs/test_np_datetime.py | 70 +++++- 3 files changed, 307 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 32d2f1ca4e406..7f4c3f61ad4d2 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -163,6 +163,14 @@ cdef inline void td64_to_tdstruct(int64_t td64, return +# just exposed for testing at the moment +def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit): + cdef: + pandas_timedeltastruct tds + pandas_timedelta_to_timedeltastruct(td64, unit, &tds) + return tds # <- returned as a dict to python + + cdef inline int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts): """ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 12e20df256293..708ca878364be 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -682,7 +682,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, npy_int64 sfrac; npy_int64 ifrac; int sign; - npy_int64 DAY_NS = 86400000000000LL; + npy_int64 PER_DAY; + npy_int64 PER_SEC; /* Initialize the output to all zeros */ memset(out, 0, sizeof(pandas_timedeltastruct)); @@ -690,11 +691,14 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, switch (base) { case NPY_FR_ns: + PER_DAY = 86400000000000LL; + PER_SEC = 1000LL * 1000LL * 1000LL; + // put frac in seconds - if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0) - frac = td / (1000LL * 1000LL * 1000LL) - 1; + if (td < 0 && td % PER_SEC != 0) + frac = td / PER_SEC - 1; else - frac = td / (1000LL * 
1000LL * 1000LL); + frac = td / PER_SEC; if (frac < 0) { sign = -1; @@ -738,12 +742,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * (1000LL * 1000LL * 1000LL); + + out->sec) * PER_SEC; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * DAY_NS + sfrac); + ifrac = td - (out->days * PER_DAY + sfrac); if (ifrac != 0) { out->ms = ifrac / (1000LL * 1000LL); @@ -762,11 +766,231 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, out->nanoseconds = out->ns; break; + case NPY_FR_us: + + PER_DAY = 86400000000LL; + PER_SEC = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % PER_SEC != 0) + frac = td / PER_SEC - 1; + else + frac = td / PER_SEC; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * PER_SEC; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * PER_DAY + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + PER_DAY = 86400000LL; + PER_SEC = 1000LL; + + // put frac in seconds + if (td < 0 && td % PER_SEC != 0) + frac = td / PER_SEC - 1; + else + frac = td / PER_SEC; + + if (frac < 0) { + sign = -1; + + // even 
fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * PER_SEC; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * PER_DAY + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc PER_SEC=1 + + PER_DAY = 86400000LL; + PER_SEC = 1L; + + // put frac in seconds + if (td < 0 && td % PER_SEC != 0) + frac = td / PER_SEC - 1; + else + frac = td / PER_SEC; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * PER_SEC; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * PER_DAY + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + 
} else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + default: PyErr_SetString(PyExc_RuntimeError, "NumPy timedelta metadata is corrupted with " "invalid base unit"); } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; } diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py index 00a2f90217434..67361f60489bc 100644 --- a/pandas/tests/tslibs/test_np_datetime.py +++ b/pandas/tests/tslibs/test_np_datetime.py @@ -1,6 +1,9 @@ import numpy as np -from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype +from pandas._libs.tslibs.np_datetime import ( + py_get_unit_from_dtype, + py_td64_to_tdstruct, +) def test_get_unit_from_dtype(): @@ -35,3 +38,68 @@ def test_get_unit_from_dtype(): assert py_get_unit_from_dtype(np.dtype("m8[ps]")) == 11 assert py_get_unit_from_dtype(np.dtype("m8[fs]")) == 12 assert py_get_unit_from_dtype(np.dtype("m8[as]")) == 13 + + +def test_td64_to_tdstruct(): + val = 12454636234 # arbitrary value + + res1 = py_td64_to_tdstruct(val, 10) # ns + exp1 = { + "days": 0, + "hrs": 0, + "min": 0, + "sec": 12, + "ms": 454, + "us": 636, + "ns": 234, + "seconds": 12, + "microseconds": 454636, + "nanoseconds": 234, + } + assert res1 == exp1 + + res2 = py_td64_to_tdstruct(val, 9) # us + exp2 = { + "days": 0, + "hrs": 3, + "min": 27, + "sec": 34, + "ms": 636, + "us": 234, + "ns": 0, + "seconds": 12454, + "microseconds": 636234, + "nanoseconds": 0, + } + assert res2 == exp2 + + res3 = py_td64_to_tdstruct(val, 8) # ms + exp3 = { + "days": 144, + "hrs": 3, + "min": 37, + "sec": 16, + "ms": 234, + "us": 0, + "ns": 0, + "seconds": 13036, + "microseconds": 234000, + "nanoseconds": 0, + } + assert res3 == exp3 + + # Note this out of bounds for nanosecond Timedelta + res4 = py_td64_to_tdstruct(val, 7) # s + exp4 = { + "days": 144150, + "hrs": 21, + "min": 10, + "sec": 34, + "ms": 0, + "us": 0, + "ns": 0, + "seconds": 
76234, + "microseconds": 0, + "nanoseconds": 0, + } + assert res4 == exp4 From 981159fe2a014cc0206fc8c9525c68381852d3e2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 21 Mar 2022 20:58:59 -0700 Subject: [PATCH 2/2] idiomatic naming --- pandas/_libs/tslibs/np_datetime.pyi | 1 + .../_libs/tslibs/src/datetime/np_datetime.c | 66 +++++++++---------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index 5227de4e72f44..ad559ed6881c6 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -4,3 +4,4 @@ class OutOfBoundsDatetime(ValueError): ... # only exposed for testing def py_get_unit_from_dtype(dtype: np.dtype): ... +def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ... diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 708ca878364be..25a250b40693c 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -682,8 +682,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, npy_int64 sfrac; npy_int64 ifrac; int sign; - npy_int64 PER_DAY; - npy_int64 PER_SEC; + npy_int64 per_day; + npy_int64 per_sec; /* Initialize the output to all zeros */ memset(out, 0, sizeof(pandas_timedeltastruct)); @@ -691,14 +691,14 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, switch (base) { case NPY_FR_ns: - PER_DAY = 86400000000000LL; - PER_SEC = 1000LL * 1000LL * 1000LL; + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; // put frac in seconds - if (td < 0 && td % PER_SEC != 0) - frac = td / PER_SEC - 1; + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; else - frac = td / PER_SEC; + frac = td / per_sec; if (frac < 0) { sign = -1; @@ -742,12 +742,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * PER_SEC; + + 
out->sec) * per_sec; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * PER_DAY + sfrac); + ifrac = td - (out->days * per_day + sfrac); if (ifrac != 0) { out->ms = ifrac / (1000LL * 1000LL); @@ -760,22 +760,18 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, out->us = 0; out->ns = 0; } - - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; break; case NPY_FR_us: - PER_DAY = 86400000000LL; - PER_SEC = 1000LL * 1000LL; + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; // put frac in seconds - if (td < 0 && td % PER_SEC != 0) - frac = td / PER_SEC - 1; + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; else - frac = td / PER_SEC; + frac = td / per_sec; if (frac < 0) { sign = -1; @@ -819,12 +815,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * PER_SEC; + + out->sec) * per_sec; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * PER_DAY + sfrac); + ifrac = td - (out->days * per_day + sfrac); if (ifrac != 0) { out->ms = ifrac / 1000LL; @@ -841,14 +837,14 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, case NPY_FR_ms: - PER_DAY = 86400000LL; - PER_SEC = 1000LL; + per_day = 86400000LL; + per_sec = 1000LL; // put frac in seconds - if (td < 0 && td % PER_SEC != 0) - frac = td / PER_SEC - 1; + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; else - frac = td / PER_SEC; + frac = td / per_sec; if (frac < 0) { sign = -1; @@ -892,12 +888,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * PER_SEC; + + out->sec) * per_sec; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * PER_DAY + sfrac); + ifrac = td - (out->days * per_day + sfrac); if (ifrac != 0) { out->ms = ifrac; @@ -911,16 +907,16 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta 
td, break; case NPY_FR_s: - // special case where we can simplify many expressions bc PER_SEC=1 + // special case where we can simplify many expressions bc per_sec=1 - PER_DAY = 86400000LL; - PER_SEC = 1L; + per_day = 86400LL; /* seconds per day: td is already in seconds (per_sec == 1), so no sub-second scaling */ + per_sec = 1L; // put frac in seconds - if (td < 0 && td % PER_SEC != 0) - frac = td / PER_SEC - 1; + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; else - frac = td / PER_SEC; + frac = td / per_sec; if (frac < 0) { sign = -1; @@ -964,12 +960,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * PER_SEC; + + out->sec) * per_sec; if (sign < 0) out->days = -out->days; - ifrac = td - (out->days * PER_DAY + sfrac); + ifrac = td - (out->days * per_day + sfrac); if (ifrac != 0) { out->ms = 0;