From eb206687685d3bdde6bb6293ca169fc10d147474 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Mar 2022 18:04:12 -0700 Subject: [PATCH 1/2] REF: implement get_unit_from_dtype --- pandas/_libs/tslibs/conversion.pyx | 5 ++- pandas/_libs/tslibs/np_datetime.pxd | 3 ++ pandas/_libs/tslibs/np_datetime.pyx | 21 +++++++++++ .../_libs/tslibs/src/datetime/np_datetime.c | 18 +++++++++ .../_libs/tslibs/src/datetime/np_datetime.h | 7 ++++ pandas/tests/tslibs/test_np_datetime.py | 37 +++++++++++++++++++ 6 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/tslibs/test_np_datetime.py diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7dce3cad9d339..0adf6f722c9ce 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -36,6 +36,7 @@ from pandas._libs.tslibs.np_datetime cimport ( dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, + get_unit_from_dtype, npy_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, @@ -234,7 +235,9 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True): result = result.copy() return result - unit = get_datetime64_unit(arr.flat[0]) + if arr.dtype.kind != "M": + raise TypeError("ensure_datetime64ns arr must have datetime64 dtype") + unit = get_unit_from_dtype(arr.dtype) if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # without raising explicitly here, we end up with a SystemError # built-in function ensure_datetime64ns returned a result with an error diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c2bbc4fe764fe..4ab10bc431d09 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -1,3 +1,4 @@ +cimport numpy as cnp from cpython.datetime cimport ( date, datetime, @@ -79,3 +80,5 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil cdef int _string_to_dts(str val, npy_datetimestruct* dts, int* out_local, int* out_tzoffset, bint want_exc) except? -1 + +cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 79a58478d630a..075deb141ce9d 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -19,6 +19,9 @@ from cpython.object cimport ( PyDateTime_IMPORT +cimport numpy as cnp + +cnp.import_array() from numpy cimport int64_t from pandas._libs.tslibs.util cimport get_c_string_buf_and_size @@ -42,6 +45,8 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS + PyArray_DatetimeMetaData* get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, @@ -74,6 +79,22 @@ cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: """ return (obj).obmeta.base + +cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype): + # NB: caller is responsible for ensuring this is *some* datetime64 or + # timedelta64 dtype, otherwise we can segfault + cdef: + cnp.PyArray_Descr* descr = dtype + PyArray_DatetimeMetaData* meta + meta = get_datetime_metadata_from_dtype(descr) + return meta.base + + +def py_get_unit_from_dtype(dtype): + # for testing get_unit_from_dtype; adds 896 bytes to the .so file. + return get_unit_from_dtype(dtype) + + # ---------------------------------------------------------------------- # Comparison diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 8f59f53a555d8..d3e6ceb532b3a 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -768,3 +768,21 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, "invalid base unit"); } } + + +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + * + * Copied near-verbatim from numpy/core/src/multiarray/datetime.c + */ +PyArray_DatetimeMetaData * +get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { + if (!PyDataType_ISDATETIME(dtype)) { + PyErr_SetString(PyExc_TypeError, + "cannot get datetime metadata from non-datetime type"); + return NULL; + } + + return &(((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 0bbc24ed822c5..89de12d7486fd 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -75,5 +75,12 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a, void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + */ +PyArray_DatetimeMetaData* get_datetime_metadata_from_dtype( + PyArray_Descr *dtype); + #endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py new file mode 100644 index 0000000000000..00a2f90217434 --- /dev/null +++ b/pandas/tests/tslibs/test_np_datetime.py @@ -0,0 +1,37 @@ +import numpy as np + +from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype + + +def test_get_unit_from_dtype(): + # datetime64 + assert py_get_unit_from_dtype(np.dtype("M8[Y]")) == 0 + assert py_get_unit_from_dtype(np.dtype("M8[M]")) == 1 + assert py_get_unit_from_dtype(np.dtype("M8[W]")) == 2 + # B has been deprecated and removed -> no 3 + assert py_get_unit_from_dtype(np.dtype("M8[D]")) == 4 + assert py_get_unit_from_dtype(np.dtype("M8[h]")) == 5 + assert py_get_unit_from_dtype(np.dtype("M8[m]")) == 6 + assert py_get_unit_from_dtype(np.dtype("M8[s]")) == 7 + assert py_get_unit_from_dtype(np.dtype("M8[ms]")) == 8 + assert py_get_unit_from_dtype(np.dtype("M8[us]")) == 9 + assert py_get_unit_from_dtype(np.dtype("M8[ns]")) == 10 + assert py_get_unit_from_dtype(np.dtype("M8[ps]")) == 11 + assert py_get_unit_from_dtype(np.dtype("M8[fs]")) == 12 + assert py_get_unit_from_dtype(np.dtype("M8[as]")) == 13 + + # timedelta64 + assert py_get_unit_from_dtype(np.dtype("m8[Y]")) == 0 + assert py_get_unit_from_dtype(np.dtype("m8[M]")) == 1 + assert py_get_unit_from_dtype(np.dtype("m8[W]")) == 2 + # B has been deprecated and removed -> no 3 + assert py_get_unit_from_dtype(np.dtype("m8[D]")) == 4 + assert py_get_unit_from_dtype(np.dtype("m8[h]")) == 5 + assert py_get_unit_from_dtype(np.dtype("m8[m]")) == 6 + assert py_get_unit_from_dtype(np.dtype("m8[s]")) == 7 + assert py_get_unit_from_dtype(np.dtype("m8[ms]")) == 8 + assert py_get_unit_from_dtype(np.dtype("m8[us]")) == 9 + assert py_get_unit_from_dtype(np.dtype("m8[ns]")) == 10 + assert py_get_unit_from_dtype(np.dtype("m8[ps]")) == 11 + assert py_get_unit_from_dtype(np.dtype("m8[fs]")) == 12 + assert py_get_unit_from_dtype(np.dtype("m8[as]")) == 13 From 6cd028a69245455f653b0ef0405ae25bb6d8772a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Mar 2022 21:17:10 -0700 Subject: [PATCH 2/2] avoid cpplint complaint --- pandas/_libs/tslibs/np_datetime.pyi | 5 +++++ pandas/_libs/tslibs/np_datetime.pyx | 4 ++-- pandas/_libs/tslibs/src/datetime/np_datetime.c | 10 ++-------- pandas/_libs/tslibs/src/datetime/np_datetime.h | 4 ++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi index db0c277b73bd5..5227de4e72f44 100644 --- a/pandas/_libs/tslibs/np_datetime.pyi +++ b/pandas/_libs/tslibs/np_datetime.pyi @@ -1 +1,6 @@ +import numpy as np + class OutOfBoundsDatetime(ValueError): ... + +# only exposed for testing +def py_get_unit_from_dtype(dtype: np.dtype): ... diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 075deb141ce9d..5f4ef84a79586 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -45,7 +45,7 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS - PyArray_DatetimeMetaData* get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -85,7 +85,7 @@ cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype): # timedelta64 dtype, otherwise we can segfault cdef: cnp.PyArray_Descr* descr = dtype - PyArray_DatetimeMetaData* meta + PyArray_DatetimeMetaData meta meta = get_datetime_metadata_from_dtype(descr) return meta.base diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index d3e6ceb532b3a..12e20df256293 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -776,13 +776,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, * * Copied near-verbatim from numpy/core/src/multiarray/datetime.c */ -PyArray_DatetimeMetaData * +PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - if (!PyDataType_ISDATETIME(dtype)) { - PyErr_SetString(PyExc_TypeError, - "cannot get datetime metadata from non-datetime type"); - return NULL; - } - - return &(((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); } diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 89de12d7486fd..8e58be1ca8383 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -76,10 +76,10 @@ void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); /* - * This function returns a pointer to the DateTimeMetaData + * This function returns the DateTimeMetaData * contained within the provided datetime dtype. */ -PyArray_DatetimeMetaData* get_datetime_metadata_from_dtype( +PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( PyArray_Descr *dtype);