From 0128e44d3f345800415147f42a857c64e77a2f0a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Jun 2022 16:58:55 -0700 Subject: [PATCH 1/4] REF: avoid ravel in ints_to_pytimedelta --- pandas/_libs/tslibs/timedeltas.pyx | 44 +++++++++++++++++++++--------- pandas/core/arrays/datetimelike.py | 4 +-- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 28a6480f368d9..2c587fe1786e4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -161,42 +161,60 @@ def ints_to_pytimedelta(ndarray m8values, box=False): array of Timedelta or timedeltas objects """ cdef: + NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype) Py_ssize_t i, n = m8values.size int64_t value - object[::1] result = np.empty(n, dtype=object) - NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype) + object res_val + + # Note that `result` (and thus `result_flat`) is C-order and + # `it` iterates C-order as well, so the iteration matches + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0) + object[::1] res_flat = result.ravel() # should NOT be a copy - arr = m8values.view("i8") + ndarray arr = m8values.view("i8") + cnp.flatiter it = cnp.PyArray_IterNew(arr) for i in range(n): + # Analogous to: value = arr[i] + value = (cnp.PyArray_ITER_DATA(it))[0] - value = arr[i] if value == NPY_NAT: + res_val = NaT result[i] = NaT else: if box: result[i] = _timedelta_from_value_and_reso(value, reso=reso) elif reso == NPY_DATETIMEUNIT.NPY_FR_ns: - result[i] = timedelta(microseconds=int(value) / 1000) + res_val = timedelta(microseconds=int(value) / 1000) elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - result[i] = timedelta(microseconds=value) + res_val = timedelta(microseconds=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - result[i] = timedelta(milliseconds=value) + res_val = timedelta(milliseconds=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - result[i] = timedelta(seconds=value) + res_val = timedelta(seconds=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_m: - result[i] = timedelta(minutes=value) + res_val = timedelta(minutes=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_h: - result[i] = timedelta(hours=value) + res_val = timedelta(hours=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_D: - result[i] = timedelta(days=value) + res_val = timedelta(days=value) elif reso == NPY_DATETIMEUNIT.NPY_FR_W: - result[i] = timedelta(weeks=value) + res_val = timedelta(weeks=value) else: # Month, Year, NPY_FR_GENERIC, pico, fempto, atto raise NotImplementedError(reso) - return result.base # .base to access underlying np.ndarray + # Note: we can index result directly instead of using PyArray_MultiIter_DATA + # like we do for the other functions because result is known C-contiguous + # and is the first argument to PyArray_MultiIterNew2. The usual pattern + # does not seem to work with object dtype. + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + res_flat[i] = res_val + + return result # ---------------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f81859ced01ed..1dfb070e29c30 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -437,9 +437,7 @@ def astype(self, dtype, copy: bool = True): return converted.reshape(self.shape) elif self.dtype.kind == "m": - i8data = self.asi8.ravel() - converted = ints_to_pytimedelta(self._ndarray.ravel(), box=True) - return converted.reshape(self.shape) + return ints_to_pytimedelta(self._ndarray, box=True) return self._box_values(self.asi8.ravel()).reshape(self.shape) From 896572fde47b4bc12d5425094423c482a923d37d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Jun 2022 20:31:52 -0700 Subject: [PATCH 2/4] copy/paste fixup --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2c587fe1786e4..cdddf11053844 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -170,7 +170,7 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0) + ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) object[::1] res_flat = result.ravel() # should NOT be a copy ndarray arr = m8values.view("i8") From f913a60a2685d6c30bb29ed0a4fd818545b03995 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jun 2022 06:31:11 -0700 Subject: [PATCH 3/4] typo fixup --- pandas/_libs/tslibs/timedeltas.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index cdddf11053844..6fc78b21ad38d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -182,10 +182,9 @@ def ints_to_pytimedelta(ndarray m8values, box=False): if value == NPY_NAT: res_val = NaT - result[i] = NaT else: if box: - result[i] = _timedelta_from_value_and_reso(value, reso=reso) + res_val = _timedelta_from_value_and_reso(value, reso=reso) elif reso == NPY_DATETIMEUNIT.NPY_FR_ns: res_val = timedelta(microseconds=int(value) / 1000) elif reso == NPY_DATETIMEUNIT.NPY_FR_us: From c05c838532e3a249921807bfdfb6c08d0b0f4dce Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jun 2022 11:19:34 -0700 Subject: [PATCH 4/4] copy/paste mixup --- pandas/_libs/tslibs/timedeltas.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6fc78b21ad38d..f8f01afb06812 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -213,6 +213,8 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 res_flat[i] = res_val + cnp.PyArray_ITER_NEXT(it) + return result