pandas-dev · jreback · May 6, 2022 · Apr 27, 2022 · May 2, 2022 · May 4, 2022
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -354,6 +354,7 @@ def array_to_timedelta64(
                 raise ValueError(
                     "unit must not be specified if the input contains a str"
                 )
+            cnp.PyArray_ITER_NEXT(it)
 
     # Usually, we have all strings. If so, we hit the fast path.
     # If this path fails, we try conversion a different way, and

diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi
@@ -11,24 +11,24 @@ from pandas._libs.tslibs.offsets import BaseOffset
 from pandas._typing import npt
 
 def dt64arr_to_periodarr(
-    stamps: npt.NDArray[np.int64],  # const int64_t[:]
+    stamps: npt.NDArray[np.int64],
     freq: int,
     tz: tzinfo | None,
-) -> npt.NDArray[np.int64]: ...  # np.ndarray[np.int64, ndim=1]
+) -> npt.NDArray[np.int64]: ...
 def is_date_array_normalized(
-    stamps: npt.NDArray[np.int64],  # const int64_t[:]
+    stamps: npt.NDArray[np.int64],
     tz: tzinfo | None = ...,
 ) -> bool: ...
 def normalize_i8_timestamps(
-    stamps: npt.NDArray[np.int64],  # const int64_t[:]
+    stamps: npt.NDArray[np.int64],
     tz: tzinfo | None,
 ) -> npt.NDArray[np.int64]: ...
 def get_resolution(
-    stamps: npt.NDArray[np.int64],  # const int64_t[:]
+    stamps: npt.NDArray[np.int64],
     tz: tzinfo | None = ...,
 ) -> Resolution: ...
 def ints_to_pydatetime(
-    arr: npt.NDArray[np.int64],  # const int64_t[:}]
+    arr: npt.NDArray[np.int64],
     tz: tzinfo | None = ...,
     freq: BaseOffset | None = ...,
     fold: bool = ...,

diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx
@@ -38,7 +38,8 @@ from .tzconversion cimport Localizer
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
+def tz_convert_from_utc(ndarray stamps, tzinfo tz):
+    # stamps is int64_t, arbitrary ndim
     """
     Convert the values (in i8) from UTC to tz
 
@@ -54,27 +55,33 @@ def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
     cdef:
         Localizer info = Localizer(tz)
         int64_t utc_val, local_val
-        Py_ssize_t pos, i, n = stamps.shape[0]
+        Py_ssize_t pos, i, n = stamps.size
 
-        int64_t[::1] result
+        ndarray result
+        cnp.broadcast mi
 
     if tz is None or is_utc(tz) or stamps.size == 0:
         # Much faster than going through the "standard" pattern below
-        return stamps.base.copy()
+        return stamps.copy()
 
-    result = np.empty(n, dtype=np.int64)
+    result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
+    mi = cnp.PyArray_MultiIterNew2(result, stamps)
 
     for i in range(n):
-        utc_val = stamps[i]
+        # Analogous to: utc_val = stamps[i]
+        utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
         if utc_val == NPY_NAT:
-            result[i] = NPY_NAT
-            continue
+            local_val = NPY_NAT
+        else:
+            local_val = info.utc_val_to_local_val(utc_val, &pos)
 
-        local_val = info.utc_val_to_local_val(utc_val, &pos)
+        # Analogous to: result[i] = local_val
+        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = local_val
 
-        result[i] = local_val
+        cnp.PyArray_MultiIter_NEXT(mi)
 
-    return result.base
+    return result
 
 
 # -------------------------------------------------------------------------
@@ -83,12 +90,13 @@ def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def ints_to_pydatetime(
-    const int64_t[:] stamps,
+    ndarray stamps,
     tzinfo tz=None,
     BaseOffset freq=None,
     bint fold=False,
     str box="datetime"
 ) -> np.ndarray:
+    # stamps is int64, arbitrary ndim
     """
     Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.
 
@@ -119,13 +127,21 @@ def ints_to_pydatetime(
     cdef:
         Localizer info = Localizer(tz)
         int64_t utc_val, local_val
-        Py_ssize_t i, n = stamps.shape[0]
+        Py_ssize_t i, n = stamps.size
         Py_ssize_t pos = -1  # unused, avoid not-initialized warning
 
         npy_datetimestruct dts
         tzinfo new_tz
-        ndarray[object] result = np.empty(n, dtype=object)
         bint use_date = False, use_time = False, use_ts = False, use_pydt = False
+        object res_val
+
+        # Note that `result` (and thus `result_flat`) is C-order and
+        #  `it` iterates C-order as well, so the iteration matches
+        #  See discussion at
+        #  github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
+        ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0)
+        object[::1] res_flat = result.ravel()     # should NOT be a copy
+        cnp.flatiter it = cnp.PyArray_IterNew(stamps)
 
     if box == "date":
         assert (tz is None), "tz should be None when converting to date"
@@ -142,31 +158,44 @@ def ints_to_pydatetime(
         )
 
     for i in range(n):
-        utc_val = stamps[i]
+        # Analogous to: utc_val = stamps[i]
+        utc_val = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]
+
         new_tz = tz
 
         if utc_val == NPY_NAT:
-            result[i] = <object>NaT
-            continue
+            res_val = <object>NaT
 
-        local_val = info.utc_val_to_local_val(utc_val, &pos)
-        if info.use_pytz:
-            # find right representation of dst etc in pytz timezone
-            new_tz = tz._tzinfos[tz._transition_info[pos]]
-
-        dt64_to_dtstruct(local_val, &dts)
-
-        if use_ts:
-            result[i] = create_timestamp_from_ts(utc_val, dts, new_tz, freq, fold)
-        elif use_pydt:
-            result[i] = datetime(
-                dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
-                new_tz, fold=fold,
-            )
-        elif use_date:
-            result[i] = date(dts.year, dts.month, dts.day)
         else:
-            result[i] = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold)
+
+            local_val = info.utc_val_to_local_val(utc_val, &pos)
+            if info.use_pytz:
+                # find right representation of dst etc in pytz timezone
+                new_tz = tz._tzinfos[tz._transition_info[pos]]
+
+            dt64_to_dtstruct(local_val, &dts)
+
+            if use_ts:
+                res_val = create_timestamp_from_ts(utc_val, dts, new_tz, freq, fold)
+            elif use_pydt:
+                res_val = datetime(
+                    dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
+                    new_tz, fold=fold,
+                )
+            elif use_date:
+                res_val = date(dts.year, dts.month, dts.day)
+            else:
+                res_val = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold)
+
+        # Note: we can index result directly instead of using PyArray_MultiIter_DATA
+        #  like we do for the other functions because result is known C-contiguous
+        #  and is the first argument to PyArray_MultiIterNew2.  The usual pattern
+        #  does not seem to work with object dtype.
+        #  See discussion at
+        #  github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
+        res_flat[i] = res_val
+
+        cnp.PyArray_ITER_NEXT(it)
 
     return result
 
@@ -190,27 +219,33 @@ cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
+def get_resolution(ndarray stamps, tzinfo tz=None) -> Resolution:
+    # stamps is int64_t, any ndim
     cdef:
         Localizer info = Localizer(tz)
         int64_t utc_val, local_val
-        Py_ssize_t i, n = stamps.shape[0]
+        Py_ssize_t i, n = stamps.size
         Py_ssize_t pos = -1  # unused, avoid not-initialized warning
+        cnp.flatiter it = cnp.PyArray_IterNew(stamps)
 
         npy_datetimestruct dts
         c_Resolution reso = c_Resolution.RESO_DAY, curr_reso
 
     for i in range(n):
-        utc_val = stamps[i]
+        # Analogous to: utc_val = stamps[i]
+        utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))
+
         if utc_val == NPY_NAT:
-            continue
+            pass
+        else:
+            local_val = info.utc_val_to_local_val(utc_val, &pos)
 
-        local_val = info.utc_val_to_local_val(utc_val, &pos)
+            dt64_to_dtstruct(local_val, &dts)
+            curr_reso = _reso_stamp(&dts)
+            if curr_reso < reso:
+                reso = curr_reso
 
-        dt64_to_dtstruct(local_val, &dts)
-        curr_reso = _reso_stamp(&dts)
-        if curr_reso < reso:
-            reso = curr_reso
+        cnp.PyArray_ITER_NEXT(it)
 
     return Resolution(reso)
 
@@ -221,7 +256,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
 @cython.cdivision(False)
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz):
+cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz):
+    # stamps is int64_t, arbitrary ndim
     """
     Normalize each of the (nanosecond) timezone aware timestamps in the given
     array by rounding down to the beginning of the day (i.e. midnight).
@@ -238,28 +274,35 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
     """
     cdef:
         Localizer info = Localizer(tz)
-        int64_t utc_val, local_val
-        Py_ssize_t i, n = stamps.shape[0]
+        int64_t utc_val, local_val, res_val
+        Py_ssize_t i, n = stamps.size
         Py_ssize_t pos = -1  # unused, avoid not-initialized warning
 
-        int64_t[::1] result = np.empty(n, dtype=np.int64)
+        ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
+        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)
 
     for i in range(n):
-        utc_val = stamps[i]
+        # Analogous to: utc_val = stamps[i]
+        utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
         if utc_val == NPY_NAT:
-            result[i] = NPY_NAT
-            continue
+            res_val = NPY_NAT
+        else:
+            local_val = info.utc_val_to_local_val(utc_val, &pos)
+            res_val = local_val - (local_val % DAY_NANOS)
 
-        local_val = info.utc_val_to_local_val(utc_val, &pos)
+        # Analogous to: result[i] = res_val
+        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val
 
-        result[i] = local_val - (local_val % DAY_NANOS)
+        cnp.PyArray_MultiIter_NEXT(mi)
 
-    return result.base  # `.base` to access underlying ndarray
+    return result
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
+def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool:
+    # stamps is int64_t, arbitrary ndim
     """
     Check if all of the given (nanosecond) timestamps are normalized to
     midnight, i.e. hour == minute == second == 0.  If the optional timezone
@@ -277,16 +320,21 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
     cdef:
         Localizer info = Localizer(tz)
         int64_t utc_val, local_val
-        Py_ssize_t i, n = stamps.shape[0]
+        Py_ssize_t i, n = stamps.size
         Py_ssize_t pos = -1  # unused, avoid not-initialized warning
+        cnp.flatiter it = cnp.PyArray_IterNew(stamps)
 
     for i in range(n):
-        utc_val = stamps[i]
+        # Analogous to: utc_val = stamps[i]
+        utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))
+
         local_val = info.utc_val_to_local_val(utc_val, &pos)
 
         if local_val % DAY_NANOS != 0:
             return False
 
+        cnp.PyArray_ITER_NEXT(it)
+
     return True