diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 8f145d0d66acc..4eb1494c4d56c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -354,6 +354,7 @@ def array_to_timedelta64( raise ValueError( "unit must not be specified if the input contains a str" ) + cnp.PyArray_ITER_NEXT(it) # Usually, we have all strings. If so, we hit the fast path. # If this path fails, we try conversion a different way, and diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index a8f81514c5645..61148605aadea 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -11,24 +11,24 @@ from pandas._libs.tslibs.offsets import BaseOffset from pandas._typing import npt def dt64arr_to_periodarr( - stamps: npt.NDArray[np.int64], # const int64_t[:] + stamps: npt.NDArray[np.int64], freq: int, tz: tzinfo | None, -) -> npt.NDArray[np.int64]: ... # np.ndarray[np.int64, ndim=1] +) -> npt.NDArray[np.int64]: ... def is_date_array_normalized( - stamps: npt.NDArray[np.int64], # const int64_t[:] + stamps: npt.NDArray[np.int64], tz: tzinfo | None = ..., ) -> bool: ... def normalize_i8_timestamps( - stamps: npt.NDArray[np.int64], # const int64_t[:] + stamps: npt.NDArray[np.int64], tz: tzinfo | None, ) -> npt.NDArray[np.int64]: ... def get_resolution( - stamps: npt.NDArray[np.int64], # const int64_t[:] + stamps: npt.NDArray[np.int64], tz: tzinfo | None = ..., ) -> Resolution: ... def ints_to_pydatetime( - arr: npt.NDArray[np.int64], # const int64_t[:}] + arr: npt.NDArray[np.int64], tz: tzinfo | None = ..., freq: BaseOffset | None = ..., fold: bool = ..., diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index fea357c9da98b..31d0579900abd 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -38,7 +38,8 @@ from .tzconversion cimport Localizer @cython.boundscheck(False) @cython.wraparound(False) -def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): +def tz_convert_from_utc(ndarray stamps, tzinfo tz): + # stamps is int64_t, arbitrary ndim """ Convert the values (in i8) from UTC to tz @@ -54,27 +55,33 @@ def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): cdef: Localizer info = Localizer(tz) int64_t utc_val, local_val - Py_ssize_t pos, i, n = stamps.shape[0] + Py_ssize_t pos, i, n = stamps.size - int64_t[::1] result + ndarray result + cnp.broadcast mi if tz is None or is_utc(tz) or stamps.size == 0: # Much faster than going through the "standard" pattern below - return stamps.base.copy() + return stamps.copy() - result = np.empty(n, dtype=np.int64) + result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) + mi = cnp.PyArray_MultiIterNew2(result, stamps) for i in range(n): - utc_val = stamps[i] + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + if utc_val == NPY_NAT: - result[i] = NPY_NAT - continue + local_val = NPY_NAT + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) - local_val = info.utc_val_to_local_val(utc_val, &pos) + # Analogous to: result[i] = local_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = local_val - result[i] = local_val + cnp.PyArray_MultiIter_NEXT(mi) - return result.base + return result # ------------------------------------------------------------------------- @@ -83,12 +90,13 @@ def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): @cython.wraparound(False) @cython.boundscheck(False) def ints_to_pydatetime( - const int64_t[:] stamps, + ndarray stamps, tzinfo tz=None, BaseOffset freq=None, bint fold=False, str box="datetime" ) -> np.ndarray: + # stamps is int64, arbitrary ndim """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. @@ -119,13 +127,21 @@ def ints_to_pydatetime( cdef: Localizer info = Localizer(tz) int64_t utc_val, local_val - Py_ssize_t i, n = stamps.shape[0] + Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning npy_datetimestruct dts tzinfo new_tz - ndarray[object] result = np.empty(n, dtype=object) bint use_date = False, use_time = False, use_ts = False, use_pydt = False + object res_val + + # Note that `result` (and thus `result_flat`) is C-order and + # `it` iterates C-order as well, so the iteration matches + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0) + object[::1] res_flat = result.ravel() # should NOT be a copy + cnp.flatiter it = cnp.PyArray_IterNew(stamps) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -142,31 +158,44 @@ def ints_to_pydatetime( ) for i in range(n): - utc_val = stamps[i] + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_ITER_DATA(it))[0] + new_tz = tz if utc_val == NPY_NAT: - result[i] = NaT - continue + res_val = NaT - local_val = info.utc_val_to_local_val(utc_val, &pos) - if info.use_pytz: - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos]] - - dt64_to_dtstruct(local_val, &dts) - - if use_ts: - result[i] = create_timestamp_from_ts(utc_val, dts, new_tz, freq, fold) - elif use_pydt: - result[i] = datetime( - dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, - new_tz, fold=fold, - ) - elif use_date: - result[i] = date(dts.year, dts.month, dts.day) else: - result[i] = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold) + + local_val = info.utc_val_to_local_val(utc_val, &pos) + if info.use_pytz: + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos]] + + dt64_to_dtstruct(local_val, &dts) + + if use_ts: + res_val = create_timestamp_from_ts(utc_val, dts, new_tz, freq, fold) + elif use_pydt: + res_val = datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, + new_tz, fold=fold, + ) + elif use_date: + res_val = date(dts.year, dts.month, dts.day) + else: + res_val = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold) + + # Note: we can index result directly instead of using PyArray_MultiIter_DATA + # like we do for the other functions because result is known C-contiguous + # and is the first argument to PyArray_MultiIterNew2. The usual pattern + # does not seem to work with object dtype. + # See discussion at + # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 + res_flat[i] = res_val + + cnp.PyArray_ITER_NEXT(it) return result @@ -190,27 +219,33 @@ cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): @cython.wraparound(False) @cython.boundscheck(False) -def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: +def get_resolution(ndarray stamps, tzinfo tz=None) -> Resolution: + # stamps is int64_t, any ndim cdef: Localizer info = Localizer(tz) int64_t utc_val, local_val - Py_ssize_t i, n = stamps.shape[0] + Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning + cnp.flatiter it = cnp.PyArray_IterNew(stamps) npy_datetimestruct dts c_Resolution reso = c_Resolution.RESO_DAY, curr_reso for i in range(n): - utc_val = stamps[i] + # Analogous to: utc_val = stamps[i] + utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it)) + if utc_val == NPY_NAT: - continue + pass + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) - local_val = info.utc_val_to_local_val(utc_val, &pos) + dt64_to_dtstruct(local_val, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso - dt64_to_dtstruct(local_val, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso + cnp.PyArray_ITER_NEXT(it) return Resolution(reso) @@ -221,7 +256,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: @cython.cdivision(False) @cython.wraparound(False) @cython.boundscheck(False) -cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz): +cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): + # stamps is int64_t, arbitrary ndim """ Normalize each of the (nanosecond) timezone aware timestamps in the given array by rounding down to the beginning of the day (i.e. midnight). @@ -238,28 +274,35 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t """ cdef: Localizer info = Localizer(tz) - int64_t utc_val, local_val - Py_ssize_t i, n = stamps.shape[0] + int64_t utc_val, local_val, res_val + Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning - int64_t[::1] result = np.empty(n, dtype=np.int64) + ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps) for i in range(n): - utc_val = stamps[i] + # Analogous to: utc_val = stamps[i] + utc_val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + if utc_val == NPY_NAT: - result[i] = NPY_NAT - continue + res_val = NPY_NAT + else: + local_val = info.utc_val_to_local_val(utc_val, &pos) + res_val = local_val - (local_val % DAY_NANOS) - local_val = info.utc_val_to_local_val(utc_val, &pos) + # Analogous to: result[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - result[i] = local_val - (local_val % DAY_NANOS) + cnp.PyArray_MultiIter_NEXT(mi) - return result.base # `.base` to access underlying ndarray + return result @cython.wraparound(False) @cython.boundscheck(False) -def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: +def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool: + # stamps is int64_t, arbitrary ndim """ Check if all of the given (nanosecond) timestamps are normalized to midnight, i.e. hour == minute == second == 0. If the optional timezone @@ -277,16 +320,21 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: cdef: Localizer info = Localizer(tz) int64_t utc_val, local_val - Py_ssize_t i, n = stamps.shape[0] + Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning + cnp.flatiter it = cnp.PyArray_IterNew(stamps) for i in range(n): - utc_val = stamps[i] + # Analogous to: utc_val = stamps[i] + utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it)) + local_val = info.utc_val_to_local_val(utc_val, &pos) if local_val % DAY_NANOS != 0: return False + cnp.PyArray_ITER_NEXT(it) + return True