diff --git a/asv_bench/benchmarks/tslibs/fields.py b/asv_bench/benchmarks/tslibs/fields.py index 23ae73811204c..203afcdaa7378 100644 --- a/asv_bench/benchmarks/tslibs/fields.py +++ b/asv_bench/benchmarks/tslibs/fields.py @@ -66,9 +66,9 @@ class TimeGetStartEndField: def setup(self, size, side, period, freqstr, month_kw): arr = np.random.randint(0, 10, size=size, dtype="i8") - self.i8data = arr + self.dt64data = arr.view("M8[ns]") self.attrname = f"is_{period}_{side}" def time_get_start_end_field(self, size, side, period, freqstr, month_kw): - get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw) + get_start_end_field(self.dt64data, self.attrname, freqstr, month_kw=month_kw) diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index 571a327b46df8..228f7dbdf5eac 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -12,7 +12,7 @@ def get_date_name_field( locale: str | None = ..., ) -> npt.NDArray[np.object_]: ... def get_start_end_field( - dtindex: npt.NDArray[np.int64], # const int64_t[:] + dt64values: npt.NDArray[np.datetime64], field: str, freqstr: str | None = ..., month_kw: int = ..., diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index cc82deec08a28..e8980dc1a7553 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -5,8 +5,10 @@ objects and arrays from locale import LC_TIME from _strptime import LocaleTime + cimport cython from cython cimport Py_ssize_t + import numpy as np cimport numpy as cnp @@ -41,8 +43,11 @@ from pandas._libs.tslibs.ccalendar cimport ( ) from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, dt64_to_dtstruct, + get_unit_from_dtype, npy_datetimestruct, + pandas_datetime_to_datetimestruct, pandas_timedeltastruct, td64_to_tdstruct, ) @@ -196,22 +201,35 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: 
@cython.wraparound(False) @cython.boundscheck(False) -def get_start_end_field(const int64_t[:] dtindex, str field, +def get_start_end_field(ndarray dt64values, str field, str freqstr=None, int month_kw=12): """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year (defined by frequency). + + Parameters + ---------- + dt64values : ndarray[datetime64], any resolution + field : str + freqstr : str or None, default None + month_kw : int, default 12 + + Returns + ------- + ndarray[bool] """ cdef: Py_ssize_t i - int count = len(dtindex) + int count = dt64values.size bint is_business = 0 int end_month = 12 int start_month = 1 ndarray[int8_t] out npy_datetimestruct dts int compare_month, modby + ndarray dtindex = dt64values.view("i8") + NPY_DATETIMEUNIT reso = get_unit_from_dtype(dt64values.dtype) out = np.zeros(count, dtype='int8') @@ -251,7 +269,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field, out[i] = 0 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) if _is_on_month(dts.month, compare_month, modby) and ( dts.day == get_firstbday(dts.year, dts.month)): @@ -263,7 +281,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field, out[i] = 0 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) if _is_on_month(dts.month, compare_month, modby) and dts.day == 1: out[i] = 1 @@ -275,7 +293,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field, out[i] = 0 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) if _is_on_month(dts.month, compare_month, modby) and ( dts.day == get_lastbday(dts.year, dts.month)): @@ -287,7 +305,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field, out[i] = 0 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) if 
_is_on_month(dts.month, compare_month, modby) and ( dts.day == get_days_in_month(dts.year, dts.month)): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index db951027e5794..e7ac855d6a832 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -487,9 +487,7 @@ cdef class _Timestamp(ABCTimestamp): dict kwds ndarray[uint8_t, cast=True] out int month_kw - - if self._reso != NPY_FR_ns: - raise NotImplementedError(self._reso) + str unit if freq: kwds = freq.kwds @@ -500,7 +498,9 @@ cdef class _Timestamp(ABCTimestamp): freqstr = None val = self._maybe_convert_value_to_local() - out = get_start_end_field(np.array([val], dtype=np.int64), + + unit = npy_unit_to_abbrev(self._reso) + out = get_start_end_field(np.array([val], dtype=f"M8[{unit}]"), field, freqstr, month_kw) return out[0] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5c8c6d7fe23a3..1e409dc17a06d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -130,7 +130,7 @@ def f(self): month_kw = kwds.get("startingMonth", kwds.get("month", 12)) result = fields.get_start_end_field( - values, field, self.freqstr, month_kw + values.view(self._ndarray.dtype), field, self.freqstr, month_kw ) else: result = fields.get_date_field(values, field) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index ab7bc4c7cb412..bc9e6c0131646 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -732,6 +732,27 @@ def test_non_nano_fields(self, dt64, ts): assert ts.weekday() == alt.weekday() assert ts.isoweekday() == alt.isoweekday() + def test_start_end_fields(self, ts): + assert ts.is_year_start + assert ts.is_quarter_start + assert ts.is_month_start + assert not ts.is_year_end + assert not ts.is_quarter_end + assert not ts.is_month_end + + freq = offsets.BDay() + 
ts._set_freq(freq) + + # 2016-01-01 is a Friday, so is year/quarter/month start with this freq + msg = "Timestamp.freq is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert ts.is_year_start + assert ts.is_quarter_start + assert ts.is_month_start + assert not ts.is_year_end + assert not ts.is_quarter_end + assert not ts.is_month_end + def test_repr(self, dt64, ts): alt = Timestamp(dt64) diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index 9e6464f7727bd..528d08d7f499b 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -28,7 +28,10 @@ def test_get_date_field_readonly(dtindex): def test_get_start_end_field_readonly(dtindex): - result = fields.get_start_end_field(dtindex, "is_month_start", None) + dt64values = dtindex.view("M8[ns]") + dt64values.flags.writeable = False + + result = fields.get_start_end_field(dt64values, "is_month_start", None) expected = np.array([True, False, False, False, False], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) diff --git a/setup.py b/setup.py index 67b91c55dd397..bca919a3aa6f8 100755 --- a/setup.py +++ b/setup.py @@ -506,6 +506,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, "_libs.tslibs.np_datetime": {