From 8c9aaa858655d213275f058a47cc247da0442c91 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 3 May 2024 14:33:49 +0200 Subject: [PATCH 1/9] correct def get_start_end_field, add test, add a note to v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/fields.pyx | 4 ++-- pandas/tests/indexes/datetimes/test_scalar_compat.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9e7349a061295..ac4725ec7b9fe 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -410,6 +410,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) +- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning False on double-digit frequencies (:issue:`58523`) - Missing diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index ff4fb4d635d17..c31c13fde29ea 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -3,6 +3,7 @@ Functions for accessing attributes of Timestamp/datetime64/datetime-like objects and arrays """ from locale import LC_TIME +import re from _strptime import LocaleTime @@ -253,8 +254,7 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if (freqstr[0:2] in ["MS", "QS", "YS"]) or ( - freqstr[1:3] in ["MS", "QS", "YS"]): + if re.split("[0-9]*", freqstr, maxsplit=1)[1][0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index f766894a993a0..1f2cfe8b7ddc8 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -328,3 +328,11 @@ def test_dti_is_month_start_custom(self): msg = "Custom business days is not supported by is_month_start" with pytest.raises(ValueError, match=msg): dti.is_month_start + + def test_dti_is_year_quarter_start_doubledigit_freq(self): + # GH#58523 + dr = date_range("2017-01-01", periods=2, freq="10YS") + assert all(dr.is_year_start) + + dr = date_range("2017-01-01", periods=2, freq="10QS") + assert all(dr.is_quarter_start) From 21bd212c06795252bb36592d971c970ba6f95679 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 7 May 2024 15:56:39 +0200 Subject: [PATCH 2/9] replace regex with to_offset --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/tslibs/fields.pyx | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ac4725ec7b9fe..993064ee1b27a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -410,7 +410,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) -- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning False on double-digit frequencies (:issue:`58523`) +- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) - Missing diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index c31c13fde29ea..927271c5b1ebf 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -3,7 +3,6 @@ Functions for accessing attributes of Timestamp/datetime64/datetime-like objects and arrays """ from locale import LC_TIME -import re from _strptime import LocaleTime @@ -50,6 +49,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) +from pandas._libs.tslibs.offsets cimport to_offset import_pandas_datetime() @@ -254,7 +254,8 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if re.split("[0-9]*", freqstr, maxsplit=1)[1][0:2] in ["MS", "QS", "YS"]: + offset = to_offset(freqstr) + if offset.freqstr.replace(str(offset.n), "")[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: From 9f094d715d74bbf83c93e477440e9dc81e385b6f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 8 May 2024 16:25:05 +0200 Subject: [PATCH 3/9] replace offset.freqstr.replace with offset.name --- pandas/_libs/tslibs/fields.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 927271c5b1ebf..e4d83dca6f109 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -254,8 +254,7 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - offset = to_offset(freqstr) - if offset.freqstr.replace(str(offset.n), "")[0:2] in ["MS", "QS", "YS"]: + if to_offset(freqstr).name[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: From 670a8db93cae17c06a499150a4c3f4575f603fd3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 10:50:05 +0200 Subject: [PATCH 4/9] move to_offset from get_start_end_field up --- pandas/_libs/tslibs/fields.pyx | 3 +-- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/core/arrays/datetimes.py | 6 +++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index e4d83dca6f109..88fd0ae9145f7 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -49,7 +49,6 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) -from pandas._libs.tslibs.offsets cimport to_offset import_pandas_datetime() @@ -254,7 +253,7 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if to_offset(freqstr).name[0:2] in ["MS", "QS", "YS"]: + if freqstr[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d2468efd9783d..7a95372dbd551 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -587,7 +587,8 @@ cdef class _Timestamp(ABCTimestamp): val = self._maybe_convert_value_to_local() out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw, self._creso) + field, to_offset(freqstr).name , + month_kw, self._creso) return out[0] @property diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8747f795bebd8..7f2ce935dfc12 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -145,8 +145,12 @@ def f(self): kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) + if self.freqstr is not None: + freqstr = to_offset(self.freqstr).name + else: + freqstr = self.freqstr result = fields.get_start_end_field( - values, field, self.freqstr, month_kw, reso=self._creso + values, field, freqstr, month_kw, reso=self._creso ) else: result = fields.get_date_field(values, field, reso=self._creso) From adaf9745e02f1e6d85c05b586e89e82948e22a87 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 12:12:55 +0200 Subject: [PATCH 5/9] fixup test_is_yqm_start_end --- pandas/_libs/tslibs/fields.pyx | 3 ++- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- pandas/core/arrays/datetimes.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 88fd0ae9145f7..a4541ee3ac046 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -253,7 +253,8 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. - if freqstr[0:2] in ["MS", "QS", "YS"]: + freq_name = freqstr.lstrip("B")[0:2] + if freq_name in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7a95372dbd551..138a55b64c476 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,7 +579,7 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = freq.freqstr + freqstr = to_offset(freq.freqstr).name else: month_kw = 12 freqstr = None @@ -587,8 +587,7 @@ cdef class _Timestamp(ABCTimestamp): val = self._maybe_convert_value_to_local() out = get_start_end_field(np.array([val], dtype=np.int64), - field, to_offset(freqstr).name , - month_kw, self._creso) + field, freqstr, month_kw, self._creso) return out[0] @property diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7f2ce935dfc12..db878a2b26a2a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -145,10 +145,10 @@ def f(self): kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - if self.freqstr is not None: - freqstr = to_offset(self.freqstr).name + if freq is not None: + freqstr = to_offset(freq.freqstr).name else: - freqstr = self.freqstr + freqstr = freq result = fields.get_start_end_field( values, field, freqstr, month_kw, reso=self._creso ) From 57a834ed6d8d06d2d2439cf6e97b29aa2d53aac4 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 12:42:40 +0200 Subject: [PATCH 6/9] rename the argument freqstr in get_start_end_field --- pandas/_libs/tslibs/fields.pyi | 2 +- pandas/_libs/tslibs/fields.pyx | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index c6cfd44e9f6ab..bc55e34f3d208 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -16,7 +16,7 @@ def get_date_name_field( def get_start_end_field( dtindex: npt.NDArray[np.int64], field: str, - freqstr: str | None = ..., + freq_name: str | None = ..., month_kw: int = ..., reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index a4541ee3ac046..8f8060b2a5f83 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -210,7 +210,7 @@ cdef bint _is_on_month(int month, int compare_month, int modby) noexcept nogil: def get_start_end_field( const int64_t[:] dtindex, str field, - str freqstr=None, + str freq_name=None, int month_kw=12, NPY_DATETIMEUNIT reso=NPY_FR_ns, ): @@ -223,7 +223,7 @@ def get_start_end_field( ---------- dtindex : ndarray[int64] field : str - frestr : str or None, default None + freq_name : str or None, default None month_kw : int, default 12 reso : NPY_DATETIMEUNIT, default NPY_FR_ns @@ -243,18 +243,17 @@ def get_start_end_field( out = np.zeros(count, dtype="int8") - if freqstr: - if freqstr == "C": + if freq_name: + if freq_name == "C": raise ValueError(f"Custom business days is not supported by {field}") - is_business = freqstr[0] == "B" + is_business = freq_name[0] == "B" # YearBegin(), BYearBegin() use month = starting month of year. # QuarterBegin(), BQuarterBegin() use startingMonth = starting # month of year. Other offsets use month, startingMonth as ending # month of year. - freq_name = freqstr.lstrip("B")[0:2] - if freq_name in ["MS", "QS", "YS"]: + if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]: end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: From d77f24880cc291cc1ab5f69923087c1a6f8afc64 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 13:19:14 +0200 Subject: [PATCH 7/9] rename the variable freqstr in _field_accessor --- pandas/core/arrays/datetimes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 53501b471004a..635c90f7559af 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -146,11 +146,11 @@ def f(self): month_kw = kwds.get("startingMonth", kwds.get("month", 12)) if freq is not None: - freqstr = to_offset(freq.freqstr).name + freq_name = to_offset(freq.freqstr).name else: - freqstr = freq + freq_name = None result = fields.get_start_end_field( - values, field, freqstr, month_kw, reso=self._creso + values, field, freq_name, month_kw, reso=self._creso ) else: result = fields.get_date_field(values, field, reso=self._creso) From 65e6301c31f13a99cb819308e1051b5e6f4826e4 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 9 May 2024 20:41:54 +0200 Subject: [PATCH 8/9] simplify to_offset(freq.freqstr).name --- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/core/arrays/datetimes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 41e5245e28859..9812b65580710 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,7 +579,7 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = to_offset(freq.freqstr).name + freqstr = freq.name else: month_kw = 12 freqstr = None diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 635c90f7559af..b075e3d299ed0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -146,7 +146,7 @@ def f(self): month_kw = kwds.get("startingMonth", kwds.get("month", 12)) if freq is not None: - freq_name = to_offset(freq.freqstr).name + freq_name = freq.name else: freq_name = None result = fields.get_start_end_field( From 45a23244e7ace479218a4fe41ccddec8d45d13ba Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 10 May 2024 16:49:55 +0200 Subject: [PATCH 9/9] rename the variable freqstr --- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9812b65580710..0010497425c02 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,15 +579,15 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = freq.name + freq_name = freq.name else: month_kw = 12 - freqstr = None + freq_name = None val = self._maybe_convert_value_to_local() out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw, self._creso) + field, freq_name, month_kw, self._creso) return out[0] @property