From 7bee90728f94975d8a7a1799fe65215b6c16e022 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Tue, 19 Mar 2024 23:14:25 +0100 Subject: [PATCH 01/11] support offsets of offsets in holiday creation --- pandas/tests/tseries/holiday/test_holiday.py | 28 ++++++++++++++++++++ pandas/tseries/holiday.py | 13 ++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index b2eefd04ef93b..da8f2a06855d3 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -6,6 +6,7 @@ from pandas import ( DatetimeIndex, Series, + to_datetime, ) import pandas._testing as tm @@ -198,6 +199,33 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] +def test_holidays_within_dates_offset_of_offset(): + # see gh-29049 + # Test that the offset of an offset is correctly applied to the holiday + # And that dates can be calculated + holiday1 = Holiday( + "Holiday1", + month=USThanksgivingDay.month, + day=USThanksgivingDay.day, + offset=[USThanksgivingDay.offset, DateOffset(1)], + ) + holiday2 = Holiday( + "Holiday2", + month=holiday1.month, + day=holiday1.day, + offset=[holiday1.offset, DateOffset(3)], + ) + # there shall be no lists of lists here + for offset in holiday2.offset: + assert isinstance(offset, DateOffset) + + min_date, max_date = (to_datetime(x) for x in ["2017-11-1", "2018-11-30"]) + expected_min, expected_max = DatetimeIndex(["2017-11-27", "2018-11-26"]) + actual_min, actual_max = holiday2.dates(min_date, max_date) + assert actual_min == expected_min + assert actual_max == expected_max + + @pytest.mark.parametrize( "transform", [lambda x: x.strftime("%Y-%m-%d"), lambda x: Timestamp(x)] ) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 50d0d33f0339f..39284f610149a 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -223,7 +223,18 @@ class from pandas.tseries.offsets self.year = year self.month = month self.day = day - self.offset = offset + if isinstance(offset, list): + self.offset = [] + for off in offset: + # check if we are handling offsets of another holiday + if isinstance(off, list): + self.offset.extend(np.ravel(off)) + else: + # otherwise it should be a DateOffset, we do not support other + # array-like types + self.offset.append(off) + else: + self.offset = offset self.start_date = ( Timestamp(start_date) if start_date is not None else start_date ) From aaa781f86fc87b494a6ce1f7fc4090e9f72829ef Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Tue, 19 Mar 2024 23:48:56 +0100 Subject: [PATCH 02/11] update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 16be9e0a4fc34..84c5142619b37 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -280,6 +280,7 @@ Performance improvements Bug fixes ~~~~~~~~~ +- Fixed bug in :class:`Series.Holiday` that leads to a ``TypeError`` when using ``Holiday.dates`` with a ``Holiday`` that is initialized with ``offset`` of type ``list`` which itself references another Holiday's list of offsets. (:issue:`29049`) - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) From 0cf929eb3dfb924cec3467fc2611e860628893a6 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 20 Mar 2024 00:45:54 +0100 Subject: [PATCH 03/11] add type annotations --- pandas/tseries/holiday.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 39284f610149a..c5caad7a67481 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -4,6 +4,10 @@ datetime, timedelta, ) +from typing import ( + Callable, + SupportsIndex, +) import warnings from dateutil.relativedelta import ( @@ -156,25 +160,33 @@ class Holiday: def __init__( self, name: str, - year=None, - month=None, - day=None, - offset=None, - observance=None, + year: SupportsIndex | None = None, + month: SupportsIndex | None = None, + day: SupportsIndex | None = None, + offset: None | DateOffset | list[DateOffset | list[DateOffset]] = None, + observance: Callable | None = None, start_date=None, end_date=None, - days_of_week=None, + days_of_week: tuple | None = None, ) -> None: """ Parameters ---------- name : str Name of the holiday , defaults to class name + year: int + Year of the holiday + month: int + Month of the holiday + day: int + Day of the holiday offset : array of pandas.tseries.offsets or class from pandas.tseries.offsets computes offset from date observance: function computes when holiday is given a pandas Timestamp + start_date : datetime-like, optional + end_date : datetime-like, optional days_of_week: provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday Monday=0,..,Sunday=6 From 8a9a4b640c4d17c601506335612935e4a6ac0293 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 20 Mar 2024 01:35:24 +0100 Subject: [PATCH 04/11] update type annotation --- pandas/tseries/holiday.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index c5caad7a67481..3da812c77b86f 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -5,6 +5,7 @@ timedelta, ) from typing import ( + TYPE_CHECKING, Callable, SupportsIndex, ) @@ -37,6 +38,9 @@ Easter, ) +if TYPE_CHECKING: + from pandas._libs.tslibs.offsets import BaseOffset + def next_monday(dt: datetime) -> datetime: """ @@ -163,7 +167,7 @@ def __init__( year: SupportsIndex | None = None, month: SupportsIndex | None = None, day: SupportsIndex | None = None, - offset: None | DateOffset | list[DateOffset | list[DateOffset]] = None, + offset: None | BaseOffset | list[BaseOffset | list[BaseOffset]] = None, observance: Callable | None = None, start_date=None, end_date=None, From 8e572bb8815a8d9e377773e77a288be6fb6987f7 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 20 Mar 2024 22:29:49 +0100 Subject: [PATCH 05/11] make types compatible --- pandas/tseries/holiday.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 3da812c77b86f..6b2180a3ff116 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -7,7 +7,6 @@ from typing import ( TYPE_CHECKING, Callable, - SupportsIndex, ) import warnings @@ -164,10 +163,10 @@ class Holiday: def __init__( self, name: str, - year: SupportsIndex | None = None, - month: SupportsIndex | None = None, - day: SupportsIndex | None = None, - offset: None | BaseOffset | list[BaseOffset | list[BaseOffset]] = None, + year=None, + month=None, + day=None, + offset: BaseOffset | list[BaseOffset | list[BaseOffset]] | None = None, observance: Callable | None = None, start_date=None, end_date=None, @@ -239,15 +238,14 @@ class from pandas.tseries.offsets self.year = year self.month = month self.day = day + self.offset: None | BaseOffset | list[BaseOffset] if isinstance(offset, list): self.offset = [] for off in offset: - # check if we are handling offsets of another holiday + # check if we are handling composite offsets of another holiday if isinstance(off, list): - self.offset.extend(np.ravel(off)) + self.offset.extend(off) else: - # otherwise it should be a DateOffset, we do not support other - # array-like types self.offset.append(off) else: self.offset = offset From 0548ddf27d9becb97212eb63174a133868695d18 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 20 Mar 2024 22:56:39 +0100 Subject: [PATCH 06/11] update docstring --- pandas/tseries/holiday.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 6b2180a3ff116..65b6c02243e57 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -177,21 +177,23 @@ def __init__( ---------- name : str Name of the holiday , defaults to class name - year: int + year : int, default None Year of the holiday - month: int + month : int, default None Month of the holiday - day: int + day : int, default None Day of the holiday offset : array of pandas.tseries.offsets or - class from pandas.tseries.offsets - computes offset from date - observance: function - computes when holiday is given a pandas Timestamp - start_date : datetime-like, optional - end_date : datetime-like, optional - days_of_week: - provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday + class from pandas.tseries.offsets, default None + Computes offset from date + observance : function, default None + Computes when holiday is given a pandas Timestamp + start_date : datetime-like, default None + First date the holiday is observed + end_date : datetime-like, default None + Last date the holiday is observed + days_of_week : tuple of int or dateutil.relativedelta weekday strs, default None + Provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday Monday=0,..,Sunday=6 Examples From c24b1431fe4ee89cd6cdcd23e291f56c5e4ec2d3 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Tue, 26 Mar 2024 22:40:00 +0100 Subject: [PATCH 07/11] don't accept, only raise with more informative exception --- doc/source/whatsnew/v3.0.0.rst | 1 - pandas/tests/tseries/holiday/test_holiday.py | 46 ++++++++------------ pandas/tseries/holiday.py | 19 +++----- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 84c5142619b37..16be9e0a4fc34 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -280,7 +280,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug in :class:`Series.Holiday` that leads to a ``TypeError`` when using ``Holiday.dates`` with a ``Holiday`` that is initialized with ``offset`` of type ``list`` which itself references another Holiday's list of offsets. (:issue:`29049`) - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index da8f2a06855d3..aa9590865059e 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -6,7 +6,6 @@ from pandas import ( DatetimeIndex, Series, - to_datetime, ) import pandas._testing as tm @@ -199,33 +198,6 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -def test_holidays_within_dates_offset_of_offset(): - # see gh-29049 - # Test that the offset of an offset is correctly applied to the holiday - # And that dates can be calculated - holiday1 = Holiday( - "Holiday1", - month=USThanksgivingDay.month, - day=USThanksgivingDay.day, - offset=[USThanksgivingDay.offset, DateOffset(1)], - ) - holiday2 = Holiday( - "Holiday2", - month=holiday1.month, - day=holiday1.day, - offset=[holiday1.offset, DateOffset(3)], - ) - # there shall be no lists of lists here - for offset in holiday2.offset: - assert isinstance(offset, DateOffset) - - min_date, max_date = (to_datetime(x) for x in ["2017-11-1", "2018-11-30"]) - expected_min, expected_max = DatetimeIndex(["2017-11-27", "2018-11-26"]) - actual_min, actual_max = holiday2.dates(min_date, max_date) - assert actual_min == expected_min - assert actual_max == expected_max - - @pytest.mark.parametrize( "transform", [lambda x: x.strftime("%Y-%m-%d"), lambda x: Timestamp(x)] ) @@ -299,6 +271,24 @@ def test_both_offset_observance_raises(): ) +def test_list_of_list_of_offsets_raises(): + # see gh-29049 + # Test that the offsets of offsets are forbidden + holiday1 = Holiday( + "Holiday1", + month=USThanksgivingDay.month, + day=USThanksgivingDay.day, + offset=[USThanksgivingDay.offset, DateOffset(1)], + ) + with pytest.raises(ValueError, match="Nested lists are not supported for offset"): + Holiday( + "Holiday2", + month=holiday1.month, + day=holiday1.day, + offset=[holiday1.offset, DateOffset(3)], + ) + + def test_half_open_interval_with_observance(): # Prompted by GH 49075 # Check for holidays that have a half-open date interval where diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 65b6c02243e57..2a8cd20d8b7be 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -166,7 +166,7 @@ def __init__( year=None, month=None, day=None, - offset: BaseOffset | list[BaseOffset | list[BaseOffset]] | None = None, + offset: BaseOffset | list[BaseOffset] | None = None, observance: Callable | None = None, start_date=None, end_date=None, @@ -235,22 +235,17 @@ class from pandas.tseries.offsets, default None """ if offset is not None and observance is not None: raise NotImplementedError("Cannot use both offset and observance.") + if isinstance(offset, list) and any(isinstance(off, list) for off in offset): + raise ValueError( + "Nested lists are not supported for offset. " + "Flatten composite offsets of `Holiday.offset`s first." + ) self.name = name self.year = year self.month = month self.day = day - self.offset: None | BaseOffset | list[BaseOffset] - if isinstance(offset, list): - self.offset = [] - for off in offset: - # check if we are handling composite offsets of another holiday - if isinstance(off, list): - self.offset.extend(off) - else: - self.offset.append(off) - else: - self.offset = offset + self.offset = offset self.start_date = ( Timestamp(start_date) if start_date is not None else start_date ) From af3f19e3e3e9e0d5cf4f4a551515b84aa24f5999 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Tue, 26 Mar 2024 23:03:15 +0100 Subject: [PATCH 08/11] add attribute type hint --- pandas/tseries/holiday.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 0590c1df9530d..97fbf6a405a63 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -156,6 +156,7 @@ class Holiday: for observance. """ + offset: BaseOffset | list[BaseOffset] | None start_date: Timestamp | None end_date: Timestamp | None days_of_week: tuple[int, ...] | None From a1b9854885f0786a49189cd3ae9636851890a849 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 27 Mar 2024 19:56:44 +0100 Subject: [PATCH 09/11] change array to list in docstring Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tseries/holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 97fbf6a405a63..5345914415eac 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -184,7 +184,7 @@ def __init__( Month of the holiday day : int, default None Day of the holiday - offset : array of pandas.tseries.offsets or + offset : list of pandas.tseries.offsets or class from pandas.tseries.offsets, default None Computes offset from date observance : function, default None From b80523a878b02b7e5c11fefc0215e5d85ef90732 Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 27 Mar 2024 21:14:13 +0100 Subject: [PATCH 10/11] broaden if check; specify error message --- pandas/tests/tseries/holiday/test_holiday.py | 3 +- pandas/tseries/holiday.py | 30 ++++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index aa9590865059e..4b9ba13ce0fc9 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -280,7 +280,8 @@ def test_list_of_list_of_offsets_raises(): day=USThanksgivingDay.day, offset=[USThanksgivingDay.offset, DateOffset(1)], ) - with pytest.raises(ValueError, match="Nested lists are not supported for offset"): + msg = "Only BaseOffsets and flat lists thereof are supported for offset." + with pytest.raises(ValueError, match=msg): Holiday( "Holiday2", month=holiday1.month, diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 97fbf6a405a63..371f3a2edb8c4 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -4,10 +4,7 @@ datetime, timedelta, ) -from typing import ( - TYPE_CHECKING, - Callable, -) +from typing import Callable import warnings from dateutil.relativedelta import ( @@ -21,6 +18,7 @@ ) import numpy as np +from pandas._libs.tslibs.offsets import BaseOffset from pandas.errors import PerformanceWarning from pandas import ( @@ -37,9 +35,6 @@ Easter, ) -if TYPE_CHECKING: - from pandas._libs.tslibs.offsets import BaseOffset - def next_monday(dt: datetime) -> datetime: """ @@ -156,7 +151,6 @@ class Holiday: for observance. """ - offset: BaseOffset | list[BaseOffset] | None start_date: Timestamp | None end_date: Timestamp | None days_of_week: tuple[int, ...] | None @@ -234,13 +228,19 @@ class from pandas.tseries.offsets, default None >>> July3rd Holiday: July 3rd (month=7, day=3, ) """ - if offset is not None and observance is not None: - raise NotImplementedError("Cannot use both offset and observance.") - if isinstance(offset, list) and any(isinstance(off, list) for off in offset): - raise ValueError( - "Nested lists are not supported for offset. " - "Flatten composite offsets of `Holiday.offset`s first." - ) + if offset is not None: + if observance is not None: + raise NotImplementedError("Cannot use both offset and observance.") + if not ( + isinstance(offset, BaseOffset) + or ( + isinstance(offset, list) + and all(isinstance(off, BaseOffset) for off in offset) + ) + ): + raise ValueError( + "Only BaseOffsets and flat lists of them are supported for offset." + ) self.name = name self.year = year From 085a740efb491c7f3a6794b0d0ac50e4eb04617e Mon Sep 17 00:00:00 2001 From: Philipp Hoffmann Date: Wed, 27 Mar 2024 21:16:25 +0100 Subject: [PATCH 11/11] update test raises message --- pandas/tests/tseries/holiday/test_holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index 4b9ba13ce0fc9..08f4a1250392e 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -280,7 +280,7 @@ def test_list_of_list_of_offsets_raises(): day=USThanksgivingDay.day, offset=[USThanksgivingDay.offset, DateOffset(1)], ) - msg = "Only BaseOffsets and flat lists thereof are supported for offset." + msg = "Only BaseOffsets and flat lists of them are supported for offset." with pytest.raises(ValueError, match=msg): Holiday( "Holiday2",