From 6303d76e6ccf986b393e3379c21dd82301cf61a5 Mon Sep 17 00:00:00 2001 From: rockg Date: Fri, 13 Nov 2015 15:10:36 -0500 Subject: [PATCH] BUG: Holiday observance rules could not be applied --- doc/source/timeseries.rst | 6 +- doc/source/whatsnew/v0.17.1.txt | 2 + pandas/tseries/holiday.py | 87 +++++++++++++++++++--------- pandas/tseries/tests/test_holiday.py | 71 +++++++++++++++++++++-- 4 files changed, 132 insertions(+), 34 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 2bc96d1b7b1aa..01b342213de07 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1043,10 +1043,14 @@ An example of how holidays and holiday calendars are defined: cal.holidays(datetime(2012, 1, 1), datetime(2012, 12, 31)) Using this calendar, creating an index or doing offset arithmetic skips weekends -and holidays (i.e., Memorial Day/July 4th). +and holidays (i.e., Memorial Day/July 4th). For example, the below defines +a custom business day offset using the ``ExampleCalendar``. Like any other offset, +it can be used to create a ``DatetimeIndex`` or added to ``datetime`` +or ``Timestamp`` objects. .. ipython:: python + from pandas.tseries.offsets import CDay DatetimeIndex(start='7/1/2012', end='7/10/2012', freq=CDay(calendar=cal)).to_pydatetime() offset = CustomBusinessDay(calendar=cal) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 1d9b02e6a7bb1..65eb3e605950d 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -115,6 +115,8 @@ Bug Fixes - Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`). 
+- Bug in ``holiday.dates`` where observance rules could not be applied to a holiday, plus a related doc enhancement (:issue:`11477`, :issue:`11533`) + diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index e98c5dd93e68a..90f6bff498e62 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU from pandas.tseries.offsets import Easter, Day +import numpy as np def next_monday(dt): @@ -156,8 +157,8 @@ class from pandas.tseries.offsets self.month = month self.day = day self.offset = offset - self.start_date = start_date - self.end_date = end_date + self.start_date = Timestamp(start_date) if start_date is not None else start_date + self.end_date = Timestamp(end_date) if end_date is not None else end_date self.observance = observance assert (days_of_week is None or type(days_of_week) == tuple) self.days_of_week = days_of_week @@ -179,7 +180,7 @@ def __repr__(self): def dates(self, start_date, end_date, return_name=False): """ - Calculate holidays between start date and end date + Calculate holidays observed between start date and end date Parameters ---------- @@ -189,6 +190,12 @@ def dates(self, start_date, end_date, return_name=False): If True, return a series that has dates and holiday names. False will only return dates. 
""" + start_date = Timestamp(start_date) + end_date = Timestamp(end_date) + + filter_start_date = start_date + filter_end_date = end_date + if self.year is not None: dt = Timestamp(datetime(self.year, self.month, self.day)) if return_name: @@ -196,40 +203,57 @@ def dates(self, start_date, end_date, return_name=False): else: return [dt] - if self.start_date is not None: - start_date = self.start_date - - if self.end_date is not None: - end_date = self.end_date - - start_date = Timestamp(start_date) - end_date = Timestamp(end_date) - - year_offset = DateOffset(years=1) - base_date = Timestamp( - datetime(start_date.year, self.month, self.day), - tz=start_date.tz, - ) - dates = DatetimeIndex(start=base_date, end=end_date, freq=year_offset) + dates = self._reference_dates(start_date, end_date) holiday_dates = self._apply_rule(dates) if self.days_of_week is not None: - holiday_dates = list(filter(lambda x: x is not None and - x.dayofweek in self.days_of_week, - holiday_dates)) - else: - holiday_dates = list(filter(lambda x: x is not None, holiday_dates)) + holiday_dates = holiday_dates[np.in1d(holiday_dates.dayofweek, + self.days_of_week)] + + if self.start_date is not None: + filter_start_date = max(self.start_date.tz_localize(filter_start_date.tz), filter_start_date) + if self.end_date is not None: + filter_end_date = min(self.end_date.tz_localize(filter_end_date.tz), filter_end_date) + holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) & + (holiday_dates <= filter_end_date)] if return_name: return Series(self.name, index=holiday_dates) return holiday_dates + + + def _reference_dates(self, start_date, end_date): + """ + Get reference dates for the holiday. + + Return reference dates for the holiday also returning the year + prior to the start_date and year following the end_date. This ensures + that any offsets to be applied will yield the holidays within + the passed in dates. 
+ """ + if self.start_date is not None: + start_date = self.start_date.tz_localize(start_date.tz) + + if self.end_date is not None: + end_date = self.end_date.tz_localize(start_date.tz) + + year_offset = DateOffset(years=1) + reference_start_date = Timestamp( + datetime(start_date.year-1, self.month, self.day)) + + reference_end_date = Timestamp( + datetime(end_date.year+1, self.month, self.day)) + # Don't process unnecessary holidays + dates = DatetimeIndex(start=reference_start_date, end=reference_end_date, + freq=year_offset, tz=start_date.tz) + + return dates def _apply_rule(self, dates): """ - Apply the given offset/observance to an - iterable of dates. + Apply the given offset/observance to a DatetimeIndex of dates. Parameters ---------- - dates : array-like + dates : DatetimeIndex Dates to apply the given offset/observance rule Returns @@ -237,7 +261,7 @@ def _apply_rule(self, dates): Dates with rules applied """ if self.observance is not None: - return map(lambda d: self.observance(d), dates) + return dates.map(lambda d: self.observance(d)) if self.offset is not None: if not isinstance(self.offset, list): @@ -245,7 +269,7 @@ def _apply_rule(self, dates): else: offsets = self.offset for offset in offsets: - dates = list(map(lambda d: d + offset, dates)) + dates += offset return dates holiday_calendars = {} @@ -303,6 +327,13 @@ def __init__(self, name=None, rules=None): if rules is not None: self.rules = rules + + def rule_from_name(self, name): + for rule in self.rules: + if rule.name == name: + return rule + + return None def holidays(self, start=None, end=None, return_name=False): """ diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py index 7d233ba78e7b6..1da397e768a86 100644 --- a/pandas/tseries/tests/test_holiday.py +++ b/pandas/tseries/tests/test_holiday.py @@ -1,15 +1,17 @@ from datetime import datetime import pandas.util.testing as tm +from pandas import compat from pandas import DatetimeIndex from 
pandas.tseries.holiday import ( - USFederalHolidayCalendar, USMemorialDay, USThanksgivingDay, + USFederalHolidayCalendar, USMemorialDay, USThanksgivingDay, nearest_workday, next_monday_or_tuesday, next_monday, previous_friday, sunday_to_monday, Holiday, DateOffset, MO, Timestamp, AbstractHolidayCalendar, get_calendar, HolidayCalendarFactory, next_workday, previous_workday, before_nearest_workday, EasterMonday, GoodFriday, - after_nearest_workday, weekend_to_monday) + after_nearest_workday, weekend_to_monday, USLaborDay, + USColumbusDay, USMartinLutherKingJr, USPresidentsDay) from pytz import utc import nose @@ -72,7 +74,20 @@ def __init__(self, name=None, rules=None): jan2.holidays(), DatetimeIndex(['02-Jan-2015']) ) - + + def test_calendar_observance_dates(self): + # Test for issue 11477 + USFedCal = get_calendar('USFederalHolidayCalendar') + holidays0 = USFedCal.holidays(datetime(2015,7,3), datetime(2015,7,3)) # <-- same start and end dates + holidays1 = USFedCal.holidays(datetime(2015,7,3), datetime(2015,7,6)) # <-- different start and end dates + holidays2 = USFedCal.holidays(datetime(2015,7,3), datetime(2015,7,3)) # <-- same start and end dates + + tm.assert_index_equal(holidays0, holidays1) + tm.assert_index_equal(holidays0, holidays2) + + def test_rule_from_name(self): + USFedCal = get_calendar('USFederalHolidayCalendar') + self.assertEqual(USFedCal.rule_from_name('Thanksgiving'), USThanksgivingDay) class TestHoliday(tm.TestCase): @@ -193,6 +208,52 @@ def test_usthanksgivingday(self): datetime(2020, 11, 26), ], ) + + def test_holidays_within_dates(self): + # Fix holiday behavior found in #11477 + # where holiday.dates returned dates outside start/end date + # or observed rules could not be applied as the holiday + # was not in the original date range (e.g., 7/4/2015 -> 7/3/2015) + start_date = datetime(2015, 7, 1) + end_date = datetime(2015, 7, 1) + + calendar = get_calendar('USFederalHolidayCalendar') + new_years = calendar.rule_from_name('New Years Day') + 
july_4th = calendar.rule_from_name('July 4th') + veterans_day = calendar.rule_from_name('Veterans Day') + christmas = calendar.rule_from_name('Christmas') + + # Holiday: (start/end date, holiday) + holidays = {USMemorialDay: ("2015-05-25", "2015-05-25"), + USLaborDay: ("2015-09-07", "2015-09-07"), + USColumbusDay: ("2015-10-12", "2015-10-12"), + USThanksgivingDay: ("2015-11-26", "2015-11-26"), + USMartinLutherKingJr: ("2015-01-19", "2015-01-19"), + USPresidentsDay: ("2015-02-16", "2015-02-16"), + GoodFriday: ("2015-04-03", "2015-04-03"), + EasterMonday: [("2015-04-06", "2015-04-06"), + ("2015-04-05", [])], + new_years: [("2015-01-01", "2015-01-01"), + ("2011-01-01", []), + ("2010-12-31", "2010-12-31")], + july_4th: [("2015-07-03", "2015-07-03"), + ("2015-07-04", [])], + veterans_day: [("2012-11-11", []), + ("2012-11-12", "2012-11-12")], + christmas: [("2011-12-25", []), + ("2011-12-26", "2011-12-26")]} + + for rule, dates in compat.iteritems(holidays): + empty_dates = rule.dates(start_date, end_date) + self.assertEqual(empty_dates.tolist(), []) + + if isinstance(dates, tuple): + dates = [dates] + + for start, expected in dates: + if len(expected): + expected = [Timestamp(expected)] + self.check_results(rule, start, start, expected) def test_argument_types(self): holidays = USThanksgivingDay.dates(self.start_date, @@ -206,8 +267,8 @@ def test_argument_types(self): Timestamp(self.start_date), Timestamp(self.end_date)) - self.assertEqual(holidays, holidays_1) - self.assertEqual(holidays, holidays_2) + self.assert_index_equal(holidays, holidays_1) + self.assert_index_equal(holidays, holidays_2) def test_special_holidays(self): base_date = [datetime(2012, 5, 28)]