Skip to content

BUG: Holiday observance rules could not be applied #11484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 14, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1043,10 +1043,14 @@ An example of how holidays and holiday calendars are defined:
cal.holidays(datetime(2012, 1, 1), datetime(2012, 12, 31))

Using this calendar, creating an index or doing offset arithmetic skips weekends
and holidays (i.e., Memorial Day/July 4th).
and holidays (i.e., Memorial Day/July 4th). For example, the code below defines
a custom business day offset using the ``ExampleCalendar``. Like any other offset,
it can be used to create a ``DatetimeIndex`` or be added to ``datetime``
or ``Timestamp`` objects.

.. ipython:: python

from pandas.tseries.offsets import CDay
DatetimeIndex(start='7/1/2012', end='7/10/2012',
freq=CDay(calendar=cal)).to_pydatetime()
offset = CustomBusinessDay(calendar=cal)
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ Bug Fixes


- Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`).
- Bug in ``holiday.dates`` where observance rules could not be applied to a holiday, plus a related documentation enhancement (:issue:`11477`, :issue:`11533`)




Expand Down
87 changes: 59 additions & 28 deletions pandas/tseries/holiday.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from datetime import datetime, timedelta
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU
from pandas.tseries.offsets import Easter, Day
import numpy as np


def next_monday(dt):
Expand Down Expand Up @@ -156,8 +157,8 @@ class from pandas.tseries.offsets
self.month = month
self.day = day
self.offset = offset
self.start_date = start_date
self.end_date = end_date
self.start_date = Timestamp(start_date) if start_date is not None else start_date
self.end_date = Timestamp(end_date) if end_date is not None else end_date
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a closed='left|right|both' parameter to make this unambiguous (default probably 'both')?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would maybe prefer it to remain ambiguous and let the user be explicit.

self.observance = observance
assert (days_of_week is None or type(days_of_week) == tuple)
self.days_of_week = days_of_week
Expand All @@ -179,7 +180,7 @@ def __repr__(self):

def dates(self, start_date, end_date, return_name=False):
"""
Calculate holidays between start date and end date
Calculate holidays observed between start date and end date

Parameters
----------
Expand All @@ -189,63 +190,86 @@ def dates(self, start_date, end_date, return_name=False):
If True, return a series that has dates and holiday names.
False will only return dates.
"""
start_date = Timestamp(start_date)
end_date = Timestamp(end_date)

filter_start_date = start_date
filter_end_date = end_date

if self.year is not None:
dt = Timestamp(datetime(self.year, self.month, self.day))
if return_name:
return Series(self.name, index=[dt])
else:
return [dt]

if self.start_date is not None:
start_date = self.start_date

if self.end_date is not None:
end_date = self.end_date

start_date = Timestamp(start_date)
end_date = Timestamp(end_date)

year_offset = DateOffset(years=1)
base_date = Timestamp(
datetime(start_date.year, self.month, self.day),
tz=start_date.tz,
)
dates = DatetimeIndex(start=base_date, end=end_date, freq=year_offset)
dates = self._reference_dates(start_date, end_date)
holiday_dates = self._apply_rule(dates)
if self.days_of_week is not None:
holiday_dates = list(filter(lambda x: x is not None and
x.dayofweek in self.days_of_week,
holiday_dates))
else:
holiday_dates = list(filter(lambda x: x is not None, holiday_dates))
holiday_dates = holiday_dates[np.in1d(holiday_dates.dayofweek,
self.days_of_week)]

if self.start_date is not None:
filter_start_date = max(self.start_date.tz_localize(filter_start_date.tz), filter_start_date)
if self.end_date is not None:
filter_end_date = min(self.end_date.tz_localize(filter_end_date.tz), filter_end_date)
holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) &
(holiday_dates <= filter_end_date)]
if return_name:
return Series(self.name, index=holiday_dates)
return holiday_dates


def _reference_dates(self, start_date, end_date):
    """
    Build the yearly anchor dates that the holiday rule is applied to.

    The range is padded by one year on each side: it begins in the year
    before the start date and finishes in the year after the end date.
    That way an offset or observance that shifts a holiday can still
    produce a date inside the window the caller asked for.

    Parameters
    ----------
    start_date, end_date : Timestamp-like
        Requested window (``.tz`` and ``.year`` are read). Each bound is
        replaced by the holiday's own ``start_date`` / ``end_date`` when
        that attribute is set.

    Returns
    -------
    DatetimeIndex
        Yearly dates on ``self.month`` / ``self.day``, created with the
        timezone of the (possibly replaced) start date.
    """
    own_start, own_end = self.start_date, self.end_date

    # The holiday's own validity bounds take the place of the requested
    # ones; both are localized to the start date's timezone.
    if own_start is not None:
        start_date = own_start.tz_localize(start_date.tz)
    if own_end is not None:
        end_date = own_end.tz_localize(start_date.tz)

    first_anchor = Timestamp(
        datetime(start_date.year - 1, self.month, self.day))
    last_anchor = Timestamp(
        datetime(end_date.year + 1, self.month, self.day))

    return DatetimeIndex(start=first_anchor, end=last_anchor,
                         freq=DateOffset(years=1), tz=start_date.tz)

def _apply_rule(self, dates):
"""
Apply the given offset/observance to an
iterable of dates.
Apply the given offset/observance to a DatetimeIndex of dates.

Parameters
----------
dates : array-like
dates : DatetimeIndex
Dates to apply the given offset/observance rule

Returns
-------
Dates with rules applied
"""
if self.observance is not None:
return map(lambda d: self.observance(d), dates)
return dates.map(lambda d: self.observance(d))

if self.offset is not None:
if not isinstance(self.offset, list):
offsets = [self.offset]
else:
offsets = self.offset
for offset in offsets:
dates = list(map(lambda d: d + offset, dates))
dates += offset
return dates

holiday_calendars = {}
Expand Down Expand Up @@ -303,6 +327,13 @@ def __init__(self, name=None, rules=None):

if rules is not None:
self.rules = rules

def rule_from_name(self, name):
    """
    Look up a rule of this calendar by its name.

    Returns the first entry of ``self.rules`` whose ``name`` attribute
    equals ``name``, or None when no rule matches.
    """
    matches = (candidate for candidate in self.rules
               if candidate.name == name)
    return next(matches, None)

def holidays(self, start=None, end=None, return_name=False):
"""
Expand Down
71 changes: 66 additions & 5 deletions pandas/tseries/tests/test_holiday.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@

from datetime import datetime
import pandas.util.testing as tm
from pandas import compat
from pandas import DatetimeIndex
from pandas.tseries.holiday import (
USFederalHolidayCalendar, USMemorialDay, USThanksgivingDay,
USFederalHolidayCalendar, USMemorialDay, USThanksgivingDay,
nearest_workday, next_monday_or_tuesday, next_monday,
previous_friday, sunday_to_monday, Holiday, DateOffset,
MO, Timestamp, AbstractHolidayCalendar, get_calendar,
HolidayCalendarFactory, next_workday, previous_workday,
before_nearest_workday, EasterMonday, GoodFriday,
after_nearest_workday, weekend_to_monday)
after_nearest_workday, weekend_to_monday, USLaborDay,
USColumbusDay, USMartinLutherKingJr, USPresidentsDay)
from pytz import utc
import nose

Expand Down Expand Up @@ -72,7 +74,20 @@ def __init__(self, name=None, rules=None):
jan2.holidays(),
DatetimeIndex(['02-Jan-2015'])
)


def test_calendar_observance_dates(self):
    # Regression test for issue 11477: querying the same single day must
    # give the same holidays as a wider window starting on that day, and
    # must give the same answer again when repeated after the wider query.
    calendar = get_calendar('USFederalHolidayCalendar')
    july_3rd = datetime(2015, 7, 3)
    july_6th = datetime(2015, 7, 6)

    # Order matters: single day, wider window, then the single day again.
    single_day_first = calendar.holidays(july_3rd, july_3rd)
    wider_window = calendar.holidays(july_3rd, july_6th)
    single_day_again = calendar.holidays(july_3rd, july_3rd)

    tm.assert_index_equal(single_day_first, wider_window)
    tm.assert_index_equal(single_day_first, single_day_again)

def test_rule_from_name(self):
    # Looking up 'Thanksgiving' on the federal calendar should compare
    # equal to the module-level USThanksgivingDay rule.
    calendar = get_calendar('USFederalHolidayCalendar')
    found = calendar.rule_from_name('Thanksgiving')
    self.assertEqual(found, USThanksgivingDay)

class TestHoliday(tm.TestCase):

Expand Down Expand Up @@ -193,6 +208,52 @@ def test_usthanksgivingday(self):
datetime(2020, 11, 26),
],
)

def test_holidays_within_dates(self):
    # Covers the behavior fixed for #11477: holiday.dates used to return
    # dates outside the requested start/end, or could not apply an
    # observance rule because the nominal holiday fell outside the
    # requested range (e.g., 7/4/2015 observed on 7/3/2015).
    quiet_day = datetime(2015, 7, 1)

    calendar = get_calendar('USFederalHolidayCalendar')
    new_years = calendar.rule_from_name('New Years Day')
    july_4th = calendar.rule_from_name('July 4th')
    veterans_day = calendar.rule_from_name('Veterans Day')
    christmas = calendar.rule_from_name('Christmas')

    # rule -> (query day, expected holiday) or a list of such pairs.
    # The query day is used as both start and end of the window; an
    # empty list means no holiday is expected for that day.
    cases = {
        USMemorialDay: ("2015-05-25", "2015-05-25"),
        USLaborDay: ("2015-09-07", "2015-09-07"),
        USColumbusDay: ("2015-10-12", "2015-10-12"),
        USThanksgivingDay: ("2015-11-26", "2015-11-26"),
        USMartinLutherKingJr: ("2015-01-19", "2015-01-19"),
        USPresidentsDay: ("2015-02-16", "2015-02-16"),
        GoodFriday: ("2015-04-03", "2015-04-03"),
        EasterMonday: [("2015-04-06", "2015-04-06"),
                       ("2015-04-05", [])],
        new_years: [("2015-01-01", "2015-01-01"),
                    ("2011-01-01", []),
                    ("2010-12-31", "2010-12-31")],
        july_4th: [("2015-07-03", "2015-07-03"),
                   ("2015-07-04", [])],
        veterans_day: [("2012-11-11", []),
                       ("2012-11-12", "2012-11-12")],
        christmas: [("2011-12-25", []),
                    ("2011-12-26", "2011-12-26")],
    }

    for rule, checks in compat.iteritems(cases):
        # None of the rules should yield a holiday on the quiet day.
        nothing = rule.dates(quiet_day, quiet_day)
        self.assertEqual(nothing.tolist(), [])

        # Normalize a lone pair to a one-element list of pairs.
        if isinstance(checks, tuple):
            checks = [checks]

        for day, observed in checks:
            wanted = [Timestamp(observed)] if len(observed) else observed
            self.check_results(rule, day, day, wanted)

def test_argument_types(self):
holidays = USThanksgivingDay.dates(self.start_date,
Expand All @@ -206,8 +267,8 @@ def test_argument_types(self):
Timestamp(self.start_date),
Timestamp(self.end_date))

self.assertEqual(holidays, holidays_1)
self.assertEqual(holidays, holidays_2)
self.assert_index_equal(holidays, holidays_1)
self.assert_index_equal(holidays, holidays_2)

def test_special_holidays(self):
base_date = [datetime(2012, 5, 28)]
Expand Down