diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 83aa2d96f565c..f7b54d10508f9 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -62,6 +62,7 @@ Bug Fixes - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) - Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) - Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) - Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index eeb6faf20ffce..6988528af415f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1141,6 +1141,16 @@ def _get_time_bins(self, ax): tz=tz, name=ax.name) + # GH 15549 + # In edge case of tz-aware resapmling binner last index can be + # less than the last variable in data object, this happens because of + # DST time change + if len(binner) > 1 and binner[-1] < last: + extra_date_range = pd.date_range(binner[-1], last + self.freq, + freq=self.freq, tz=tz, + name=ax.name) + binner = labels = binner.append(extra_date_range[1:]) + # a little hack trimmed = False if (len(binner) > 2 and binner[-2] == last and diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 5f8c69a8152ac..c9e40074c06ad 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2719,6 +2719,34 @@ def test_resample_weekly_bug_1726(self): # it works! df.resample('W-MON', closed='left', label='left').first() + def test_resample_with_dst_time_change(self): + # GH 15549 + index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], + tz='UTC').tz_convert('America/Chicago') + df = pd.DataFrame([1, 2], index=index) + result = df.resample('12h', closed='right', + label='right').last().ffill() + + expected_index_values = ['2016-03-09 12:00:00-06:00', + '2016-03-10 00:00:00-06:00', + '2016-03-10 12:00:00-06:00', + '2016-03-11 00:00:00-06:00', + '2016-03-11 12:00:00-06:00', + '2016-03-12 00:00:00-06:00', + '2016-03-12 12:00:00-06:00', + '2016-03-13 00:00:00-06:00', + '2016-03-13 13:00:00-05:00', + '2016-03-14 01:00:00-05:00', + '2016-03-14 13:00:00-05:00', + '2016-03-15 01:00:00-05:00', + '2016-03-15 13:00:00-05:00'] + index = pd.DatetimeIndex(expected_index_values, + tz='UTC').tz_convert('America/Chicago') + expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0], index=index) + assert_frame_equal(result, expected) + def test_resample_bms_2752(self): # GH2753 foo = Series(index=pd.bdate_range('20000101', '20000201'))