From 21a4f240766e44004026abec54f361549a0f6e22 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 24 Sep 2018 22:03:46 -0700 Subject: [PATCH 1/4] test resample fix --- pandas/core/resample.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 878ac957a8557..5b4db94af238a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -3,6 +3,7 @@ import warnings import copy from textwrap import dedent +from pytz import UTC import pandas as pd from pandas.core.groupby.base import GroupByMixin @@ -16,7 +17,7 @@ from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds +from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds, CDay from pandas.core.indexes.period import PeriodIndex from pandas.errors import AbstractMethodError import pandas.core.algorithms as algos @@ -27,6 +28,7 @@ from pandas._libs import lib from pandas._libs.tslibs import Timestamp, NaT +from pandas._libs.tslibs.conversion import tz_localize_to_utc from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.util._decorators import Appender, Substitution @@ -1394,10 +1396,33 @@ def _get_time_bins(self, ax): def _adjust_bin_edges(self, binner, ax_values): # Some hacks for > daily data, see #1471, #1458, #1483 + #import pdb; pdb.set_trace() + #bin_edges = binner.asi8 - bin_edges = binner.asi8 + if not isinstance(self.freq, Day) and is_superperiod(self.freq, 'D'): + if self.closed == 'right': + bin_edges = (binner + CDay()).asi8 + else: + bin_edges = binner.asi8 - if self.freq != 'D' and is_superperiod(self.freq, 'D'): + bin_edges = bin_edges - 1 + # intraday values on last day + if bin_edges[-2] > ax_values.max(): + bin_edges = bin_edges[:-1] + binner = binner[:-1] + else: + bin_edges = binner.asi8 + + return binner, bin_edges + """ + def _adjust_bin_edges(self, binner, ax_values): + # Some hacks for > daily data, see #1471, #1458, #1483 + if binner.tz is not None: + bin_edges = binner.tz_localize(None).asi8 + else: + bin_edges = binner.asi8 + + if not isinstance(self.freq, Day) and is_superperiod(self.freq, 'D'): day_nanos = delta_to_nanoseconds(timedelta(1)) if self.closed == 'right': bin_edges = bin_edges + day_nanos - 1 @@ -1406,9 +1431,10 @@ def _adjust_bin_edges(self, binner, ax_values): if bin_edges[-2] > ax_values.max(): bin_edges = bin_edges[:-1] binner = binner[:-1] - + if binner.tz is not None: + bin_edges = tz_localize_to_utc(bin_edges, binner.tz) return binner, bin_edges - + """ def _get_time_delta_bins(self, ax): if not isinstance(ax, TimedeltaIndex): raise TypeError('axis must be a TimedeltaIndex, but got ' From bce52a56ad1d3cc60ff473f23c46750872646f62 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 1 Oct 2018 09:38:04 -0700 Subject: [PATCH 2/4] move the localization until needed --- pandas/core/resample.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5b4db94af238a..395534bb111b3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1393,7 +1393,7 @@ def _get_time_bins(self, ax): labels = labels[:len(bins)] return binner, bins, labels - + """ def _adjust_bin_edges(self, binner, ax_values): # Some hacks for > daily data, see #1471, #1458, #1483 #import pdb; pdb.set_trace() @@ -1415,6 +1415,7 @@ def _adjust_bin_edges(self, binner, ax_values): return binner, bin_edges """ + """ def _adjust_bin_edges(self, binner, ax_values): # Some hacks for > daily data, see #1471, #1458, #1483 if binner.tz is not None: @@ -1435,6 +1436,31 @@ def _adjust_bin_edges(self, binner, ax_values): bin_edges = tz_localize_to_utc(bin_edges, binner.tz) return binner, bin_edges """ + def _adjust_bin_edges(self, binner, ax_values): + # Some hacks for > daily data, see #1471, #1458, #1483 + + bin_edges = binner.asi8 + + if self.freq != 'D' and is_superperiod(self.freq, 'D'): + day_nanos = delta_to_nanoseconds(timedelta(1)) + if self.closed == 'right': + if binner.tz is not None: + bin_edges = binner.tz_localize(None).asi8 + else: + bin_edges = binner.asi8 + bin_edges = bin_edges + day_nanos - 1 + + if binner.tz is not None: + bin_edges = tz_localize_to_utc(bin_edges, binner.tz) + + # intraday values on last day + if bin_edges[-2] > ax_values.max(): + bin_edges = bin_edges[:-1] + binner = binner[:-1] + return binner, bin_edges + else: + return binner, binner.asi8 + def _get_time_delta_bins(self, ax): if not isinstance(ax, TimedeltaIndex): raise TypeError('axis must be a TimedeltaIndex, but got ' From e23fcd9068c522c8862f5090a6738c20a359dbd5 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 2 Oct 2018 00:04:56 -0700 Subject: [PATCH 3/4] BUG: Correctly weekly resample over DST --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/resample.py | 64 +++++---------------------------- pandas/tests/test_resample.py | 22 ++++++++++++ 3 files changed, 31 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 851c1a3fbd6e9..f865ad2ec0eba 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -691,6 +691,7 @@ Timezones - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) Offsets ^^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 395534bb111b3..70a8deb33b7f2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -3,7 +3,6 @@ import warnings import copy from textwrap import dedent -from pytz import UTC import pandas as pd from pandas.core.groupby.base import GroupByMixin @@ -17,7 +16,8 @@ from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds, CDay +from pandas.tseries.offsets import (DateOffset, Tick, Day, + delta_to_nanoseconds, Nano) from pandas.core.indexes.period import PeriodIndex from pandas.errors import AbstractMethodError import pandas.core.algorithms as algos @@ -28,7 +28,6 @@ from pandas._libs import lib from pandas._libs.tslibs import Timestamp, NaT -from pandas._libs.tslibs.conversion import tz_localize_to_utc from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.util._decorators import Appender, Substitution @@ -1393,73 +1392,26 @@ def _get_time_bins(self, ax): labels = labels[:len(bins)] return binner, bins, labels - """ + def _adjust_bin_edges(self, binner, ax_values): # Some hacks for > daily data, see #1471, #1458, #1483 - #import pdb; pdb.set_trace() - #bin_edges = binner.asi8 - if not isinstance(self.freq, Day) and is_superperiod(self.freq, 'D'): + if self.freq != 'D' and is_superperiod(self.freq, 'D'): if self.closed == 'right': - bin_edges = (binner + CDay()).asi8 + # GH 21459, GH 9119: Adjust the bins relative to the wall time + bin_edges = binner.tz_localize(None) + bin_edges = bin_edges + timedelta(1) - Nano(1) + bin_edges = bin_edges.tz_localize(binner.tz).asi8 else: bin_edges = binner.asi8 - bin_edges = bin_edges - 1 # intraday values on last day if bin_edges[-2] > ax_values.max(): bin_edges = bin_edges[:-1] binner = binner[:-1] else: bin_edges = binner.asi8 - - return binner, bin_edges - """ - """ - def _adjust_bin_edges(self, binner, ax_values): - # Some hacks for > daily data, see #1471, #1458, #1483 - if binner.tz is not None: - bin_edges = binner.tz_localize(None).asi8 - else: - bin_edges = binner.asi8 - - if not isinstance(self.freq, Day) and is_superperiod(self.freq, 'D'): - day_nanos = delta_to_nanoseconds(timedelta(1)) - if self.closed == 'right': - bin_edges = bin_edges + day_nanos - 1 - - # intraday values on last day - if bin_edges[-2] > ax_values.max(): - bin_edges = bin_edges[:-1] - binner = binner[:-1] - if binner.tz is not None: - bin_edges = tz_localize_to_utc(bin_edges, binner.tz) return binner, bin_edges - """ - def _adjust_bin_edges(self, binner, ax_values): - # Some hacks for > daily data, see #1471, #1458, #1483 - - bin_edges = binner.asi8 - - if self.freq != 'D' and is_superperiod(self.freq, 'D'): - day_nanos = delta_to_nanoseconds(timedelta(1)) - if self.closed == 'right': - if binner.tz is not None: - bin_edges = binner.tz_localize(None).asi8 - else: - bin_edges = binner.asi8 - bin_edges = bin_edges + day_nanos - 1 - - if binner.tz is not None: - bin_edges = tz_localize_to_utc(bin_edges, binner.tz) - - # intraday values on last day - if bin_edges[-2] > ax_values.max(): - bin_edges = bin_edges[:-1] - binner = binner[:-1] - return binner, bin_edges - else: - return binner, binner.asi8 def _get_time_delta_bins(self, ax): if not isinstance(ax, TimedeltaIndex): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ccd2461d1512e..5cd31e08e0a9b 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2114,6 +2114,28 @@ def test_downsample_across_dst(self): freq='H')) tm.assert_series_equal(result, expected) + def test_downsample_across_dst_weekly(self): + # GH 9119, GH 21459 + df = DataFrame(index=DatetimeIndex([ + '2017-03-25', '2017-03-26', '2017-03-27', + '2017-03-28', '2017-03-29' + ], tz='Europe/Amsterdam'), + data=[11, 12, 13, 14, 15]) + result = df.resample('1W').sum() + expected = DataFrame([23, 42], index=pd.DatetimeIndex([ + '2017-03-26', '2017-04-02' + ], tz='Europe/Amsterdam')) + tm.assert_frame_equal(result, expected) + + idx = pd.date_range("2013-04-01", "2013-05-01", tz='Europe/London', + freq='H') + s = Series(index=idx) + result = s.resample('W').mean() + expected = Series(index=pd.date_range( + '2013-04-07', freq='W', periods=5, tz='Europe/London' + )) + tm.assert_series_equal(result, expected) + def test_resample_with_nat(self): # GH 13020 index = DatetimeIndex([pd.NaT, From bf1ddd5ddd599ba85824fa777b25fd2f9f73295f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 3 Oct 2018 08:24:27 -0700 Subject: [PATCH 4/4] Move whatsnew to new section --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f865ad2ec0eba..a64576565d093 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -691,7 +691,6 @@ Timezones - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) -- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) Offsets ^^^^^^^ @@ -795,6 +794,7 @@ Groupby/Resample/Rolling - Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). - Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`) - :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) Sparse ^^^^^^