From 3a552a36bce52f03fc7158beb16a4c5654e036ab Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 00:15:17 -0500 Subject: [PATCH 1/7] BUG - fix resample for Day offsets n>1 and update associated test --- pandas/core/resample.py | 2 +- pandas/tests/groupby/test_timegrouper.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index dc1f94c479a37..334efc1086f23 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1588,7 +1588,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0): day_nanos = delta_to_nanoseconds(timedelta(1)) # #1165 - if (is_day and day_nanos % offset.nanos == 0) or not is_day: + if (is_day and not offset.nanos % day_nanos) or not is_day: return _adjust_dates_anchored(first, last, offset, closed=closed, base=base) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 183ccfb5182a2..cb7b419710837 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -43,8 +43,8 @@ def test_groupby_with_timegrouper(self): expected = DataFrame( {'Quantity': 0}, - index=date_range('20130901 13:00:00', - '20131205 13:00:00', freq='5D', + index=date_range('20130901', + '20131205', freq='5D', name='Date', closed='left')) expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype='int64') From ecdd31c420663e502e54b8c2425d1fd144e1e294 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 00:22:08 -0500 Subject: [PATCH 2/7] DOC - add whatsnew entry --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0b2b526dfe9e7..488971c13508a 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1516,6 +1516,7 @@ Groupby/Resample/Rolling - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) - Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) +- Bug in date anchoring for :meth:`DatetimeIndex.resample` with offset :class:`Day` when n > 1 (:issue:`24127`) - Bug where ``ValueError`` is wrongly raised when calling :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). - Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a From be07f5ba35a065ba3d2bffac105638dd47d70d58 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 00:51:45 -0500 Subject: [PATCH 3/7] TST - add tests for equivalent offsets --- pandas/tests/resample/test_datetime_index.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index b287eb468cd94..3fd4aa572c117 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1463,3 +1463,25 @@ def f(data, add_arg): result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample('D').mean().multiply(multiplier) assert_frame_equal(result, expected) + + @pytest.mark.parametrize('n1, freq1, n2, freq2', [ + (60, 'Min', 1, 'H'), + (1440, 'Min', 1, 'D'), + (24, 'H', 1, 'D'), + (60, 'S', 1, 'Min'), + (3600, 'S', 1, 'H'), + (86400, 'S', 1, 'D') + ]) + def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2): + for i in range(1, 2): + n1_ = n1 * i + n2_ = n2 * i + start_freq = freq1 + s = pd.Series(0, index=pd.date_range('19910905 13:00', + '19911005 07:00', + freq=freq1)) + s = s + range(len(s)) + + result1 = s.resample(str(n1_)+freq1).mean() + result2 = s.resample(str(n2_)+freq2).mean() + assert_series_equal(result1, result2) From 9b63c380ab4f59221cfe2374b2a1f1b1cbf8c03a Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 16:23:19 -0500 Subject: [PATCH 4/7] CLN - pep8 adherence --- pandas/tests/resample/test_datetime_index.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3fd4aa572c117..b26225a3bf610 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1465,23 +1465,22 @@ def f(data, add_arg): assert_frame_equal(result, expected) @pytest.mark.parametrize('n1, freq1, n2, freq2', [ - (60, 'Min', 1, 'H'), - (1440, 'Min', 1, 'D'), - (24, 'H', 1, 'D'), - (60, 'S', 1, 'Min'), - (3600, 'S', 1, 'H'), - (86400, 'S', 1, 'D') + (60, 'Min', 1, 'H'), + (1440, 'Min', 1, 'D'), + (24, 'H', 1, 'D'), + (60, 'S', 1, 'Min'), + (3600, 'S', 1, 'H'), + (86400, 'S', 1, 'D') ]) def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2): for i in range(1, 2): n1_ = n1 * i n2_ = n2 * i - start_freq = freq1 s = pd.Series(0, index=pd.date_range('19910905 13:00', '19911005 07:00', freq=freq1)) s = s + range(len(s)) - + result1 = s.resample(str(n1_)+freq1).mean() result2 = s.resample(str(n2_)+freq2).mean() assert_series_equal(result1, result2) From 4967c951f124d0e4d527b309e91ac619d8298d43 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 17:17:20 -0500 Subject: [PATCH 5/7] CLN - pep8 adherence --- pandas/tests/resample/test_datetime_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index b26225a3bf610..2c153190c874e 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1481,6 +1481,6 @@ def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2): freq=freq1)) s = s + range(len(s)) - result1 = s.resample(str(n1_)+freq1).mean() - result2 = s.resample(str(n2_)+freq2).mean() + result1 = s.resample(str(n1_) + freq1).mean() + result2 = s.resample(str(n2_) + freq2).mean() assert_series_equal(result1, result2) From 1d4ac73aa1e3fb41acde6f7d726a07833652f9f0 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 9 Dec 2018 17:13:04 -0500 Subject: [PATCH 6/7] CLN - parametrize tests and add issue reference --- pandas/core/resample.py | 2 +- pandas/tests/resample/test_datetime_index.py | 25 ++++++++++---------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 334efc1086f23..6d80d747f21b3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1587,7 +1587,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0): is_day = isinstance(offset, Day) day_nanos = delta_to_nanoseconds(timedelta(1)) - # #1165 + # #1165 and #24127 if (is_day and not offset.nanos % day_nanos) or not is_day: return _adjust_dates_anchored(first, last, offset, closed=closed, base=base) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 2c153190c874e..992af07e55af8 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1464,6 +1464,7 @@ def f(data, add_arg): expected = df.groupby("A").resample('D').mean().multiply(multiplier) assert_frame_equal(result, expected) + @pytest.mark.parametrize('k', [1, 2, 3]) @pytest.mark.parametrize('n1, freq1, n2, freq2', [ (60, 'Min', 1, 'H'), (1440, 'Min', 1, 'D'), @@ -1472,15 +1473,15 @@ def f(data, add_arg): (3600, 'S', 1, 'H'), (86400, 'S', 1, 'D') ]) - def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2): - for i in range(1, 2): - n1_ = n1 * i - n2_ = n2 * i - s = pd.Series(0, index=pd.date_range('19910905 13:00', - '19911005 07:00', - freq=freq1)) - s = s + range(len(s)) - - result1 = s.resample(str(n1_) + freq1).mean() - result2 = s.resample(str(n2_) + freq2).mean() - assert_series_equal(result1, result2) + def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2, k): + # GH 24127 + n1_ = n1 * k + n2_ = n2 * k + s = pd.Series(0, index=pd.date_range('19910905 13:00', + '19911005 07:00', + freq=freq1)) + s = s + range(len(s)) + + result1 = s.resample(str(n1_) + freq1).mean() + result2 = s.resample(str(n2_) + freq2).mean() + assert_series_equal(result1, result2) From 37252a0261a99e2907e1fd3223f1abb40efa5e57 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 10 Dec 2018 20:21:52 -0500 Subject: [PATCH 7/7] TST - add more test cases --- pandas/tests/resample/test_datetime_index.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 992af07e55af8..69fb92486d523 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1466,12 +1466,16 @@ def f(data, add_arg): @pytest.mark.parametrize('k', [1, 2, 3]) @pytest.mark.parametrize('n1, freq1, n2, freq2', [ + (30, 'S', 0.5, 'Min'), + (60, 'S', 1, 'Min'), + (3600, 'S', 1, 'H'), (60, 'Min', 1, 'H'), + (21600, 'S', 0.25, 'D'), + (86400, 'S', 1, 'D'), + (43200, 'S', 0.5, 'D'), (1440, 'Min', 1, 'D'), + (12, 'H', 0.5, 'D'), (24, 'H', 1, 'D'), - (60, 'S', 1, 'Min'), - (3600, 'S', 1, 'H'), - (86400, 'S', 1, 'D') ]) def test_resample_equivalent_offsets(self, n1, freq1, n2, freq2, k): # GH 24127