From 32b3a0617649ce468b97fc1065a662c6f48fba17 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 28 Mar 2014 20:40:24 -0400 Subject: [PATCH] BUG: Bug in resample when how=None resample freq is the same as the axis frequency (GH5955) --- doc/source/release.rst | 1 + pandas/tseries/resample.py | 13 ++++++++-- pandas/tseries/tests/test_resample.py | 35 ++++++++++++++++++++------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index dcbf8b8c7f271..3d4ff0610f43f 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -291,6 +291,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata`` which incorrectly handles nan values and ignores 'with_index' keyword argument (:issue:`6685`) - Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`) - Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`) +- Bug in resample when ``how=None`` and the resample freq is the same as the axis frequency (:issue:`5955`) pandas 0.13.1 ------------- diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 8b65882fb1279..7f243c20fe56e 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -246,7 +246,9 @@ def _resample_timestamps(self): # Determine if we're downsampling if axlabels.freq is not None or axlabels.inferred_freq is not None: + if len(grouper.binlabels) < len(axlabels) or self.how is not None: + # downsample grouped = obj.groupby(grouper, axis=self.axis) result = grouped.aggregate(self._agg_method) else: @@ -259,8 +261,15 @@ def _resample_timestamps(self): else: res_index = binner[:-1] - result = obj.reindex(res_index, method=self.fill_method, - limit=self.limit) + # if we have the same frequency as our axis, then we are equal sampling + # even if how is None + if self.fill_method is None and self.limit is None and to_offset( + axlabels.inferred_freq) == self.freq: + result = obj.copy() + result.index = res_index + else: + 
result = obj.reindex(res_index, method=self.fill_method, + limit=self.limit) else: # Irregular data, have to use groupby grouped = obj.groupby(grouper, axis=self.axis) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 55d96ec6fbaeb..5f975105cd80e 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -160,6 +160,32 @@ def test_resample_basic_from_daily(self): self.assertEquals(result.irow(5), s['1/9/2005']) self.assertEqual(result.index.name, 'index') + def test_resample_upsampling_picked_but_not_correct(self): + + # Test for issue #3020 + dates = date_range('01-Jan-2014','05-Jan-2014', freq='D') + series = Series(1, index=dates) + + result = series.resample('D') + self.assertEquals(result.index[0], dates[0]) + + # GH 5955 + # incorrectly deciding to upsample when the axis frequency matches the resample frequency + + import datetime + s = Series(np.arange(1.,6),index=[datetime.datetime(1975, 1, i, 12, 0) for i in range(1, 6)]) + expected = Series(np.arange(1.,6),index=date_range('19750101',periods=5,freq='D')) + + result = s.resample('D',how='count') + assert_series_equal(result,Series(1,index=expected.index)) + + result1 = s.resample('D',how='sum') + result2 = s.resample('D',how='mean') + result3 = s.resample('D') + assert_series_equal(result1,expected) + assert_series_equal(result2,expected) + assert_series_equal(result3,expected) + def test_resample_frame_basic(self): df = tm.makeTimeDataFrame() @@ -1078,15 +1104,6 @@ def test_all_values_single_bin(self): result = s.resample("A", how='mean') tm.assert_almost_equal(result[0], s.mean()) - def test_resample_doesnt_truncate(self): - # Test for issue #3020 - import pandas as pd - dates = pd.date_range('01-Jan-2014','05-Jan-2014', freq='D') - series = Series(1, index=dates) - - result = series.resample('D') - self.assertEquals(result.index[0], dates[0]) - def test_evenly_divisible_with_no_extra_bins(self): # 4076 # when the 
frequency is evenly divisible, sometimes extra bins