From 68a02aadfdda0c1da40157b4faa74f3181c384e3 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 2 Mar 2017 22:27:16 +0200 Subject: [PATCH 01/29] !B [pandas-dev/pandas#15549] resample with tz-aware: Values falls after last bin --- pandas/tseries/resample.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 75e550a065fd2..f82f535f1c41b 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1144,17 +1144,18 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError('axis must be a DatetimeIndex, but got ' 'an instance of %r' % type(ax).__name__) - if len(ax) == 0: binner = labels = DatetimeIndex( data=[], freq=self.freq, name=ax.name) return binner, [], labels - + + tz = ax.tz + ax = ax.tz_convert('UTC') + first, last = ax.min(), ax.max() first, last = _get_range_edges(first, last, self.freq, closed=self.closed, base=self.base) - tz = ax.tz # GH #12037 # use first/last directly instead of call replace() on them # because replace() will swallow the nanosecond part @@ -1163,8 +1164,7 @@ def _get_time_bins(self, ax): binner = labels = DatetimeIndex(freq=self.freq, start=first, end=last, - tz=tz, - name=ax.name) + name=ax.name).tz_convert(tz) # a little hack trimmed = False From 47b99acee84dd15001548179e16de55f56e90611 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 2 Mar 2017 22:29:57 +0200 Subject: [PATCH 02/29] restore mistakenly removed line --- pandas/tseries/resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index f82f535f1c41b..39ca8002f27ac 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1144,6 +1144,7 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError('axis must be a DatetimeIndex, but got ' 'an instance of %r' % type(ax).__name__) + if len(ax) == 0: binner = labels = DatetimeIndex( data=[], freq=self.freq, name=ax.name) From 04ce929073df83c3144244f9c6772636ce29f0c7 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 2 Mar 2017 22:31:00 +0200 Subject: [PATCH 03/29] remove redundant whitespaces --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 39ca8002f27ac..ac74802e4046d 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1144,7 +1144,7 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError('axis must be a DatetimeIndex, but got ' 'an instance of %r' % type(ax).__name__) - + if len(ax) == 0: binner = labels = DatetimeIndex( data=[], freq=self.freq, name=ax.name) From a43bb1932986c6b5999e753477069105c6695350 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 2 Mar 2017 22:31:51 +0200 Subject: [PATCH 04/29] remove redundant whitespaces --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index ac74802e4046d..3b938c305137f 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1144,7 +1144,7 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError('axis must be a DatetimeIndex, but got ' 'an instance of %r' % type(ax).__name__) - + if len(ax) == 0: binner = labels = DatetimeIndex( data=[], freq=self.freq, name=ax.name) From 44fc3f29d83f657011570127c9d69dab64ef188a Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Fri, 3 Mar 2017 11:34:04 +0200 Subject: [PATCH 05/29] !U add tz to DatetimeIndex initialization --- pandas/tseries/resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 3b938c305137f..538cba2b5901c 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1165,6 +1165,7 @@ def _get_time_bins(self, ax): binner = labels = DatetimeIndex(freq=self.freq, start=first, end=last, + tz='UTC', name=ax.name).tz_convert(tz) # a little hack From 2a1d24c6d328941e2ab7b7f229d4c2367e4b3ff7 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Fri, 3 Mar 2017 11:47:14 +0200 Subject: [PATCH 06/29] !U add test for a bug: resample with tz-aware: Values falls after last bin --- pandas/tests/tseries/test_resample.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 1535bd665fe8b..56953dc378965 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -2670,6 +2670,11 @@ def test_resample_weekly_bug_1726(self): # it works! df.resample('W-MON', closed='left', label='left').first() + def test_resample_tz_aware_bug_15549(self): + index = pd.DatetimeIndex([1450137600000000000, 1474059600000000000], tz='UTC').tz_convert('America/Chicago') + df = pd.DataFrame([1, 2], index=index) + df.resample('12h', closed='right', label='right').last().ffill() + def test_resample_bms_2752(self): # GH2753 foo = pd.Series(index=pd.bdate_range('20000101', '20000201')) From d28ff98c89fb173b8b58dd0fc4e89b84440590e2 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Fri, 3 Mar 2017 20:27:44 +0200 Subject: [PATCH 07/29] !U do not convert TZ to UTC if it not set --- pandas/tseries/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 538cba2b5901c..f759cd9613a81 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1151,7 +1151,8 @@ def _get_time_bins(self, ax): return binner, [], labels tz = ax.tz - ax = ax.tz_convert('UTC') + if tz: + ax = ax.tz_convert('UTC') first, last = ax.min(), ax.max() first, last = _get_range_edges(first, last, self.freq, From 7c770c0778288b29f096ab1b72a85e6ec8b55336 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 16 Nov 2017 20:43:06 -0600 Subject: [PATCH 08/29] !U revert changes with timezone --- pandas/tseries/resample.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index f759cd9613a81..64fc37de1d9ff 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1150,14 +1150,11 @@ def _get_time_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - tz = ax.tz - if tz: - ax = ax.tz_convert('UTC') - first, last = ax.min(), ax.max() first, last = _get_range_edges(first, last, self.freq, closed=self.closed, base=self.base) + tz = ax.tz # GH #12037 # use first/last directly instead of call replace() on them # because replace() will swallow the nanosecond part @@ -1166,8 +1163,8 @@ def _get_time_bins(self, ax): binner = labels = DatetimeIndex(freq=self.freq, start=first, end=last, - tz='UTC', - name=ax.name).tz_convert(tz) + tz=tz, + name=ax.name) # a little hack trimmed = False From effa6509a527e3d3e613acc80797f4c20536d98c Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 16 Nov 2017 20:44:26 -0600 Subject: [PATCH 09/29] !U revert spaces --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 64fc37de1d9ff..75e550a065fd2 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1149,7 +1149,7 @@ def _get_time_bins(self, ax): binner = labels = DatetimeIndex( data=[], freq=self.freq, name=ax.name) return binner, [], labels - + first, last = ax.min(), ax.max() first, last = _get_range_edges(first, last, self.freq, closed=self.closed, From aa8c0af6528c80e8a0ba1850dccf5ae737673129 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Thu, 16 Nov 2017 21:04:38 -0600 Subject: [PATCH 10/29] !U change way the end timestamp is defined --- pandas/tseries/index.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 5f00e8b648689..7f2f78d73c42a 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1944,10 +1944,7 @@ def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: b = Timestamp(start).value - # cannot just use e = Timestamp(end) + 1 because arange breaks when - # stride is too large, see GH10887 - e = (b + (Timestamp(end).value - b) // stride * stride + - stride // 2 + 1) + e = (Timestamp(end).value + (stride//2) + 1) # end.tz == start.tz by this point due to _generate implementation tz = start.tz elif start is not None: From fb4d40598ff728da00e182a701ecd61ee4d9015d Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Fri, 17 Nov 2017 15:01:24 -0800 Subject: [PATCH 11/29] !U add conditional change in the formula for defining the end timestamp --- pandas/core/indexes/datetimes.py | 14 +++++++++----- pandas/core/resample.py | 5 ++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 275dcc3a3c135..1b0fb8dc02195 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -366,9 +366,10 @@ def __new__(cls, data=None, pass if data is None: + values_present = kwargs.pop('values_present', False) return cls._generate(start, end, periods, name, freq, tz=tz, normalize=normalize, closed=closed, - ambiguous=ambiguous) + ambiguous=ambiguous, values_present=values_present) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -463,7 +464,7 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, offset, - tz=None, normalize=False, ambiguous='raise', closed=None): + tz=None, normalize=False, ambiguous='raise', closed=None, values_present=False): if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and ' 'periods, exactly two must be specified') @@ -552,7 +553,7 @@ def _generate(cls, start, end, periods, name, offset, index = cls._cached_range(start, end, periods=periods, offset=offset, name=name) else: - index = _generate_regular_range(start, end, periods, offset) + index = _generate_regular_range(start, end, periods, offset, values_present) else: @@ -2016,12 +2017,15 @@ def to_julian_date(self): DatetimeIndex._add_datetimelike_methods() -def _generate_regular_range(start, end, periods, offset): +def _generate_regular_range(start, end, periods, offset, values_present): if isinstance(offset, Tick): stride = offset.nanos if periods is None: b = Timestamp(start).value - e = (Timestamp(end).value + (stride//2) + 1) + if values_present: + e = (Timestamp(end).value + stride//2 + 1) + else: + e = (b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1) # end.tz == start.tz by this point due to _generate implementation tz = start.tz elif start is not None: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index eeb6faf20ffce..98b16a87076e3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1130,6 +1130,8 @@ def _get_time_bins(self, ax): closed=self.closed, base=self.base) tz = ax.tz + values_present = isinstance(getattr(self, 'obj', None), (pd.DataFrame, pd.Series)) + # GH #12037 # use first/last directly instead of call replace() on them # because replace() will swallow the nanosecond part @@ -1139,7 +1141,8 @@ def _get_time_bins(self, ax): start=first, end=last, tz=tz, - name=ax.name) + name=ax.name, + values_present=values_present) # a little hack trimmed = False From 93842d0b332b919a4227b8588090a6c431d2c5f3 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Fri, 17 Nov 2017 16:30:47 -0800 Subject: [PATCH 12/29] !U make the values_present argument optional --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1b0fb8dc02195..c8ed107a06643 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2017,7 +2017,7 @@ def to_julian_date(self): DatetimeIndex._add_datetimelike_methods() -def _generate_regular_range(start, end, periods, offset, values_present): +def _generate_regular_range(start, end, periods, offset, values_present=False): if isinstance(offset, Tick): stride = offset.nanos if periods is None: From fc795c68c489371ffa8f42eb8373bd1c14d20260 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sat, 18 Nov 2017 16:59:46 -0800 Subject: [PATCH 13/29] !U add line feed --- pandas/core/indexes/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c8ed107a06643..c4f1c563e0862 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2025,7 +2025,8 @@ def _generate_regular_range(start, end, periods, offset, values_present=False): if values_present: e = (Timestamp(end).value + stride//2 + 1) else: - e = (b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1) + e = (b + (Timestamp(end).value - b) // stride * stride + + stride // 2 + 1) # end.tz == start.tz by this point due to _generate implementation tz = start.tz elif start is not None: From 514fe24578fcce58e0135b3a78671084c63760cd Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sat, 18 Nov 2017 17:01:12 -0800 Subject: [PATCH 14/29] !U fix formatting --- pandas/core/indexes/datetimes.py | 9 ++++++--- pandas/tests/test_resample.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c4f1c563e0862..8d17bdedf7b52 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -369,7 +369,8 @@ def __new__(cls, data=None, values_present = kwargs.pop('values_present', False) return cls._generate(start, end, periods, name, freq, tz=tz, normalize=normalize, closed=closed, - ambiguous=ambiguous, values_present=values_present) + ambiguous=ambiguous, + values_present=values_present) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -464,7 +465,8 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, offset, - tz=None, normalize=False, ambiguous='raise', closed=None, values_present=False): + tz=None, normalize=False, ambiguous='raise', closed=None, + values_present=False): if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and ' 'periods, exactly two must be specified') @@ -553,7 +555,8 @@ def _generate(cls, start, end, periods, name, offset, index = cls._cached_range(start, end, periods=periods, offset=offset, name=name) else: - index = _generate_regular_range(start, end, periods, offset, values_present) + index = _generate_regular_range(start, end, periods, offset, + values_present) else: diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 63e7c754e89f7..42a1729369d8d 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2720,7 +2720,8 @@ def test_resample_weekly_bug_1726(self): df.resample('W-MON', closed='left', label='left').first() def test_resample_tz_aware_bug_15549(self): - index = pd.DatetimeIndex([1450137600000000000, 1474059600000000000], tz='UTC').tz_convert('America/Chicago') + index = pd.DatetimeIndex([1450137600000000000, 1474059600000000000], + tz='UTC').tz_convert('America/Chicago') df = pd.DataFrame([1, 2], index=index) df.resample('12h', closed='right', label='right').last().ffill() From 777a12a6180e82873d298c2090324804da30f141 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sat, 18 Nov 2017 17:01:45 -0800 Subject: [PATCH 15/29] !U add a line feed --- pandas/core/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 98b16a87076e3..654e1629f496e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1130,7 +1130,8 @@ def _get_time_bins(self, ax): closed=self.closed, base=self.base) tz = ax.tz - values_present = isinstance(getattr(self, 'obj', None), (pd.DataFrame, pd.Series)) + values_present = isinstance(getattr(self, 'obj', None), + (pd.DataFrame, pd.Series)) # GH #12037 # use first/last directly instead of call replace() on them From 037c9dcdead16bd75e902be305faf35b645def5c Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sat, 18 Nov 2017 19:25:09 -0800 Subject: [PATCH 16/29] !U fix formatting --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8d17bdedf7b52..81283691eb1b4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2026,7 +2026,7 @@ def _generate_regular_range(start, end, periods, offset, values_present=False): if periods is None: b = Timestamp(start).value if values_present: - e = (Timestamp(end).value + stride//2 + 1) + e = (Timestamp(end).value + stride // 2 + 1) else: e = (b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1) From d5ac67e1162e4cfcc483ad2c2eee67a6bece77ef Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 12:58:26 -0800 Subject: [PATCH 17/29] !U change the fix to only handle the problem on core/resample.py level --- pandas/core/indexes/datetimes.py | 21 ++++++++------------- pandas/core/resample.py | 15 ++++++++++----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 81283691eb1b4..ba96979435f81 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -366,11 +366,9 @@ def __new__(cls, data=None, pass if data is None: - values_present = kwargs.pop('values_present', False) return cls._generate(start, end, periods, name, freq, tz=tz, normalize=normalize, closed=closed, - ambiguous=ambiguous, - values_present=values_present) + ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -465,8 +463,7 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, offset, - tz=None, normalize=False, ambiguous='raise', closed=None, - values_present=False): + tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and ' 'periods, exactly two must be specified') @@ -555,8 +552,7 @@ def _generate(cls, start, end, periods, name, offset, index = cls._cached_range(start, end, periods=periods, offset=offset, name=name) else: - index = _generate_regular_range(start, end, periods, offset, - values_present) + index = _generate_regular_range(start, end, periods, offset) else: @@ -2020,16 +2016,15 @@ def to_julian_date(self): DatetimeIndex._add_datetimelike_methods() -def _generate_regular_range(start, end, periods, offset, values_present=False): +def _generate_regular_range(start, end, periods, offset): if isinstance(offset, Tick): stride = offset.nanos if periods is None: b = Timestamp(start).value - if values_present: - e = (Timestamp(end).value + stride // 2 + 1) - else: - e = (b + (Timestamp(end).value - b) // stride * stride + - stride // 2 + 1) + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = (b + (Timestamp(end).value - b) // stride * stride + + stride // 2 + 1) # end.tz == start.tz by this point due to _generate implementation tz = start.tz elif start is not None: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 654e1629f496e..4a2b41818dac0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1130,8 +1130,6 @@ def _get_time_bins(self, ax): closed=self.closed, base=self.base) tz = ax.tz - values_present = isinstance(getattr(self, 'obj', None), - (pd.DataFrame, pd.Series)) # GH #12037 # use first/last directly instead of call replace() on them @@ -1140,10 +1138,17 @@ def _get_time_bins(self, ax): # nanosecond part and lead to `Values falls after last bin` error binner = labels = DatetimeIndex(freq=self.freq, start=first, - end=last, + end=last , tz=tz, - name=ax.name, - values_present=values_present) + name=ax.name) + + # GH 15549 + values_present = isinstance(getattr(self, 'obj', None), + (pd.DataFrame, pd.Series)) + if values_present and binner[-1] < last: + extra_date_range = pd.date_range(binner[-2], last + self.freq, + freq=self.freq) + binner = labels = binner.append(extra_date_range[-1:]) # a little hack trimmed = False From 61e84d6c4d1e1734eaab9164f1ef28a3269d007d Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 12:59:08 -0800 Subject: [PATCH 18/29] !U fix formatting --- pandas/core/resample.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4a2b41818dac0..2c617b1141577 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1130,7 +1130,6 @@ def _get_time_bins(self, ax): closed=self.closed, base=self.base) tz = ax.tz - # GH #12037 # use first/last directly instead of call replace() on them # because replace() will swallow the nanosecond part @@ -1138,7 +1137,7 @@ def _get_time_bins(self, ax): # nanosecond part and lead to `Values falls after last bin` error binner = labels = DatetimeIndex(freq=self.freq, start=first, - end=last , + end=last, tz=tz, name=ax.name) From e049112cff68c44e167b0d95880fefa1d92ac7d9 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 13:05:56 -0800 Subject: [PATCH 19/29] !U change the index from which we get the dates to appent --- pandas/core/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2c617b1141577..1ec0f7b44b15a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1147,7 +1147,7 @@ def _get_time_bins(self, ax): if values_present and binner[-1] < last: extra_date_range = pd.date_range(binner[-2], last + self.freq, freq=self.freq) - binner = labels = binner.append(extra_date_range[-1:]) + binner = labels = binner.append(extra_date_range[1:]) # a little hack trimmed = False From e252b20b54329b01e422dab2d0780275ef93e8bd Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 13:10:29 -0800 Subject: [PATCH 20/29] !U remove redundant check and fix index --- pandas/core/resample.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 1ec0f7b44b15a..0e78298448d86 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1142,10 +1142,8 @@ def _get_time_bins(self, ax): name=ax.name) # GH 15549 - values_present = isinstance(getattr(self, 'obj', None), - (pd.DataFrame, pd.Series)) - if values_present and binner[-1] < last: - extra_date_range = pd.date_range(binner[-2], last + self.freq, + if binner[-1] < last: + extra_date_range = pd.date_range(binner[-1], last + self.freq, freq=self.freq) binner = labels = binner.append(extra_date_range[1:]) From 1888355bc64792b193cec2206148bb089fb1eee7 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 13:11:25 -0800 Subject: [PATCH 21/29] !U add check for binner len --- pandas/core/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0e78298448d86..45fe35fc2dad2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1142,7 +1142,7 @@ def _get_time_bins(self, ax): name=ax.name) # GH 15549 - if binner[-1] < last: + if len(binner) > 1 and binner[-1] < last: extra_date_range = pd.date_range(binner[-1], last + self.freq, freq=self.freq) binner = labels = binner.append(extra_date_range[1:]) From f4ed7c083d3373ee97456fec5845ecdd0ae0842a Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Sun, 19 Nov 2017 19:06:47 -0800 Subject: [PATCH 22/29] !U add missing arguments to the extra dates generation --- pandas/core/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 45fe35fc2dad2..fc2bb2c01b602 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1144,7 +1144,8 @@ def _get_time_bins(self, ax): # GH 15549 if len(binner) > 1 and binner[-1] < last: extra_date_range = pd.date_range(binner[-1], last + self.freq, - freq=self.freq) + freq=self.freq, tz=tz, + name=ax.name) binner = labels = binner.append(extra_date_range[1:]) # a little hack From 9b18f4b7582e46ac2a206030c7acd30de114e52e Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 08:26:01 -0800 Subject: [PATCH 23/29] !U add comments --- pandas/core/resample.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index fc2bb2c01b602..456625de6e8d4 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1142,6 +1142,9 @@ def _get_time_bins(self, ax): name=ax.name) # GH 15549 + # In edge case of tz-aware resapmling binner last index can be + # less than the last variable in data object. + # This leads to `Values falls after last bin` error if len(binner) > 1 and binner[-1] < last: extra_date_range = pd.date_range(binner[-1], last + self.freq, freq=self.freq, tz=tz, From 5b035f09b584abda6bafe2bcf4829b3b0e55fa37 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 08:44:24 -0800 Subject: [PATCH 24/29] !U change test name and add expected df to compare result to --- pandas/tests/test_resample.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 42a1729369d8d..170e86d5f94b8 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2719,11 +2719,29 @@ def test_resample_weekly_bug_1726(self): # it works! df.resample('W-MON', closed='left', label='left').first() - def test_resample_tz_aware_bug_15549(self): - index = pd.DatetimeIndex([1450137600000000000, 1474059600000000000], + def test_resample_tz_aware(self): + # GH 15549 + + index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], tz='UTC').tz_convert('America/Chicago') df = pd.DataFrame([1, 2], index=index) - df.resample('12h', closed='right', label='right').last().ffill() + res_df = df.resample('12h', closed='right', label='right').last().ffill() + + index = pd.DatetimeIndex(['2016-03-09 12:00:00-06:00', + '2016-03-10 00:00:00-06:00', + '2016-03-10 12:00:00-06:00', + '2016-03-11 00:00:00-06:00', + '2016-03-11 12:00:00-06:00', + '2016-03-12 00:00:00-06:00', + '2016-03-12 12:00:00-06:00', + '2016-03-13 00:00:00-06:00', + '2016-03-13 13:00:00-05:00', + '2016-03-14 01:00:00-05:00', + '2016-03-14 13:00:00-05:00', + '2016-03-15 01:00:00-05:00', + '2016-03-15 13:00:00-05:00', ], tz='UTC').tz_convert('America/Chicago') + expected_df = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], index=index) + assert_frame_equal(res_df, expected_df) def test_resample_bms_2752(self): # GH2753 From 0ec3bd098d7d736b5361c3415108ec08444912d3 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 08:47:01 -0800 Subject: [PATCH 25/29] !R reformat the code --- pandas/tests/test_resample.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 170e86d5f94b8..0774277cc102f 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2721,26 +2721,29 @@ def test_resample_weekly_bug_1726(self): def test_resample_tz_aware(self): # GH 15549 - index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], tz='UTC').tz_convert('America/Chicago') df = pd.DataFrame([1, 2], index=index) res_df = df.resample('12h', closed='right', label='right').last().ffill() - index = pd.DatetimeIndex(['2016-03-09 12:00:00-06:00', - '2016-03-10 00:00:00-06:00', - '2016-03-10 12:00:00-06:00', - '2016-03-11 00:00:00-06:00', - '2016-03-11 12:00:00-06:00', - '2016-03-12 00:00:00-06:00', - '2016-03-12 12:00:00-06:00', - '2016-03-13 00:00:00-06:00', - '2016-03-13 13:00:00-05:00', - '2016-03-14 01:00:00-05:00', - '2016-03-14 13:00:00-05:00', - '2016-03-15 01:00:00-05:00', - '2016-03-15 13:00:00-05:00', ], tz='UTC').tz_convert('America/Chicago') - expected_df = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], index=index) + expected_index_values = ['2016-03-09 12:00:00-06:00', + '2016-03-10 00:00:00-06:00', + '2016-03-10 12:00:00-06:00', + '2016-03-11 00:00:00-06:00', + '2016-03-11 12:00:00-06:00', + '2016-03-12 00:00:00-06:00', + '2016-03-12 12:00:00-06:00', + '2016-03-13 00:00:00-06:00', + '2016-03-13 13:00:00-05:00', + '2016-03-14 01:00:00-05:00', + '2016-03-14 13:00:00-05:00', + '2016-03-15 01:00:00-05:00', + '2016-03-15 13:00:00-05:00'] + index = pd.DatetimeIndex(expected_index_values, + tz='UTC').tz_convert('America/Chicago') + expected_df = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0], index=index) assert_frame_equal(res_df, expected_df) def test_resample_bms_2752(self): From b523915e2d9cccea72e655ee2908ba04d714f482 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 08:53:59 -0800 Subject: [PATCH 26/29] !U add whatsnew bug fix entry --- doc/source/whatsnew/v0.21.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 0ab536f2898c7..637b143657f18 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -62,6 +62,7 @@ Bug Fixes - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) - Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) +- Bug in ``DataFrame.resample(...)`` when there is a time change, resampling frequecy is big enough (:issue:`15549`) Conversion ^^^^^^^^^^ From a1b59c43ef0dd44096bf7b9c18e14569f1d39fed Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 08:55:19 -0800 Subject: [PATCH 27/29] !U reformat the code --- pandas/tests/test_resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 0774277cc102f..ff62e98847e8b 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2724,7 +2724,8 @@ def test_resample_tz_aware(self): index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], tz='UTC').tz_convert('America/Chicago') df = pd.DataFrame([1, 2], index=index) - res_df = df.resample('12h', closed='right', label='right').last().ffill() + res_df = df.resample('12h', closed='right', + label='right').last().ffill() expected_index_values = ['2016-03-09 12:00:00-06:00', '2016-03-10 00:00:00-06:00', From aec765fbb4aecc0df03b157484fe0e9c760f13b1 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 17:45:59 -0800 Subject: [PATCH 28/29] !U reformat the code according to PR comments --- doc/source/whatsnew/v0.21.1.txt | 2 +- pandas/core/resample.py | 4 ++-- pandas/tests/test_resample.py | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 4fc677b4127f9..f7b54d10508f9 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -62,7 +62,7 @@ Bug Fixes - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) - Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) -- Bug in ``DataFrame.resample(...)`` when there is a time change, resampling frequecy is big enough (:issue:`15549`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) - Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) - Bug in :meth:`IntervalIndex.copy` when copying and ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 456625de6e8d4..6988528af415f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1143,8 +1143,8 @@ def _get_time_bins(self, ax): # GH 15549 # In edge case of tz-aware resapmling binner last index can be - # less than the last variable in data object. - # This leads to `Values falls after last bin` error + # less than the last variable in data object, this happens because of + # DST time change if len(binner) > 1 and binner[-1] < last: extra_date_range = pd.date_range(binner[-1], last + self.freq, freq=self.freq, tz=tz, diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ff62e98847e8b..d155923e08ce1 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2719,12 +2719,12 @@ def test_resample_weekly_bug_1726(self): # it works! df.resample('W-MON', closed='left', label='left').first() - def test_resample_tz_aware(self): + def test_resample_with_dst_time_change(self): # GH 15549 index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], tz='UTC').tz_convert('America/Chicago') df = pd.DataFrame([1, 2], index=index) - res_df = df.resample('12h', closed='right', + result = df.resample('12h', closed='right', label='right').last().ffill() expected_index_values = ['2016-03-09 12:00:00-06:00', @@ -2740,12 +2740,12 @@ def test_resample_tz_aware(self): '2016-03-14 13:00:00-05:00', '2016-03-15 01:00:00-05:00', '2016-03-15 13:00:00-05:00'] - index = pd.DatetimeIndex(expected_index_values, + expected_index = pd.DatetimeIndex(expected_index_values, tz='UTC').tz_convert('America/Chicago') - expected_df = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, + expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 2.0], index=index) - assert_frame_equal(res_df, expected_df) + 1.0, 1.0, 2.0], index=expected_index) + assert_frame_equal(result, expected) def test_resample_bms_2752(self): # GH2753 From 2e77f72d0ac7feea2fe734cafdf4e756ed540410 Mon Sep 17 00:00:00 2001 From: Alexander Buchkovsky Date: Mon, 20 Nov 2017 23:36:09 -0800 Subject: [PATCH 29/29] !U fix formatting --- pandas/tests/test_resample.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d155923e08ce1..c9e40074c06ad 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2740,11 +2740,11 @@ def test_resample_with_dst_time_change(self): '2016-03-14 13:00:00-05:00', '2016-03-15 01:00:00-05:00', '2016-03-15 13:00:00-05:00'] - expected_index = pd.DatetimeIndex(expected_index_values, + index = pd.DatetimeIndex(expected_index_values, tz='UTC').tz_convert('America/Chicago') expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 2.0], index=expected_index) + 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0], index=index) assert_frame_equal(result, expected) def test_resample_bms_2752(self):