Closed
Description
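Here's the bug: resampling a tz-aware Series whose last timestamp falls just before the autumn DST transition raises AmbiguousTimeError: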
In [27]: idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
freq="30T", tz="Europe/London")
In [28]: series = pd.Series(np.random.randn(len(idx)), index=idx)
In [31]: series
Out[31]:
2014-10-25 22:00:00+01:00 -0.874014
2014-10-25 22:30:00+01:00 1.316258
2014-10-25 23:00:00+01:00 -1.334616
2014-10-25 23:30:00+01:00 -1.200390
2014-10-26 00:00:00+01:00 -0.341764
2014-10-26 00:30:00+01:00 1.509091
Freq: 30T, dtype: float64
In [29]: series.resample('30T')
---------------------------------------------------------------------------
AmbiguousTimeError Traceback (most recent call last)
<ipython-input-29-bb9e86068ce1> in <module>()
----> 1 series.resample('30T')
/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in resample(self, rule, how, axis, fill_method, closed, label, convention, kind, loffset, limit, base)
3195 fill_method=fill_method, convention=convention,
3196 limit=limit, base=base)
-> 3197 return sampler.resample(self).__finalize__(self)
3198
3199 def first(self, offset):
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in resample(self, obj)
83
84 if isinstance(ax, DatetimeIndex):
---> 85 rs = self._resample_timestamps()
86 elif isinstance(ax, PeriodIndex):
87 offset = to_offset(self.freq)
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in _resample_timestamps(self, kind)
273 axlabels = self.ax
274
--> 275 self._get_binner_for_resample(kind=kind)
276 grouper = self.grouper
277 binner = self.binner
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in _get_binner_for_resample(self, kind)
121 kind = self.kind
122 if kind is None or kind == 'timestamp':
--> 123 self.binner, bins, binlabels = self._get_time_bins(ax)
124 elif kind == 'timedelta':
125 self.binner, bins, binlabels = self._get_time_delta_bins(ax)
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in _get_time_bins(self, ax)
162 first, last = ax.min(), ax.max()
163 first, last = _get_range_edges(first, last, self.freq, closed=self.closed,
--> 164 base=self.base)
165 tz = ax.tz
166 binner = labels = DatetimeIndex(freq=self.freq,
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in _get_range_edges(first, last, offset, closed, base)
392 if (is_day and day_nanos % offset.nanos == 0) or not is_day:
393 return _adjust_dates_anchored(first, last, offset,
--> 394 closed=closed, base=base)
395
396 if not isinstance(offset, Tick): # and first.time() != last.time():
/usr/local/lib/python2.7/dist-packages/pandas/tseries/resample.pyc in _adjust_dates_anchored(first, last, offset, closed, base)
459
460 return (Timestamp(fresult).tz_localize(first_tzinfo),
--> 461 Timestamp(lresult).tz_localize(first_tzinfo))
462
463
pandas/tslib.pyx in pandas.tslib.Timestamp.tz_localize (pandas/tslib.c:10535)()
pandas/tslib.pyx in pandas.tslib.tz_localize_to_utc (pandas/tslib.c:50297)()
AmbiguousTimeError: Cannot infer dst time from Timestamp('2014-10-26 01:00:00'),
try using the 'ambiguous' argument
My hacky workaround is to convert to UTC, resample, and then convert back to the original time zone:
In [35]: series.tz_convert('UTC').resample('30T').tz_convert('Europe/London')
Out[35]:
2014-10-25 22:00:00+01:00 -0.874014
2014-10-25 22:30:00+01:00 1.316258
2014-10-25 23:00:00+01:00 -1.334616
2014-10-25 23:30:00+01:00 -1.200390
2014-10-26 00:00:00+01:00 -0.341764
2014-10-26 00:30:00+01:00 1.509091
Freq: 30T, dtype: float64
The bug disappears if the end date of the index is beyond the DST boundary:
In [37]: idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 02:30:00",
freq="30T", tz="Europe/London")
In [38]: series = pd.Series(np.random.randn(len(idx)), index=idx)
In [39]: series.resample('30T')
Out[39]:
2014-10-25 22:00:00+01:00 -0.626598
2014-10-25 22:30:00+01:00 1.799176
2014-10-25 23:00:00+01:00 -0.388075
2014-10-25 23:30:00+01:00 0.641487
2014-10-26 00:00:00+01:00 -0.488203
2014-10-26 00:30:00+01:00 0.477301
2014-10-26 01:00:00+01:00 0.040997
2014-10-26 01:30:00+01:00 -1.996542
2014-10-26 01:00:00+00:00 -0.016655
2014-10-26 01:30:00+00:00 -1.445823
2014-10-26 02:00:00+00:00 -0.713523
2014-10-26 02:30:00+00:00 -0.122274
Freq: 30T, dtype: float64
All is also fine if the start date is just before the DST boundary, provided the end date is beyond the boundary:
In [47]: idx = pd.date_range("2014-10-26 00:30:00", "2014-10-26 02:30:00",
freq="30T", tz="Europe/London")
In [48]: series = pd.Series(np.random.randn(len(idx)), index=idx)
In [49]: series.resample('30T')
Out[49]:
2014-10-26 00:30:00+01:00 2.371051
2014-10-26 01:00:00+01:00 -0.033473
2014-10-26 01:30:00+01:00 -0.988517
2014-10-26 01:00:00+00:00 -0.664475
2014-10-26 01:30:00+00:00 0.865772
2014-10-26 02:00:00+00:00 -1.051219
2014-10-26 02:30:00+00:00 -1.478477
Freq: 30T, dtype: float64
(As always, I must say a huge THANK YOU to everyone working on Pandas; it really is a great bit of software)
In [50]: pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.9.final.0
python-bits: 64
OS: Linux
OS-release: 3.19.0-16-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_GB.UTF-8
pandas: 0.16.1
nose: 1.3.4
Cython: 0.22
numpy: 1.9.2
scipy: 0.14.0
statsmodels: 0.6.1
IPython: 3.1.0
sphinx: 1.2.3
patsy: 0.3.0
dateutil: 2.4.2
pytz: 2015.2
bottleneck: None
tables: 3.1.1
numexpr: 2.4
matplotlib: 1.4.3
openpyxl: None
xlrd: 0.9.2
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.3.2
html5lib: 0.999
httplib2: 0.9
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: 2.5.3 (dt dec pq3 ext)
Possibly related: