Closed
Description
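Resampling a Series with a DatetimeIndex using kind='period' should raise a clear error stating that kind='period' is not accepted for a DatetimeIndex; instead it currently fails with an unrelated AssertionError (see below).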
There is also a possible issue with PeriodIndex resampling hanging (see @cpcloud's example below).
Affected pandas version: 0.12.0.dev-f61d7e3.
This bug also exists in 0.11.
The bug
In [20]: s.resample('T', kind='period')
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-79-c290c0578332> in <module>()
----> 1 s.resample('T', kind='period')
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/generic.py in resample(self, rule, how, axis, fill_method, closed, label, convention, kind, loffset, limit, base)
255 fill_method=fill_method, convention=convention,
256 limit=limit, base=base)
--> 257 return sampler.resample(self)
258
259 def first(self, offset):
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/tseries/resample.py in resample(self, obj)
81
82 if isinstance(axis, DatetimeIndex):
---> 83 rs = self._resample_timestamps(obj)
84 elif isinstance(axis, PeriodIndex):
85 offset = to_offset(self.freq)
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/tseries/resample.py in _resample_timestamps(self, obj)
224 # Irregular data, have to use groupby
225 grouped = obj.groupby(grouper, axis=self.axis)
--> 226 result = grouped.aggregate(self._agg_method)
227
228 if self.fill_method is not None:
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs)
1410 if isinstance(func_or_funcs, basestring):
-> 1411 return getattr(self, func_or_funcs)(*args, **kwargs)
1412
1413 if hasattr(func_or_funcs, '__iter__'):
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/groupby.py in mean(self)
356 except Exception: # pragma: no cover
357 f = lambda x: x.mean(axis=self.axis)
--> 358 return self._python_agg_general(f)
359
360 def median(self):
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/groupby.py in _python_agg_general(self, func, *args, **kwargs)
498 output[name] = self._try_cast(values[mask],result)
499
--> 500 return self._wrap_aggregated_output(output)
501
502 def _wrap_applied_output(self, *args, **kwargs):
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/groupby.py in _wrap_aggregated_output(self, output, names)
1473 return DataFrame(output, index=index, columns=names)
1474 else:
-> 1475 return Series(output, index=index, name=self.name)
1476
1477 def _wrap_applied_output(self, keys, values, not_indexed_same=False):
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/core/series.py in __new__(cls, data, index, dtype, name, copy)
494 else:
495 subarr = subarr.view(Series)
--> 496 subarr.index = index
497 subarr.name = name
498
/home/dk3810/workspace/python/pda/scripts/src/pandas/pandas/lib.so in pandas.lib.SeriesIndex.__set__ (pandas/lib.c:29775)()
AssertionError: Index length did not match values
A workaround / expected behaviour
In [81]: s.resample('T').to_period()
Out[81]:
2013-04-12 19:15 325.000000
2013-04-12 19:16 326.899994
...
2013-04-12 22:58 305.600006
2013-04-12 22:59 320.444458
Freq: T, Length: 225, dtype: float32
More information
In [83]: s
Out[83]:
2013-04-12 19:15:25 323
2013-04-12 19:15:28 NaN
...
2013-04-12 22:59:55 319
2013-04-12 22:59:56 NaN
2013-04-12 22:59:57 NaN
2013-04-12 22:59:58 NaN
2013-04-12 22:59:59 NaN
Name: aggregate, Length: 13034, dtype: float32
In [76]: s.index
Out[76]:
<class 'pandas.tseries.index.DatetimeIndex'>
[2013-04-12 19:15:25, ..., 2013-04-12 22:59:59]
Length: 13034, Freq: None, Timezone: None
In [77]: s.head()
Out[77]:
2013-04-12 19:15:25 323
2013-04-12 19:15:28 NaN
2013-04-12 19:15:29 NaN
2013-04-12 19:15:30 NaN
2013-04-12 19:15:31 327
Name: aggregate, dtype: float32
In [78]: s.resample('T')
Out[78]:
2013-04-12 19:15:00 325.000000
2013-04-12 19:16:00 326.899994
...
2013-04-12 22:58:00 305.600006
2013-04-12 22:59:00 320.444458
Freq: T, Length: 225, dtype: float32
In [80]: pd.__version__
Out[80]: '0.12.0.dev-f61d7e3'
In [84]: type(s)
Out[84]: pandas.core.series.TimeSeries
(Please let me know if you need more info! I'm using Ubuntu 13.04. It's entirely possible that this isn't a bug but instead I am doing something stupid. Oh, and let me take this opportunity to thank the Pandas dev team! Pandas is awesome!!! THANK YOU!)