Closed
Description
I start with a dataframe (`df`) containing staggered measurements (`select->copy->from_clipboard()`):
CO2 CO2 CO2 CO2
2016-09-08 00:00:00 1581.0
2016-09-08 00:00:01 1415.0
2016-09-08 00:00:03 1455.0 1446.0
2016-09-08 00:00:05 1581.0
2016-09-08 00:00:06 1415.0
2016-09-08 00:00:08 1456.0 1445.0
2016-09-08 00:00:10 1581.0
2016-09-08 00:00:11 1415.0
2016-09-08 00:00:13 1456.0 1445.0
2016-09-08 00:00:15 1581.0
2016-09-08 00:00:17 1415.0
2016-09-08 00:00:18 1456.0 1445.0
2016-09-08 00:00:20 1581.0
2016-09-08 00:00:22 1415.0
2016-09-08 00:00:23 1456.0 1445.0
2016-09-08 00:00:25 1581.0
2016-09-08 00:00:27 1415.0
2016-09-08 00:00:28 1457.0 1444.0
2016-09-08 00:00:30 1581.0
2016-09-08 00:00:32 1415.0
2016-09-08 00:00:33 1456.0 1444.0
2016-09-08 00:00:35 1581.0
2016-09-08 00:00:37 1415.0
2016-09-08 00:00:38 1456.0 1444.0
2016-09-08 00:00:40 1580.0
2016-09-08 00:00:42 1415.0
2016-09-08 00:00:44 1456.0 1444.0
2016-09-08 00:00:45 1580.0
2016-09-08 00:00:47 1415.0
2016-09-08 00:00:49 1456.0 1445.0
2016-09-08 00:00:50 1579.0
2016-09-08 00:00:52 1415.0
2016-09-08 00:00:54 1456.0 1445.0
2016-09-08 00:00:55 1579.0
2016-09-08 00:00:57 1415.0
2016-09-08 00:00:59 1456.0 1445.0
When I try to aggregate measurements into 5-second intervals using `df.resample('5s').median()`, I get this traceback:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in median(self)
980 try:
--> 981 return self._cython_agg_general('median')
982 except GroupByError:
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_agg_general(self, how, numeric_only)
3047 new_items, new_blocks = self._cython_agg_blocks(
-> 3048 how, numeric_only=numeric_only)
3049 return self._wrap_agged_blocks(new_items, new_blocks)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_agg_blocks(self, how, numeric_only)
3084 result, _ = self.grouper.aggregate(
-> 3085 block.values, how, axis=agg_axis)
3086
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, values, how, axis)
1821 def aggregate(self, values, how, axis=0):
-> 1822 return self._cython_operation('aggregate', values, how, axis)
1823
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_operation(self, kind, values, how, axis)
1757 func, dtype_str = self._get_cython_function(
-> 1758 kind, how, values, is_numeric)
1759 except NotImplementedError:
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _get_cython_function(self, kind, how, values, is_numeric)
1698
-> 1699 ftype = self._cython_functions[kind][how]
1700
KeyError: 'median'
During handling of the above exception, another exception occurred:
AssertionError Traceback (most recent call last)
<ipython-input-55-c17a77e187f3> in <module>()
1 df = chamber_k30['20160908':'20160908T0000']
----> 2 df.resample('5s').median()
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\resample.py in f(self, _method)
508
509 def f(self, _method=method):
--> 510 return self._downsample(_method)
511 f.__doc__ = getattr(GroupBy, method).__doc__
512 setattr(Resampler, method, f)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\resample.py in _downsample(self, how, **kwargs)
661 # we want to call the actual grouper method here
662 result = obj.groupby(
--> 663 self.grouper, axis=self.axis).aggregate(how, **kwargs)
664
665 result = self._apply_loffset(result)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, arg, *args, **kwargs)
3595 @Appender(SelectionMixin._agg_doc)
3596 def aggregate(self, arg, *args, **kwargs):
-> 3597 return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
3598
3599 agg = aggregate
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, arg, *args, **kwargs)
3112
3113 _level = kwargs.pop('_level', None)
-> 3114 result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
3115 if how is None:
3116 return result
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
426 _level = kwargs.pop('_level', None)
427 if isinstance(arg, compat.string_types):
--> 428 return getattr(self, arg)(*args, **kwargs), None
429
430 if isinstance(arg, dict):
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in median(self)
990 x = Series(x)
991 return x.median(axis=self.axis)
--> 992 return self._python_agg_general(f)
993
994 @Substitution(name='groupby')
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _python_agg_general(self, func, *args, **kwargs)
775 for name, obj in self._iterate_slices():
776 try:
--> 777 result, counts = self.grouper.agg_series(obj, f)
778 output[name] = self._try_cast(result, obj)
779 except TypeError:
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in agg_series(self, obj, func)
2063 dummy = obj[:0]
2064 grouper = lib.SeriesBinGrouper(obj, func, self.bins, dummy)
-> 2065 return grouper.get_result()
2066
2067 # ----------------------------------------------------------------------
pandas\src\reduce.pyx in pandas.lib.SeriesBinGrouper.get_result (pandas\lib.c:35367)()
pandas\src\reduce.pyx in pandas.lib.Slider.__init__ (pandas\lib.c:40335)()
AssertionError:
The other documented dispatching methods (`sum`, `mean`, `std`, `sem`, `max`, `min`, `first`, `last`) work just fine (except for `ohlc`, which produces an `InvalidIndexError`).
I can work around the problem like so: `df.resample('5s').apply(lambda x: x.median())`. But it seems like dispatching should work here...
Output of `pd.show_versions()`:
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 23 Stepping 10, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
pandas: 0.18.1
nose: 1.3.7
pip: 8.1.2
setuptools: 23.0.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.17.1
statsmodels: 0.6.1
xarray: None
IPython: 4.2.0
sphinx: 1.3.1
patsy: 0.4.1
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.6.0
matplotlib: 1.5.1
openpyxl: 2.3.2
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.2
lxml: 3.6.0
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.13
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.40.0
pandas_datareader: None