Skip to content

BUG: KeyError from resample().median() with duplicate column names #14233

Closed
@patricktokeeffe

Description

@patricktokeeffe

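I start with a DataFrame (df) containing staggered measurements in four columns that all share the name CO2 (to load it: select the table below, copy it, and read it from the clipboard, e.g. with pd.read_clipboard()):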

                        CO2     CO2     CO2     CO2
2016-09-08 00:00:00     NaN     NaN  1581.0     NaN
2016-09-08 00:00:01  1415.0     NaN     NaN     NaN
2016-09-08 00:00:03     NaN  1455.0     NaN  1446.0
2016-09-08 00:00:05     NaN     NaN  1581.0     NaN
2016-09-08 00:00:06  1415.0     NaN     NaN     NaN
2016-09-08 00:00:08     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:10     NaN     NaN  1581.0     NaN
2016-09-08 00:00:11  1415.0     NaN     NaN     NaN
2016-09-08 00:00:13     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:15     NaN     NaN  1581.0     NaN
2016-09-08 00:00:17  1415.0     NaN     NaN     NaN
2016-09-08 00:00:18     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:20     NaN     NaN  1581.0     NaN
2016-09-08 00:00:22  1415.0     NaN     NaN     NaN
2016-09-08 00:00:23     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:25     NaN     NaN  1581.0     NaN
2016-09-08 00:00:27  1415.0     NaN     NaN     NaN
2016-09-08 00:00:28     NaN  1457.0     NaN  1444.0
2016-09-08 00:00:30     NaN     NaN  1581.0     NaN
2016-09-08 00:00:32  1415.0     NaN     NaN     NaN
2016-09-08 00:00:33     NaN  1456.0     NaN  1444.0
2016-09-08 00:00:35     NaN     NaN  1581.0     NaN
2016-09-08 00:00:37  1415.0     NaN     NaN     NaN
2016-09-08 00:00:38     NaN  1456.0     NaN  1444.0
2016-09-08 00:00:40     NaN     NaN  1580.0     NaN
2016-09-08 00:00:42  1415.0     NaN     NaN     NaN
2016-09-08 00:00:44     NaN  1456.0     NaN  1444.0
2016-09-08 00:00:45     NaN     NaN  1580.0     NaN
2016-09-08 00:00:47  1415.0     NaN     NaN     NaN
2016-09-08 00:00:49     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:50     NaN     NaN  1579.0     NaN
2016-09-08 00:00:52  1415.0     NaN     NaN     NaN
2016-09-08 00:00:54     NaN  1456.0     NaN  1445.0
2016-09-08 00:00:55     NaN     NaN  1579.0     NaN
2016-09-08 00:00:57  1415.0     NaN     NaN     NaN
2016-09-08 00:00:59     NaN  1456.0     NaN  1445.0

When I try to aggregate measurements into 5-second intervals using df.resample('5s').median(), I get this traceback:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in median(self)
    980         try:
--> 981             return self._cython_agg_general('median')
    982         except GroupByError:

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_agg_general(self, how, numeric_only)
   3047         new_items, new_blocks = self._cython_agg_blocks(
-> 3048             how, numeric_only=numeric_only)
   3049         return self._wrap_agged_blocks(new_items, new_blocks)

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_agg_blocks(self, how, numeric_only)
   3084             result, _ = self.grouper.aggregate(
-> 3085                 block.values, how, axis=agg_axis)
   3086 

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, values, how, axis)
   1821     def aggregate(self, values, how, axis=0):
-> 1822         return self._cython_operation('aggregate', values, how, axis)
   1823 

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _cython_operation(self, kind, values, how, axis)
   1757             func, dtype_str = self._get_cython_function(
-> 1758                 kind, how, values, is_numeric)
   1759         except NotImplementedError:

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _get_cython_function(self, kind, how, values, is_numeric)
   1698 
-> 1699         ftype = self._cython_functions[kind][how]
   1700 

KeyError: 'median'

During handling of the above exception, another exception occurred:

AssertionError                            Traceback (most recent call last)
<ipython-input-55-c17a77e187f3> in <module>()
      1 df = chamber_k30['20160908':'20160908T0000']
----> 2 df.resample('5s').median()

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\resample.py in f(self, _method)
    508 
    509     def f(self, _method=method):
--> 510         return self._downsample(_method)
    511     f.__doc__ = getattr(GroupBy, method).__doc__
    512     setattr(Resampler, method, f)

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\resample.py in _downsample(self, how, **kwargs)
    661         # we want to call the actual grouper method here
    662         result = obj.groupby(
--> 663             self.grouper, axis=self.axis).aggregate(how, **kwargs)
    664 
    665         result = self._apply_loffset(result)

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, arg, *args, **kwargs)
   3595     @Appender(SelectionMixin._agg_doc)
   3596     def aggregate(self, arg, *args, **kwargs):
-> 3597         return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
   3598 
   3599     agg = aggregate

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in aggregate(self, arg, *args, **kwargs)
   3112 
   3113         _level = kwargs.pop('_level', None)
-> 3114         result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
   3115         if how is None:
   3116             return result

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
    426         _level = kwargs.pop('_level', None)
    427         if isinstance(arg, compat.string_types):
--> 428             return getattr(self, arg)(*args, **kwargs), None
    429 
    430         if isinstance(arg, dict):

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in median(self)
    990                     x = Series(x)
    991                 return x.median(axis=self.axis)
--> 992             return self._python_agg_general(f)
    993 
    994     @Substitution(name='groupby')

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in _python_agg_general(self, func, *args, **kwargs)
    775         for name, obj in self._iterate_slices():
    776             try:
--> 777                 result, counts = self.grouper.agg_series(obj, f)
    778                 output[name] = self._try_cast(result, obj)
    779             except TypeError:

C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\groupby.py in agg_series(self, obj, func)
   2063         dummy = obj[:0]
   2064         grouper = lib.SeriesBinGrouper(obj, func, self.bins, dummy)
-> 2065         return grouper.get_result()
   2066 
   2067     # ----------------------------------------------------------------------

pandas\src\reduce.pyx in pandas.lib.SeriesBinGrouper.get_result (pandas\lib.c:35367)()

pandas\src\reduce.pyx in pandas.lib.Slider.__init__ (pandas\lib.c:40335)()

AssertionError: 

The other documented dispatching methods (sum, mean, std, sem, max, min, first, last) work just fine. The only other exception is ohlc, which raises an InvalidIndexError.

I can work around the problem like so: df.resample('5s').apply(lambda x: x.median()). However, it seems like dispatching median() directly should work here as well.

output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 23 Stepping 10, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None

pandas: 0.18.1
nose: 1.3.7
pip: 8.1.2
setuptools: 23.0.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.17.1
statsmodels: 0.6.1
xarray: None
IPython: 4.2.0
sphinx: 1.3.1
patsy: 0.4.1
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.6.0
matplotlib: 1.5.1
openpyxl: 2.3.2
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.2
lxml: 3.6.0
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.13
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.40.0
pandas_datareader: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions