Description
Code Sample, a copy-pastable example if possible
In [2]: df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
columns=pd.MultiIndex.from_arrays([['a', 'b', 'b', 'c'],
[1, 1, 2, 2]]))
In [3]: df.groupby([('a', 1)])['b'].mean()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5083)()
138 try:
--> 139 return self.mapping.get_item(val)
140 except (TypeError, ValueError):
/home/nobackup/repo/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item (pandas/_libs/hashtable.c:14478)()
810
--> 811 cpdef get_item(self, int64_t val):
812 cdef khiter_t k
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2521 try:
-> 2522 return self._engine.get_loc(key)
2523 except KeyError:
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5237)()
116
--> 117 cpdef get_loc(self, object val):
118 if is_definitely_invalid_key(val):
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5167)()
140 except (TypeError, ValueError):
--> 141 raise KeyError(val)
142
KeyError: 'b'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5083)()
138 try:
--> 139 return self.mapping.get_item(val)
140 except (TypeError, ValueError):
/home/nobackup/repo/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item (pandas/_libs/hashtable.c:14478)()
810
--> 811 cpdef get_item(self, int64_t val):
812 cdef khiter_t k
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/core/groupby.py in mean(self, *args, **kwargs)
1041 try:
-> 1042 return self._cython_agg_general('mean', **kwargs)
1043 except GroupByError:
/home/nobackup/repo/pandas/pandas/core/groupby.py in _cython_agg_general(self, how, alt, numeric_only)
828 output = {}
--> 829 for name, obj in self._iterate_slices():
830 is_numeric = is_numeric_dtype(obj.dtype)
/home/nobackup/repo/pandas/pandas/core/groupby.py in _iterate_slices(self)
732 def _iterate_slices(self):
--> 733 yield self._selection_name, self._selected_obj
734
/home/nobackup/repo/pandas/pandas/_libs/properties.pyx in pandas._libs.properties.cache_readonly.__get__ (pandas/_libs/properties.c:1604)()
37 else:
---> 38 val = self.func(obj)
39 PyDict_SetItem(cache, self.name, val)
/home/nobackup/repo/pandas/pandas/core/groupby.py in _selected_obj(self)
494 else:
--> 495 return self.obj[self._selection]
496
/home/nobackup/repo/pandas/pandas/core/frame.py in __getitem__(self, key)
2136 else:
-> 2137 return self._getitem_column(key)
2138
/home/nobackup/repo/pandas/pandas/core/frame.py in _getitem_column(self, key)
2143 if self.columns.is_unique:
-> 2144 return self._get_item_cache(key)
2145
/home/nobackup/repo/pandas/pandas/core/generic.py in _get_item_cache(self, item)
1886 if res is None:
-> 1887 values = self._data.get(item)
1888 res = self._box_item_values(item, values)
/home/nobackup/repo/pandas/pandas/core/internals.py in get(self, item, fastpath)
3837 if not isna(item):
-> 3838 loc = self.items.get_loc(item)
3839 else:
/home/nobackup/repo/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2523 except KeyError:
-> 2524 return self._engine.get_loc(self._maybe_cast_indexer(key))
2525
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5237)()
116
--> 117 cpdef get_loc(self, object val):
118 if is_definitely_invalid_key(val):
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5167)()
140 except (TypeError, ValueError):
--> 141 raise KeyError(val)
142
KeyError: 'b'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5083)()
138 try:
--> 139 return self.mapping.get_item(val)
140 except (TypeError, ValueError):
/home/nobackup/repo/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item (pandas/_libs/hashtable.c:14478)()
810
--> 811 cpdef get_item(self, int64_t val):
812 cdef khiter_t k
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2521 try:
-> 2522 return self._engine.get_loc(key)
2523 except KeyError:
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5237)()
116
--> 117 cpdef get_loc(self, object val):
118 if is_definitely_invalid_key(val):
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5167)()
140 except (TypeError, ValueError):
--> 141 raise KeyError(val)
142
KeyError: 'b'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5083)()
138 try:
--> 139 return self.mapping.get_item(val)
140 except (TypeError, ValueError):
/home/nobackup/repo/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item (pandas/_libs/hashtable.c:14478)()
810
--> 811 cpdef get_item(self, int64_t val):
812 cdef khiter_t k
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-3-2998a9dcb677> in <module>()
----> 1 df.groupby([('a', 1)])['b'].mean()
/home/nobackup/repo/pandas/pandas/core/groupby.py in mean(self, *args, **kwargs)
1046 self._set_group_selection()
1047 f = lambda x: x.mean(axis=self.axis, **kwargs)
-> 1048 return self._python_agg_general(f)
1049
1050 @Substitution(name='groupby')
/home/nobackup/repo/pandas/pandas/core/groupby.py in _python_agg_general(self, func, *args, **kwargs)
849 # iterate through "columns" ex exclusions to populate output dict
850 output = {}
--> 851 for name, obj in self._iterate_slices():
852 try:
853 result, counts = self.grouper.agg_series(obj, f)
/home/nobackup/repo/pandas/pandas/core/groupby.py in _iterate_slices(self)
731
732 def _iterate_slices(self):
--> 733 yield self._selection_name, self._selected_obj
734
735 def transform(self, func, *args, **kwargs):
/home/nobackup/repo/pandas/pandas/_libs/properties.pyx in pandas._libs.properties.cache_readonly.__get__ (pandas/_libs/properties.c:1604)()
36 val = <object> PyDict_GetItem(cache, self.name)
37 else:
---> 38 val = self.func(obj)
39 PyDict_SetItem(cache, self.name, val)
40 return val
/home/nobackup/repo/pandas/pandas/core/groupby.py in _selected_obj(self)
493 return self.obj
494 else:
--> 495 return self.obj[self._selection]
496
497 def _reset_group_selection(self):
/home/nobackup/repo/pandas/pandas/core/frame.py in __getitem__(self, key)
2135 return self._getitem_multilevel(key)
2136 else:
-> 2137 return self._getitem_column(key)
2138
2139 def _getitem_column(self, key):
/home/nobackup/repo/pandas/pandas/core/frame.py in _getitem_column(self, key)
2142 # get column
2143 if self.columns.is_unique:
-> 2144 return self._get_item_cache(key)
2145
2146 # duplicate columns & possible reduce dimensionality
/home/nobackup/repo/pandas/pandas/core/generic.py in _get_item_cache(self, item)
1885 res = cache.get(item)
1886 if res is None:
-> 1887 values = self._data.get(item)
1888 res = self._box_item_values(item, values)
1889 cache[item] = res
/home/nobackup/repo/pandas/pandas/core/internals.py in get(self, item, fastpath)
3836
3837 if not isna(item):
-> 3838 loc = self.items.get_loc(item)
3839 else:
3840 indexer = np.arange(len(self.items))[isna(self.items)]
/home/nobackup/repo/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2522 return self._engine.get_loc(key)
2523 except KeyError:
-> 2524 return self._engine.get_loc(self._maybe_cast_indexer(key))
2525
2526 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5237)()
115 util.set_value_at(arr, loc, value)
116
--> 117 cpdef get_loc(self, object val):
118 if is_definitely_invalid_key(val):
119 raise TypeError("'{val}' is an invalid key".format(val=val))
/home/nobackup/repo/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5167)()
139 return self.mapping.get_item(val)
140 except (TypeError, ValueError):
--> 141 raise KeyError(val)
142
143 cdef inline _get_loc_duplicates(self, object val):
KeyError: 'b'
(most horrible stack trace I have ever seen in pandas!)
Problem description
`DataFrameGroupBy.__getitem__` is expected to parallel `DataFrame.__getitem__`, so it should support partial keys on `MultiIndex` columns.
Expected Output
In [5]: df.groupby([('a', 1)])[[('b', 1), ('b', 2)]].mean()
Out[5]:
          b
          1    2
(a, 1)
1       3.0  2.5
3       4.0  5.0
Output of pd.show_versions()
INSTALLED VERSIONS
commit: b539298
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.9.0-3-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.UTF-8
LOCALE: it_IT.UTF-8
pandas: 0.21.0rc1+30.gb539298ca
pytest: 3.0.6
pip: 9.0.1
setuptools: None
Cython: 0.25.2
numpy: 1.12.1
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 5.1.0.dev
sphinx: 1.5.6
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: 1.2.1
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: None
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: None
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.2.1