Description
Code Sample, a copy-pastable example if possible
In [1]: import pandas as pd
In [2]: pd.__version__
Out[2]: '0.23.0rc2'
In [3]: dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific')
...: for i in range(1, 5)]
...: df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates})
...: grouped = df.groupby('A')
...:
In [4]: df
Out[4]:
A B
0 a 2016-01-01 12:00:00-08:00
1 b 2016-01-02 12:00:00-08:00
2 a 2016-01-03 12:00:00-08:00
3 b 2016-01-04 12:00:00-08:00
In [5]: grouped.apply(lambda x: x.iloc[0])[0]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3062 try:
-> 3063 return self._engine.get_loc(key)
3064 except KeyError:
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5720)()
138
--> 139 cpdef get_loc(self, object val):
140 if is_definitely_invalid_key(val):
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5566)()
160 try:
--> 161 return self.mapping.get_item(val)
162 except (TypeError, ValueError):
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:22442)()
1491
-> 1492 cpdef get_item(self, object val):
1493 cdef khiter_t k
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:22396)()
1499 else:
-> 1500 raise KeyError(val)
1501
KeyError: 0
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-5-2b16555d6e05> in <module>()
----> 1 grouped.apply(lambda x: x.iloc[0])[0]
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\frame.py in __getitem__(self, key)
2685 return self._getitem_multilevel(key)
2686 else:
-> 2687 return self._getitem_column(key)
2688
2689 def _getitem_column(self, key):
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\frame.py in _getitem_column(self, key)
2692 # get column
2693 if self.columns.is_unique:
-> 2694 return self._get_item_cache(key)
2695
2696 # duplicate columns & possible reduce dimensionality
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\generic.py in _get_item_cache(self, item)
2485 res = cache.get(item)
2486 if res is None:
-> 2487 values = self._data.get(item)
2488 res = self._box_item_values(item, values)
2489 cache[item] = res
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\internals.py in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3063 return self._engine.get_loc(key)
3064 except KeyError:
-> 3065 return self._engine.get_loc(self._maybe_cast_indexer(key))
3066
3067 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5720)()
137 util.set_value_at(arr, loc, value)
138
--> 139 cpdef get_loc(self, object val):
140 if is_definitely_invalid_key(val):
141 raise TypeError("'{val}' is an invalid key".format(val=val))
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5566)()
159
160 try:
--> 161 return self.mapping.get_item(val)
162 except (TypeError, ValueError):
163 raise KeyError(val)
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:22442)()
1490 sizeof(uint32_t)) # flags
1491
-> 1492 cpdef get_item(self, object val):
1493 cdef khiter_t k
1494 if val != val or val is None:
C:\EclipseWorkspaces\LiClipseWorkspace\pandas-dev\pandas36\pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:22396)()
1498 return self.table.vals[k]
1499 else:
-> 1500 raise KeyError(val)
1501
1502 cpdef set_item(self, object key, Py_ssize_t val):
KeyError: 0
Problem description
Related to #20949 (and moved from there at request).
Note that if you do the following, it works:
In [6]: grouped.nth(0)['B'].iloc[0]
Out[6]: Timestamp('2016-01-01 12:00:00-0800', tz='US/Pacific')
In [7]: grouped.apply(lambda x: x.iloc[0])[0]
Out[7]: Timestamp('2016-01-01 12:00:00-0800', tz='US/Pacific')
So performing one operation (in this case `nth`) prior to the `apply` then makes the `apply` work.
Expected Output
In [5]: grouped.apply(lambda x: x.iloc[0])[0]
Out[5]: Timestamp('2016-01-01 12:00:00-0800', tz='US/Pacific')
Output of pd.show_versions()
INSTALLED VERSIONS
commit: b02c69a
python: 3.6.4.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.23.0rc2
pytest: 3.4.0
pip: 9.0.1
setuptools: 38.5.1
Cython: 0.25.1
numpy: 1.14.1
scipy: 1.0.0
pyarrow: 0.8.0
xarray: None
IPython: 6.2.1
sphinx: 1.7.1
patsy: 0.5.0
dateutil: 2.6.1
pytz: 2018.3
blosc: 1.5.1
bottleneck: 1.2.1
tables: 3.4.2
numexpr: 2.6.4
feather: None
matplotlib: 2.2.0
openpyxl: 2.5.0
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.2
lxml: 4.1.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.5
pymysql: 0.8.0
psycopg2: None
jinja2: 2.10
s3fs: 0.1.3
fastparquet: None
pandas_gbq: None
pandas_datareader: None