-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: Lambda function returns KeyError in DataFrameGroupBy.agg #27921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
7e461a1
1314059
8bcb313
e313083
197c879
b518b2f
c817df2
74d4684
7df87cb
d5e52cb
5be9c54
29d8348
275a039
5dd61da
b5b44e9
473800f
bad1d72
60e426a
943437a
a3ba061
a6719f1
aabfcd2
0950bc4
c992fec
fe33469
ace9035
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -268,7 +268,9 @@ def aggregate(self, func, *args, **kwargs): | |
result.index = np.arange(len(result)) | ||
|
||
if relabeling: | ||
result = result[order] | ||
|
||
# use the reordered column indices | ||
result = result.iloc[:, order] | ||
result.columns = columns | ||
|
||
return result._convert(datetime=True) | ||
|
@@ -1731,8 +1733,8 @@ def _normalize_keyword_aggregation(kwargs): | |
The transformed kwargs. | ||
columns : List[str] | ||
The user-provided keys. | ||
order : List[Tuple[str, str]] | ||
Pairs of the input and output column names. | ||
col_idx_order : List[int] | ||
List of column indices. | ||
|
||
Examples | ||
-------- | ||
|
@@ -1759,7 +1761,39 @@ def _normalize_keyword_aggregation(kwargs): | |
else: | ||
aggspec[column] = [aggfunc] | ||
order.append((column, com.get_callable_name(aggfunc) or aggfunc)) | ||
return aggspec, columns, order | ||
|
||
# uniquify aggfunc name if duplicated in order list | ||
uniquified_order = _uniquify_aggfunc(order) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you verify that the output example in the docstring still passes? Can you also add a docstring example where this new code is hit in https://github.com/pandas-dev/pandas/pull/27921/files#diff-bfee1ba9e7cb79839776fac1a57ed940R1742? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, verified. And added tests for |
||
|
||
# GH 25719: aggspec changes the order of the assigned columns in the aggregation, | ||
# so uniquified_aggspec stores the uniquified aggspec order, which is compared with | ||
# uniquified_order by index | ||
aggspec_order = [ | ||
(column, com.get_callable_name(aggfunc) or aggfunc) | ||
for column, aggfuncs in aggspec.items() | ||
for aggfunc in aggfuncs | ||
] | ||
uniquified_aggspec = _uniquify_aggfunc(aggspec_order) | ||
|
||
# get the new indices of the columns by comparison | ||
col_idx_order = [uniquified_aggspec.index(o) for o in uniquified_order] | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return aggspec, columns, col_idx_order | ||
|
||
|
||
def _uniquify_aggfunc(seq): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"""Uniquify aggfunc name of the pairs in the order list | ||
|
||
Examples: | ||
-------- | ||
>>> _uniquify_aggfunc([('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]) | ||
[('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')] | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
return [ | ||
(pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) | ||
if seq.count(pair) > 1 | ||
else pair | ||
for i, pair in enumerate(seq) | ||
] | ||
|
||
|
||
# TODO: Can't use, because mypy doesn't like us setting __name__ | ||
|
Uh oh!
There was an error while loading. Please reload this page.