-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
ENH: Support nested renaming / selection #26399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
aa43cf6
8bd8e31
10c8f40
2e52653
06a86ec
9e636c1
14f66e6
2c3d11a
cdf9373
2c544f0
c0cd575
386cca1
2f6e1dc
6d8a18a
6c1f567
bcc63f5
769a909
1da90d4
a028f48
0ddd51f
42e69a1
769d7d3
1cee0e2
6369eb1
02d7169
eb9ba8f
7df14d7
cf8db51
9501e82
d65afe4
25dca1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -144,8 +144,30 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, | |
return new_items, new_blocks | ||
|
||
def aggregate(self, func, *args, **kwargs): | ||
|
||
_level = kwargs.pop('_level', None) | ||
|
||
relabeling = func is None and _is_multi_agg_with_relabel(**kwargs) | ||
if relabeling: | ||
# Normalize the aggregation functions as Dict[column, List[func]], | ||
# process normally, then fixup the names. | ||
# TODO(Py35): When we drop python 3.5, change this to | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm couldn't we just do this now since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't recall if I checked, but I thought we needed this to ensure that the order of the arguments is respected in |
||
# defaultdict(list) | ||
func = OrderedDict() | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
order = [] | ||
columns, pairs = list(zip(*kwargs.items())) | ||
|
||
for i, (name, (column, aggfunc)) in enumerate(zip(columns, pairs)): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if column in func: | ||
func[column].append(aggfunc) | ||
else: | ||
func[column] = [aggfunc] | ||
order.append((column, _get_agg_name(aggfunc))) | ||
kwargs = {} | ||
elif func is None: | ||
# nicer error message | ||
raise TypeError("Must provide 'func' or tuples of " | ||
"'(column, aggfunc).") | ||
|
||
result, how = self._aggregate(func, _level=_level, *args, **kwargs) | ||
if how is None: | ||
return result | ||
|
@@ -179,6 +201,10 @@ def aggregate(self, func, *args, **kwargs): | |
self._insert_inaxis_grouper_inplace(result) | ||
result.index = np.arange(len(result)) | ||
|
||
if relabeling: | ||
result = result[order] | ||
result.columns = columns | ||
|
||
return result._convert(datetime=True) | ||
|
||
agg = aggregate | ||
|
@@ -791,11 +817,8 @@ def _aggregate_multiple_funcs(self, arg, _level): | |
# list of functions / function names | ||
columns = [] | ||
for f in arg: | ||
if isinstance(f, str): | ||
columns.append(f) | ||
else: | ||
# protect against callables without names | ||
columns.append(com.get_callable_name(f)) | ||
columns.append(_get_agg_name(f)) | ||
|
||
arg = zip(columns, arg) | ||
|
||
results = OrderedDict() | ||
|
@@ -1292,6 +1315,16 @@ class DataFrameGroupBy(NDFrameGroupBy): | |
A | ||
1 1 2 0.590716 | ||
2 3 4 0.704907 | ||
|
||
To control the output names with different aggregations | ||
per column, pass tuples of ``(column, aggfunc)`` as kwargs | ||
|
||
>>> df.groupby("A").agg(b_min=("B", "min"), c_sum=("C", "sum")) | ||
>>> | ||
b_min c_sum | ||
A | ||
1 1 0.825627 | ||
2 3 2.218618 | ||
""") | ||
|
||
@Substitution(see_also=_agg_see_also_doc, | ||
|
@@ -1300,7 +1333,7 @@ class DataFrameGroupBy(NDFrameGroupBy): | |
klass='DataFrame', | ||
axis='') | ||
@Appender(_shared_docs['aggregate']) | ||
def aggregate(self, arg, *args, **kwargs): | ||
def aggregate(self, arg=None, *args, **kwargs): | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return super().aggregate(arg, *args, **kwargs) | ||
|
||
agg = aggregate | ||
|
@@ -1573,3 +1606,48 @@ def groupby_series(obj, col=None): | |
return results | ||
|
||
boxplot = boxplot_frame_groupby | ||
|
||
|
||
def _is_multi_agg_with_relabel(**kwargs): | ||
""" | ||
Check whether the kwargs pass to .agg look like multi-agg with relabling. | ||
|
||
Parameters | ||
---------- | ||
**kwargs : dict | ||
|
||
Returns | ||
------- | ||
bool | ||
|
||
Examples | ||
-------- | ||
>>> _is_multi_agg_with_relabel(a='max') | ||
False | ||
>>> _is_multi_agg_with_relabel(a_max=('a', 'max'), | ||
... a_min=('a', 'min')) | ||
True | ||
>>> _is_multi_agg_with_relabel() | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
return all( | ||
isinstance(v, tuple) and len(v) == 2 | ||
for v in kwargs.values() | ||
) and kwargs | ||
|
||
|
||
def _get_agg_name(arg): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
|
||
Parameters | ||
---------- | ||
arg | ||
|
||
Returns | ||
------- | ||
|
||
""" | ||
if isinstance(arg, str): | ||
return arg | ||
else: | ||
# protect against callables without names | ||
return com.get_callable_name(arg) |
Uh oh!
There was an error while loading. Please reload this page.