-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Allow multiple lambdas in Groupby.aggregate #26905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
d877ae9
41b6096
54c36a1
9c2bcf2
d549046
3ece0a5
dde7610
b2947bd
6581abc
868cc60
be712d8
5aa538d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
from pandas.core.dtypes.common import ( | ||
ensure_int64, ensure_platform_int, is_bool, is_datetimelike, | ||
is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar) | ||
from pandas.core.dtypes.inference import is_dict_like, is_list_like | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from pandas.core.dtypes.missing import isna, notna | ||
|
||
from pandas._typing import FrameOrSeries | ||
|
@@ -47,6 +48,7 @@ | |
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) | ||
# TODO(typing) the return value on this callable should be any *scalar*. | ||
AggScalar = Union[str, Callable[..., Any]] | ||
ScalarResult = typing.TypeVar("ScalarResult") # TODO: fix & move to _typing. | ||
|
||
|
||
def whitelist_method_generator(base_class: Type[GroupBy], | ||
|
@@ -208,6 +210,8 @@ def aggregate(self, func, *args, **kwargs): | |
raise TypeError("Must provide 'func' or tuples of " | ||
"'(column, aggfunc).") | ||
|
||
func = _maybe_mangle_lambdas(func) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm I don't think you actually need to do this here, rather put it around https://github.com/pandas-dev/pandas/pull/26905/files#diff-bfee1ba9e7cb79839776fac1a57ed940L810 and pull out the change you have in https://github.com/pandas-dev/pandas/pull/26905/files#diff-bfee1ba9e7cb79839776fac1a57ed940L832 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IIUC, the one on L810 is SeriesGroupBy.aggregate. I think it's entirely separate from NDFrameGroupBy.aggregate. |
||
|
||
result, how = self._aggregate(func, _level=_level, *args, **kwargs) | ||
if how is None: | ||
return result | ||
|
@@ -830,6 +834,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs): | |
if isinstance(func_or_funcs, abc.Iterable): | ||
# Catch instances of lists / tuples | ||
# but not the class list / tuple itself. | ||
func_or_funcs = _maybe_mangle_lambdas(func_or_funcs) | ||
ret = self._aggregate_multiple_funcs(func_or_funcs, | ||
(_level or 0) + 1) | ||
if relabeling: | ||
|
@@ -1710,3 +1715,97 @@ def _normalize_keyword_aggregation(kwargs): | |
order.append((column, | ||
com.get_callable_name(aggfunc) or aggfunc)) | ||
return aggspec, columns, order | ||
|
||
|
||
def _make_lambda( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need this? Seems like this could be done a lot more succinctly with a partial, i.e.: f = functools.partial(func)
f.__name__ = "<lambda_{}>".format(i) Could be done directly in loop so one less function and has the added benefit of maintaining other function attributes There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you find it strange to use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yea I can see where that is strange (effectively using There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I find the current construction more informative (e.g. it has a doc-string and typing) |
||
func: Callable[..., ScalarResult], i: int | ||
) -> Callable[..., ScalarResult]: | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add Parameters & types |
||
Make a new function with name <lambda_i> | ||
|
||
Parameters | ||
---------- | ||
func : Callable | ||
The lambda function to call. | ||
i : int | ||
The counter to use for the name. | ||
|
||
Returns | ||
------- | ||
Callable | ||
Same as the caller but with name <lambda_i> | ||
""" | ||
def f(*args, **kwargs): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return func(*args, **kwargs) | ||
f.__name__ = "<lambda_{}>".format(i) | ||
return f | ||
|
||
|
||
def _managle_lambda_list( | ||
aggfuncs: typing.Sequence[Callable[..., ScalarResult]] | ||
) -> typing.Sequence[Callable[..., ScalarResult]]: | ||
""" | ||
Possibly mangle a list of aggfuncs. | ||
|
||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Notes | ||
----- | ||
If just one aggfunc is passed, the name will not be mangled. | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
if len(aggfuncs) <= 1: | ||
# don't mangle for .agg([lambda x: .]) | ||
return aggfuncs | ||
i = 0 | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
mangled_aggfuncs = [] | ||
for aggfunc in aggfuncs: | ||
if com.get_callable_name(aggfunc) == "<lambda>": | ||
aggfunc = _make_lambda(aggfunc, i) | ||
i += 1 | ||
mangled_aggfuncs.append(aggfunc) | ||
|
||
return mangled_aggfuncs | ||
|
||
|
||
def _maybe_mangle_lambdas(agg_spec): | ||
""" | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Make new lambdas with unique names. | ||
|
||
Parameters | ||
---------- | ||
agg_spec : Any | ||
An argument to NDFrameGroupBy.agg. | ||
Non-dict-like `agg_spec` are passed through as is. | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
For dict-like `agg_spec` a new spec is returned | ||
with name-mangled lambdas. | ||
|
||
Returns | ||
------- | ||
mangled : Any | ||
Same type as the input. | ||
|
||
Examples | ||
-------- | ||
>>> _maybe_mangle_lambdas('sum') | ||
'sum' | ||
|
||
>>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP | ||
[<function __main__.<lambda>()>, | ||
<function pandas...._make_lambda.<locals>.f(*args, **kwargs)>] | ||
""" | ||
is_dict = is_dict_like(agg_spec) | ||
if not (is_dict or is_list_like(agg_spec)): | ||
return agg_spec | ||
mangled_aggspec = type(agg_spec)() # dict or OrderedDict | ||
|
||
if is_dict: | ||
for key in agg_spec: | ||
aggfuncs = agg_spec[key] | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): | ||
mangled_aggfuncs = _managle_lambda_list(aggfuncs) | ||
else: | ||
mangled_aggfuncs = aggfuncs | ||
|
||
mangled_aggspec[key] = mangled_aggfuncs or aggfuncs | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
mangled_aggspec = _managle_lambda_list(agg_spec) | ||
|
||
return mangled_aggspec |
Uh oh!
There was an error while loading. Please reload this page.