From 21bc1da8ce70573f01c1f8eeca48ff9b236e8cc5 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Thu, 28 May 2020 23:10:20 +0530 Subject: [PATCH 01/24] 'TypeError is raised when tuple or list or NamedAgg is given in named aggregation --- pandas/core/groupby/generic.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea4b6f4e65341..390c03045d476 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -234,9 +234,15 @@ def aggregate( relabeling = func is None columns = None no_arg_message = "Must provide 'func' or named aggregation **kwargs." + tuple_given_message = "'func' is expected but recieved {} in **kwargs." if relabeling: columns = list(kwargs) - func = [kwargs[col] for col in columns] + func = [] + for col in columns: + if isinstance(kwargs[col],(list,NamedAgg,tuple)): + raise TypeError(tuple_given_message.format(type(kwargs[col]))) + func.append(kwargs[col]) + kwargs = {} if not columns: raise TypeError(no_arg_message) @@ -298,11 +304,7 @@ def _aggregate_multiple_funcs(self, arg): columns = list(arg.keys()) arg = arg.items() - elif any(isinstance(x, (tuple, list)) for x in arg): - arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] - # indicated column order - columns = next(zip(*arg)) else: # list of functions / function names columns = [] From 7233382de9a4f84d1cc49c4fee1eb801ebd96165 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Thu, 28 May 2020 23:22:51 +0530 Subject: [PATCH 02/24] 'BUG: #GH34422 raises TypeError when parameters to named aggregation are tuple, list or NamedAgg' --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 390c03045d476..2b7439279fc9a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -240,7 +240,7 @@ def aggregate( func = [] for col in columns: if isinstance(kwargs[col],(list,NamedAgg,tuple)): - raise TypeError(tuple_given_message.format(type(kwargs[col]))) + raise TypeError(tuple_given_message.format(type(kwargs[col]).__name__)) func.append(kwargs[col]) kwargs = {} From 28c74030b69066eff2c736b63ea7ee24e286ed76 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 01:03:41 +0530 Subject: [PATCH 03/24] 'Fixed PEP-8 issues' --- pandas/core/groupby/generic.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2b7439279fc9a..89f1d364396bd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -239,10 +239,10 @@ def aggregate( columns = list(kwargs) func = [] for col in columns: - if isinstance(kwargs[col],(list,NamedAgg,tuple)): - raise TypeError(tuple_given_message.format(type(kwargs[col]).__name__)) + if isinstance(kwargs[col], (list, NamedAgg, tuple)): + raise TypeError(tuple_given_message.\ + format(type(kwargs[col]).__name__)) func.append(kwargs[col]) - kwargs = {} if not columns: raise TypeError(no_arg_message) @@ -290,7 +290,6 @@ def aggregate( ret = concat(ret, axis=1) return ret - agg = aggregate def _aggregate_multiple_funcs(self, arg): From a9ea9d1e5c814a9277eb6d85a6631946b6f3cb16 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 01:29:57 +0530 Subject: [PATCH 04/24] 'Fixed PEP-8 issues' --- pandas/core/groupby/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 89f1d364396bd..310863ae7b81f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -240,8 +240,9 @@ def aggregate( func = [] for col in columns: if isinstance(kwargs[col], (list, NamedAgg, tuple)): - raise TypeError(tuple_given_message.\ - format(type(kwargs[col]).__name__)) + raise TypeError( + tuple_given_message.format(type(kwargs[col]).__name__) + ) func.append(kwargs[col]) kwargs = {} if not columns: From b57f32198f6775599794fde60ef2f975eb32f398 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 01:35:17 +0530 Subject: [PATCH 05/24] 'Fixed PEP-8 issue' --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 310863ae7b81f..7e5894d07c633 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -242,7 +242,7 @@ def aggregate( if isinstance(kwargs[col], (list, NamedAgg, tuple)): raise TypeError( tuple_given_message.format(type(kwargs[col]).__name__) - ) + ) func.append(kwargs[col]) kwargs = {} if not columns: From fa035772727cd99631bd8f2045fb7717d9932873 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 11:22:13 +0530 Subject: [PATCH 06/24] 'Added tests to test_aggregate.py' --- .../tests/groupby/aggregate/test_aggregate.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index d4b061594c364..8c70e8b8d274d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -511,6 +511,33 @@ def test_mangled(self): expected = pd.DataFrame({"a": [0, 0], "b": [1, 1]}) tm.assert_frame_equal(result, expected) + def test_agg_namedtuple(self): + #GH34422 + s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) + msg = "'func' is expected but recieved NamedAgg in **kwargs." + with pytest.raises(TypeError, match=msg): + s.groupby(s.values).agg( + a=pd.NamedAgg(column='anything', aggfunc='min') + ) + + def test_agg_with_tuple(self): + #GH34422 + s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) + msg = "'func' is expected but recieved tuple in **kwargs." + with pytest.raises(TypeError, match=msg): + s.groupby(s.values).agg( + a=('anything', 'min') + ) + + def test_agg_with_list(self): + #GH34422 + s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) + msg = "'func' is expected but recieved list in **kwargs." + with pytest.raises(TypeError, match=msg): + s.groupby(s.values).agg( + a=['anything', 'min'] + ) + class TestNamedAggregationDataFrame: def test_agg_relabel(self): From c7a7bdd14a013b35a6dfa581adf6fe06c2fac3ab Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 12:11:27 +0530 Subject: [PATCH 07/24] 'Added tests in test_aggregate.py' --- pandas/tests/groupby/aggregate/test_aggregate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8c70e8b8d274d..909e9a07746d5 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -512,25 +512,25 @@ def test_mangled(self): tm.assert_frame_equal(result, expected) def test_agg_namedtuple(self): - #GH34422 + # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "'func' is expected but recieved NamedAgg in **kwargs." with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=pd.NamedAgg(column='anything', aggfunc='min') - ) + ) def test_agg_with_tuple(self): - #GH34422 + # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "'func' is expected but recieved tuple in **kwargs." with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=('anything', 'min') - ) + ) def test_agg_with_list(self): - #GH34422 + # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "'func' is expected but recieved list in **kwargs." with pytest.raises(TypeError, match=msg): From 85f1218f26fb8dd46ac46b25725c14f72b503fac Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 13:09:45 +0530 Subject: [PATCH 08/24] 'Fixed bug' --- pandas/core/groupby/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7e5894d07c633..00835fe3fefd2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -304,7 +304,11 @@ def _aggregate_multiple_funcs(self, arg): columns = list(arg.keys()) arg = arg.items() + elif anyisinstance(x, (tuple, list)) for x in arg): + arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] + # indicated column order + columns = next(zip(*arg)) else: # list of functions / function names columns = [] From 02b8139388a40197b1e99a90319887d42bd564ea Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 13:35:46 +0530 Subject: [PATCH 09/24] 'Fixed bug' --- pandas/core/groupby/generic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 00835fe3fefd2..8e5ecedab91f6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -307,8 +307,6 @@ def _aggregate_multiple_funcs(self, arg): elif anyisinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] - # indicated column order - columns = next(zip(*arg)) else: # list of functions / function names columns = [] From 0042811dcc32c40ab02916e45c6218a2beb7a12f Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 13:36:04 +0530 Subject: [PATCH 10/24] 'Fixed bug' --- pandas/core/groupby/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8e5ecedab91f6..5679d7a68ac9b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -304,15 +304,17 @@ def _aggregate_multiple_funcs(self, arg): columns = list(arg.keys()) arg = arg.items() - elif anyisinstance(x, (tuple, list)) for x in arg): + elif any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] + # indicated column order + columns = next(zip(*arg)) + else: # list of functions / function names columns = [] for f in arg: columns.append(com.get_callable_name(f) or f) - arg = zip(columns, arg) results = {} From e620b1e096a40feb3c5df71cf469e7a436c1ca9c Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 14:24:12 +0530 Subject: [PATCH 11/24] 'Added tests in test_aggregate.py' --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 909e9a07746d5..c58f260c94fa4 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -514,7 +514,7 @@ def test_mangled(self): def test_agg_namedtuple(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is expected but recieved NamedAgg in **kwargs." + msg = "'func' is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=pd.NamedAgg(column='anything', aggfunc='min') @@ -523,7 +523,7 @@ def test_agg_namedtuple(self): def test_agg_with_tuple(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is expected but recieved tuple in **kwargs." + msg = "'func' is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=('anything', 'min') @@ -532,7 +532,7 @@ def test_agg_with_tuple(self): def test_agg_with_list(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is expected but recieved list in **kwargs." + msg = "'func' is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=['anything', 'min'] From 884e899943b0030044dfd5f3c42dba26867a5cbd Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 15:01:46 +0530 Subject: [PATCH 12/24] Added tests in test_aggregate.py --- pandas/core/groupby/generic.py | 2 +- pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5679d7a68ac9b..1e56aa69b49cd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -234,7 +234,7 @@ def aggregate( relabeling = func is None columns = None no_arg_message = "Must provide 'func' or named aggregation **kwargs." - tuple_given_message = "'func' is expected but recieved {} in **kwargs." + tuple_given_message = "func is expected but recieved {} in **kwargs." if relabeling: columns = list(kwargs) func = [] diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c58f260c94fa4..bbd480c907525 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -514,7 +514,7 @@ def test_mangled(self): def test_agg_namedtuple(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is" + msg = "func is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=pd.NamedAgg(column='anything', aggfunc='min') @@ -523,7 +523,7 @@ def test_agg_namedtuple(self): def test_agg_with_tuple(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is" + msg = "func is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=('anything', 'min') @@ -532,7 +532,7 @@ def test_agg_with_tuple(self): def test_agg_with_list(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "'func' is" + msg = "func is" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg( a=['anything', 'min'] From b2ca7a964ea1d5a31c44c526016a4fd319a6c886 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 16:57:27 +0530 Subject: [PATCH 13/24] Fixed Linting issues --- pandas/core/groupby/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1e56aa69b49cd..778b30bbac498 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -291,6 +291,7 @@ def aggregate( ret = concat(ret, axis=1) return ret + agg = aggregate def _aggregate_multiple_funcs(self, arg): From fe1451ab9659fcc82dc154604a6f82415665f0c2 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 17:16:02 +0530 Subject: [PATCH 14/24] Fixed linting issues --- pandas/tests/groupby/aggregate/test_aggregate.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index bbd480c907525..22e7d68c35dc2 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -516,27 +516,21 @@ def test_agg_namedtuple(self): s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "func is" with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg( - a=pd.NamedAgg(column='anything', aggfunc='min') - ) + s.groupby(s.values).agg(a=pd.NamedAgg(column="anything", aggfunc="min")) def test_agg_with_tuple(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "func is" with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg( - a=('anything', 'min') - ) + s.groupby(s.values).agg(a=("anything", "min")) def test_agg_with_list(self): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "func is" with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg( - a=['anything', 'min'] - ) + s.groupby(s.values).agg(a=["anything", "min"]) class TestNamedAggregationDataFrame: From 9a88d94209a271e3390af691f0eb18acda9a0896 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 18:58:16 +0530 Subject: [PATCH 15/24] Added @pytest.mark.parametrize --- .../tests/groupby/aggregate/test_aggregate.py | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 22e7d68c35dc2..3018e0a93f121 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -511,27 +511,21 @@ def test_mangled(self): expected = pd.DataFrame({"a": [0, 0], "b": [1, 1]}) tm.assert_frame_equal(result, expected) - def test_agg_namedtuple(self): + @pytest.mark.parametrize( + "inp", + [ + pd.NamedAgg(column="anything", aggfunc="min"), + ("anything", "min"), + ["anything", "min"], + ], + ) + def test_named_agg_nametuple(self, inp): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "func is" + msg = "func is expected but recieved {}".format(type(inp).__name__) with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg(a=pd.NamedAgg(column="anything", aggfunc="min")) - def test_agg_with_tuple(self): - # GH34422 - s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "func is" - with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg(a=("anything", "min")) - - def test_agg_with_list(self): - # GH34422 - s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "func is" - with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg(a=["anything", "min"]) - class TestNamedAggregationDataFrame: def test_agg_relabel(self): From ce641c945db29c228227d2b93164bfb9d752199f Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 19:04:06 +0530 Subject: [PATCH 16/24] Added @pytest.mark.parameterize --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3018e0a93f121..a25604b8d63fa 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -524,7 +524,7 @@ def test_named_agg_nametuple(self, inp): s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) msg = "func is expected but recieved {}".format(type(inp).__name__) with pytest.raises(TypeError, match=msg): - s.groupby(s.values).agg(a=pd.NamedAgg(column="anything", aggfunc="min")) + s.groupby(s.values).agg(a=inp) class TestNamedAggregationDataFrame: From 6356a9220d884633ecf4f54c959d69db157ea9d2 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Fri, 29 May 2020 19:07:07 +0530 Subject: [PATCH 17/24] Changed NamedAgg to tuple --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 778b30bbac498..776435cfb32fd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -239,7 +239,7 @@ def aggregate( columns = list(kwargs) func = [] for col in columns: - if isinstance(kwargs[col], (list, NamedAgg, tuple)): + if isinstance(kwargs[col], (list, tuple)): raise TypeError( tuple_given_message.format(type(kwargs[col]).__name__) ) @@ -310,12 +310,12 @@ def _aggregate_multiple_funcs(self, arg): # indicated column order columns = next(zip(*arg)) - else: # list of functions / function names columns = [] for f in arg: columns.append(com.get_callable_name(f) or f) + arg = zip(columns, arg) results = {} From ea771f3a142771ac6305279cae3dcbfde709e379 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Tue, 2 Jun 2020 11:53:52 +0530 Subject: [PATCH 18/24] Added function to validate kwargs --- pandas/core/aggregation.py | 39 +++++++++++++++++++++++++++++++++- pandas/core/groupby/generic.py | 14 ++---------- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 6130e05b2a4dc..6509d9550f499 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -5,7 +5,7 @@ from collections import defaultdict from functools import partial -from typing import Any, DefaultDict, List, Sequence, Tuple +from typing import Any, DefaultDict, List, Sequence, Tuple, Union, Callable from pandas.core.dtypes.common import is_dict_like, is_list_like @@ -196,3 +196,40 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any: mangled_aggspec = _managle_lambda_list(agg_spec) return mangled_aggspec + +def validate_func_kwargs( + kwargs: dict, +) -> Tuple[List[str], List[Union[str, Callable[..., Any]]]]: + """ + Validates types of user-provided "named aggregation" kwargs. + `TypeError` is raised if aggfunc is not `str` or callable. + + Parameters + ---------- + kwargs : dict + + Returns + ------- + columns : List[str] + List of user-provied keys. + func : List[Union[str, callable[...,Any]]] + List of user-provided aggfuncs + + Examples + -------- + >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) + (['one', 'two'], ['min','max']) + >>> validate_func_kwargs({'one': lambda x: x+1, 'two': np.min}) + (['one', 'two'], [lambda x:x+1, np.min]) + """ + no_arg_message = "Must provide 'func' or named aggregation **kwargs." + tuple_given_message = "func is expected but recieved {} in **kwargs." + columns = list(kwargs) + func = [] + for col_func in kwargs.values(): + if not (isinstance(col_func, str) or callable(col_func)): + raise TypeError(tuple_given_message.format(type(col_func).__name__)) + func.append(col_func) + if not columns: + raise TypeError(no_arg_message) + return columns, func diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 776435cfb32fd..9cda495658582 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -57,6 +57,7 @@ is_multi_agg_with_relabel, maybe_mangle_lambdas, normalize_keyword_aggregation, + validate_func_kwargs ) import pandas.core.algorithms as algorithms from pandas.core.base import DataError, SpecificationError @@ -233,20 +234,9 @@ def aggregate( relabeling = func is None columns = None - no_arg_message = "Must provide 'func' or named aggregation **kwargs." - tuple_given_message = "func is expected but recieved {} in **kwargs." if relabeling: - columns = list(kwargs) - func = [] - for col in columns: - if isinstance(kwargs[col], (list, tuple)): - raise TypeError( - tuple_given_message.format(type(kwargs[col]).__name__) - ) - func.append(kwargs[col]) + columns, func = validate_func_kwargs(kwargs) kwargs = {} - if not columns: - raise TypeError(no_arg_message) if isinstance(func, str): return getattr(self, func)(*args, **kwargs) From bb9a031e79b61a5e28d8a2d4a804d1326f929ab7 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Tue, 2 Jun 2020 11:59:59 +0530 Subject: [PATCH 19/24] Fixed PEP-8 issues --- pandas/core/aggregation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 6509d9550f499..583794034594d 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -197,6 +197,7 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any: return mangled_aggspec + def validate_func_kwargs( kwargs: dict, ) -> Tuple[List[str], List[Union[str, Callable[..., Any]]]]: @@ -219,7 +220,7 @@ def validate_func_kwargs( -------- >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) (['one', 'two'], ['min','max']) - >>> validate_func_kwargs({'one': lambda x: x+1, 'two': np.min}) + >>> validate_func_kwargs({'one': lambda x: x+1, 'two': np.min}) (['one', 'two'], [lambda x:x+1, np.min]) """ no_arg_message = "Must provide 'func' or named aggregation **kwargs." From 91b650fa014a5cc00d2bffdad05cb9612c44baeb Mon Sep 17 00:00:00 2001 From: guru kiran Date: Tue, 2 Jun 2020 12:49:58 +0530 Subject: [PATCH 20/24] Fixed PEP-8 isuues --- pandas/core/aggregation.py | 4 ++-- pandas/core/groupby/generic.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 583794034594d..88fcafa78fd70 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -5,7 +5,7 @@ from collections import defaultdict from functools import partial -from typing import Any, DefaultDict, List, Sequence, Tuple, Union, Callable +from typing import Any, Callable, DefaultDict, List, Sequence, Tuple, Union from pandas.core.dtypes.common import is_dict_like, is_list_like @@ -219,7 +219,7 @@ def validate_func_kwargs( Examples -------- >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) - (['one', 'two'], ['min','max']) + (['one', 'two'], ['min', 'max']) >>> validate_func_kwargs({'one': lambda x: x+1, 'two': np.min}) (['one', 'two'], [lambda x:x+1, np.min]) """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9cda495658582..d589b0e0fe83c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -57,7 +57,7 @@ is_multi_agg_with_relabel, maybe_mangle_lambdas, normalize_keyword_aggregation, - validate_func_kwargs + validate_func_kwargs, ) import pandas.core.algorithms as algorithms from pandas.core.base import DataError, SpecificationError From 5406abfe5086cdcbbe869b26915486bfc88046c3 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Tue, 2 Jun 2020 13:09:54 +0530 Subject: [PATCH 21/24] Fixed docstring in validate_func_kwargs --- pandas/core/aggregation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 88fcafa78fd70..838722f60b380 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -220,8 +220,6 @@ def validate_func_kwargs( -------- >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) (['one', 'two'], ['min', 'max']) - >>> validate_func_kwargs({'one': lambda x: x+1, 'two': np.min}) - (['one', 'two'], [lambda x:x+1, np.min]) """ no_arg_message = "Must provide 'func' or named aggregation **kwargs." tuple_given_message = "func is expected but recieved {} in **kwargs." From 8a107e19729633b5f70110ac9c9a0e4a510966f4 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Tue, 2 Jun 2020 19:25:01 +0530 Subject: [PATCH 22/24] BUG: pandas SeriesGroupBy.agg issue 34422 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5ef1f9dea5091..c34d68bbf15b6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -907,6 +907,7 @@ Groupby/resample/rolling to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical indices. In particular, the result index shape might change if a copy of the input would be returned. The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`) +- Bug in :meth:`SeriesGroupBy.agg` where any column name is accepted in named aggregation previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) Reshaping ^^^^^^^^^ From 46f6ab12161b813d85d4d5ce649daaf99d6341d8 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Wed, 3 Jun 2020 11:02:22 +0530 Subject: [PATCH 23/24] BUG: SeriesGroupBy.agg allows any column name in named aggregation --- doc/source/whatsnew/v1.1.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c34d68bbf15b6..2fb407799a743 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -907,7 +907,8 @@ Groupby/resample/rolling to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical indices. In particular, the result index shape might change if a copy of the input would be returned. The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`) -- Bug in :meth:`SeriesGroupBy.agg` where any column name is accepted in named aggregation previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) +- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. + The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) Reshaping ^^^^^^^^^ From ffaf87b6d168ee961e1c5769db589addbb6e9574 Mon Sep 17 00:00:00 2001 From: guru kiran Date: Thu, 4 Jun 2020 01:06:43 +0530 Subject: [PATCH 24/24] Added f-string in test_aggregate.py and fixed v1.1.0.rst. --- doc/source/whatsnew/v1.1.0.rst | 3 +-- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2fb407799a743..88524b1f458ff 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -907,8 +907,7 @@ Groupby/resample/rolling to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical indices. In particular, the result index shape might change if a copy of the input would be returned. The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`) -- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. - The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) +- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index a25604b8d63fa..371ec11cdba77 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -522,7 +522,7 @@ def test_mangled(self): def test_named_agg_nametuple(self, inp): # GH34422 s = pd.Series([1, 1, 2, 2, 3, 3, 4, 5]) - msg = "func is expected but recieved {}".format(type(inp).__name__) + msg = f"func is expected but recieved {type(inp).__name__}" with pytest.raises(TypeError, match=msg): s.groupby(s.values).agg(a=inp)