diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 78b8ca8d5a480..bd3fe7750a0a1 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -169,7 +169,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in ``aggregate`` when grouping by a single column and aggregating with a class such as ``list`` or ``tuple``: the class itself was mistaken for a collection of aggregation functions (:issue:`18079`) - - diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ba180cc98cb08..69de7630ede2c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2299,8 +2299,7 @@ def _aggregate_series_pure_python(self, obj, func): for label, group in splitter: res = func(group) if result is None: - if (isinstance(res, (Series, Index, np.ndarray)) or - isinstance(res, list)): + if (isinstance(res, (Series, Index, np.ndarray))): raise ValueError('Function does not reduce') result = np.empty(ngroups, dtype='O') @@ -3022,7 +3021,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) - if hasattr(func_or_funcs, '__iter__'): + if isinstance(func_or_funcs, collections.Iterable): + # Catch instances of lists / tuples + # but not the class list / tuple itself. 
ret = self._aggregate_multiple_funcs(func_or_funcs, (_level or 0) + 1) else: diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 913d3bcc09869..3d27df31cee6e 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -637,7 +637,7 @@ def test_agg_consistency(self): def P1(a): try: return np.percentile(a.dropna(), q=1) - except: + except Exception: return np.nan import datetime as dt @@ -892,3 +892,36 @@ def test_sum_uint64_overflow(self): expected.index.name = 0 result = df.groupby(0).sum() tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("structure, expected", [ + (tuple, pd.DataFrame({'C': {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, pd.DataFrame({'C': {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + (lambda x: tuple(x), pd.DataFrame({'C': {(1, 1): (1, 1, 1), + (3, 4): (3, 4, 4)}})), + (lambda x: list(x), pd.DataFrame({'C': {(1, 1): [1, 1, 1], + (3, 4): [3, 4, 4]}})) + ]) + def test_agg_structs_dataframe(self, structure, expected): + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby(['A', 'B']).aggregate(structure) + expected.index.names = ['A', 'B'] + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("structure, expected", [ + (tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')), + (list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name='C')), + (lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)], + index=[1, 3], name='C')), + (lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]], + index=[1, 3], name='C')) + ]) + def test_agg_structs_series(self, structure, expected): + # Issue #18079 + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby('A')['C'].aggregate(structure) + expected.index.name = 'A' + assert_series_equal(result, expected)