diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c0436e9389078..08bbd3940d4c2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,6 +30,7 @@ ensure_int64, ensure_platform_int, is_bool, + is_categorical_dtype, is_datetimelike, is_dict_like, is_integer_dtype, @@ -48,12 +49,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs from pandas.core.groupby import base -from pandas.core.groupby.groupby import ( - GroupBy, - _apply_docs, - _transform_template, - groupby, -) +from pandas.core.groupby.groupby import GroupBy, _apply_docs, _transform_template from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block @@ -161,10 +157,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): result, _ = self.grouper.aggregate( block.values, how, axis=agg_axis, min_count=min_count ) - except NotImplementedError: + except NotImplementedError as err: # generally if we have numeric_only=False # and non-applicable functions # try to python agg + if "type does not support" in str(err): + # exception raised by NumPy, not pandas + # e.g. "timedelta64 type does not support prod operations" + deleted_items.append(locs) + continue if alt is None: # we cannot perform the operation @@ -174,7 +175,7 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): # call our grouper again with only this block obj = self.obj[data.items[locs]] - s = groupby(obj, self.grouper) + s = obj.groupby(self.grouper) try: result = s.aggregate(lambda x: alt(x, axis=self.axis)) except TypeError: @@ -182,10 +183,28 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): # continue and exclude the block deleted_items.append(locs) continue + + if is_object_dtype(block.dtype) and how in ["prod", "cumprod", "sum"]: + # s.aggregate is not reliable for e.g. `prod` with strings + result = no_result + # TODO: why are we raising here and continuing elsewhere? + # (tests.groupby.test_function.test_arg_passthru breaks + # if we continue here) + raise + + if is_categorical_dtype(block.dtype): + # restore Categorical; not all dtypes are conserved by agg + # TODO: will this be right for e.g. sum? + result = result.astype(block.dtype) + + assert len(result._data.blocks) == 1 + result = result._data.blocks[0].values + finally: if result is not no_result: # see if we can cast the block back to the original dtype result = maybe_downcast_numeric(result, block.dtype) + assert not isinstance(result, DataFrame) newb = block.make_block(result) new_items.append(locs) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index d89233f2fd603..c27af160c9898 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -171,13 +171,12 @@ def test_arg_passthru(): result = f(numeric_only=False) tm.assert_frame_equal(result.reindex_like(expected), expected) - # TODO: min, max *should* handle - # categorical (ordered) dtype expected_columns = Index( [ "int", "float", "string", + "category_string", "category_int", "datetime", "datetimetz", @@ -212,7 +211,9 @@ def test_arg_passthru(): result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) - expected_columns = Index(["int", "float", "string", "category_int", "timedelta"]) + expected_columns = Index( + ["int", "float", "string", "category_string", "category_int", "timedelta"] + ) for attr in ["sum"]: f = getattr(df.groupby("group"), attr) result = f()