From 9869155b5dbda1d691124c67b522ae23cac45bf3 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 27 Aug 2019 13:11:00 -0700
Subject: [PATCH 1/4] reduction -> libreduction for grepability

---
 pandas/core/apply.py                     |  6 +++---
 pandas/core/groupby/ops.py               | 10 +++++-----
 pandas/tests/groupby/test_bin_groupby.py | 16 ++++++++--------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 5c8599dbb054b..b96b3c7572031 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from pandas._libs import reduction
+from pandas._libs import reduction as libreduction
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.common import (
@@ -221,7 +221,7 @@ def apply_raw(self):
         """ apply to the values as a numpy array """
 
         try:
-            result = reduction.compute_reduction(self.values, self.f, axis=self.axis)
+            result = libreduction.compute_reduction(self.values, self.f, axis=self.axis)
         except Exception:
             result = np.apply_along_axis(self.f, self.axis, self.values)
 
@@ -281,7 +281,7 @@ def apply_standard(self):
             dummy = Series(empty_arr, index=index, dtype=values.dtype)
 
             try:
-                result = reduction.compute_reduction(
+                result = libreduction.compute_reduction(
                     values, self.f, axis=self.axis, dummy=dummy, labels=labels
                 )
                 return self.obj._constructor_sliced(result, index=labels)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index b0c629f017dd3..56ba1901c4137 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -12,7 +12,7 @@
 
 from pandas._libs import NaT, iNaT, lib
 import pandas._libs.groupby as libgroupby
-import pandas._libs.reduction as reduction
+import pandas._libs.reduction as libreduction
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
@@ -207,7 +207,7 @@ def apply(self, f, data, axis=0):
 
                 if len(result_values) == len(group_keys):
                     return group_keys, result_values, mutated
-            except reduction.InvalidApply:
+            except libreduction.InvalidApply:
                 # Cannot fast apply on MultiIndex (_has_complex_internals).
                 # This Exception is also raised if `f` triggers an exception
                 # but it is preferable to raise the exception in Python.
@@ -678,7 +678,7 @@ def _aggregate_series_fast(self, obj, func):
         indexer = get_group_index_sorter(group_index, ngroups)
         obj = obj.take(indexer)
         group_index = algorithms.take_nd(group_index, indexer, allow_fill=False)
-        grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups, dummy)
+        grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups, dummy)
         result, counts = grouper.get_result()
         return result, counts
 
@@ -852,7 +852,7 @@ def groupings(self):
 
     def agg_series(self, obj, func):
         dummy = obj[:0]
-        grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy)
+        grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy)
         return grouper.get_result()
 
@@ -940,7 +940,7 @@ def fast_apply(self, f, names):
             return [], True
 
         sdata = self._get_sorted_data()
-        return reduction.apply_frame_axis0(sdata, f, names, starts, ends)
+        return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
 
     def _chop(self, sdata, slice_obj):
         if self.axis == 0:
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 2195686ee9c7f..b8f9ecd42bae3 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -2,7 +2,7 @@
 from numpy import nan
 import pytest
 
-from pandas._libs import groupby, lib, reduction
+from pandas._libs import groupby, lib, reduction as libreduction
 
 from pandas.core.dtypes.common import ensure_int64
 
@@ -18,7 +18,7 @@ def test_series_grouper():
 
     labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)
 
-    grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy)
+    grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2, dummy)
     result, counts = grouper.get_result()
 
     expected = np.array([obj[3:6].mean(), obj[6:].mean()])
@@ -34,7 +34,7 @@ def test_series_bin_grouper():
 
     bins = np.array([3, 6])
 
-    grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy)
+    grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins, dummy)
     result, counts = grouper.get_result()
 
     expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
@@ -120,31 +120,31 @@ class TestMoments:
 class TestReducer:
     def test_int_index(self):
         arr = np.random.randn(100, 4)
-        result = reduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4)))
+        result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4)))
         expected = arr.sum(0)
         assert_almost_equal(result, expected)
 
-        result = reduction.compute_reduction(
+        result = libreduction.compute_reduction(
             arr, np.sum, axis=1, labels=Index(np.arange(100))
         )
         expected = arr.sum(1)
         assert_almost_equal(result, expected)
 
         dummy = Series(0.0, index=np.arange(100))
-        result = reduction.compute_reduction(
+        result = libreduction.compute_reduction(
             arr, np.sum, dummy=dummy, labels=Index(np.arange(4))
         )
         expected = arr.sum(0)
         assert_almost_equal(result, expected)
 
         dummy = Series(0.0, index=np.arange(4))
-        result = reduction.compute_reduction(
+        result = libreduction.compute_reduction(
             arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
         )
         expected = arr.sum(1)
         assert_almost_equal(result, expected)
 
-        result = reduction.compute_reduction(
+        result = libreduction.compute_reduction(
             arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
         )
         assert_almost_equal(result, expected)
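Note on patch 1: the rename is mechanical, but it is what delivers the grepability named in the subject -- a search for `libreduction.` now matches only these call sites, whereas `reduction` alone also matches docstrings and local variables. As a minimal sketch of the renamed entry point (mirroring the calls in apply.py and test_bin_groupby above; `compute_reduction` is an internal pandas._libs API of this vintage, not a public one):

    import numpy as np
    from pandas._libs import reduction as libreduction

    arr = np.random.randn(100, 4)
    # the same call apply_raw makes after this patch: reduce along axis 1
    result = libreduction.compute_reduction(arr, np.sum, axis=1)
    assert result.shape == (100,)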
From a870082c1f5c57bbbd00cb00012bff767137880b Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 27 Aug 2019 18:13:38 -0700
Subject: [PATCH 2/4] fix passing DataFrame to make_block

---
 pandas/core/groupby/generic.py        | 41 ++++++++++++++++++++++-----
 pandas/tests/groupby/test_function.py |  7 +++--
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7d6690a0dfa5a..7598debbd52b9 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -30,6 +30,7 @@
     ensure_int64,
     ensure_platform_int,
     is_bool,
+    is_categorical_dtype,
     is_datetimelike,
     is_dict_like,
     is_integer_dtype,
@@ -161,10 +162,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
                 result, _ = self.grouper.aggregate(
                     block.values, how, axis=agg_axis, min_count=min_count
                 )
-            except NotImplementedError:
+            except NotImplementedError as err:
                 # generally if we have numeric_only=False
                 # and non-applicable functions
                 # try to python agg
+                if "type does not support" in str(err):
+                    # exception raised by NumPy, not pandas
+                    # e.g. "timedelta64 type does not support prod operations"
+                    deleted_items.append(locs)
+                    continue
 
                 if alt is None:
                     # we cannot perform the operation
@@ -182,10 +188,30 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
                     # continue and exclude the block
                     deleted_items.append(locs)
                     continue
+
+                if is_categorical_dtype(block.dtype):
+                    # restore e.g. Categorical
+                    # not all dtypes are conserved by agg
+                    result = result.astype(block.dtype)
+
+                assert len(result._data.blocks) == 1
+                result = result._data.blocks[0].values
+
+                # Check that we didn't mess up some corner case
+                # TODO: this isn't a reliable way of doing this
+                grp = obj.loc[s.groups[1]]
+                try:
+                    alt(grp.values, axis=self.axis)
+                except TypeError:
+                    result = no_result
+                    deleted_items.append(locs)
+                    continue
+
             finally:
                 if result is not no_result:
                     # see if we can cast the block back to the original dtype
                     result = maybe_downcast_numeric(result, block.dtype)
+                    assert not isinstance(result, DataFrame)
                     newb = block.make_block(result)
 
             new_items.append(locs)
@@ -242,11 +268,12 @@ def aggregate(self, func, *args, **kwargs):
         # grouper specific aggregations
         if self.grouper.nkeys > 1:
             return self._python_agg_general(func, *args, **kwargs)
+        elif args or kwargs:
+            result = self._aggregate_generic(func, *args, **kwargs)
         else:
 
             # try to treat as if we are passing a list
             try:
-                assert not args and not kwargs
                 result = self._aggregate_multiple_funcs(
                     [func], _level=_level, _axis=self.axis
                 )
@@ -261,7 +288,7 @@ def aggregate(self, func, *args, **kwargs):
                     # to SparseDataFrame, so we do it here.
                     result = SparseDataFrame(result._data)
             except Exception:
-                result = self._aggregate_generic(func, *args, **kwargs)
+                result = self._aggregate_generic(func)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
@@ -311,10 +338,10 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
         cannot_agg = []
         errors = None
         for item in obj:
-            try:
-                data = obj[item]
-                colg = SeriesGroupBy(data, selection=item, grouper=self.grouper)
+            data = obj[item]
+            colg = SeriesGroupBy(data, selection=item, grouper=self.grouper)
 
+            try:
                 cast = self._transform_should_cast(func)
                 result[item] = colg.aggregate(func, *args, **kwargs)
@@ -682,7 +709,7 @@ def _transform_item_by_item(self, obj, wrapper):
 
         return DataFrame(output, index=obj.index, columns=columns)
 
-    def filter(self, func, dropna=True, *args, **kwargs):  # noqa
+    def filter(self, func, dropna=True, *args, **kwargs):
         """
         Return a copy of a DataFrame excluding elements from groups
         that do not satisfy the boolean criterion specified by func.
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index d89233f2fd603..c27af160c9898 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -171,13 +171,12 @@ def test_arg_passthru():
         result = f(numeric_only=False)
         tm.assert_frame_equal(result.reindex_like(expected), expected)
 
-    # TODO: min, max *should* handle
-    # categorical (ordered) dtype
     expected_columns = Index(
         [
             "int",
             "float",
             "string",
+            "category_string",
             "category_int",
             "datetime",
             "datetimetz",
@@ -212,7 +211,9 @@ def test_arg_passthru():
         result = f(numeric_only=False)
         tm.assert_index_equal(result.columns, expected_columns)
 
-    expected_columns = Index(["int", "float", "string", "category_int", "timedelta"])
+    expected_columns = Index(
+        ["int", "float", "string", "category_string", "category_int", "timedelta"]
+    )
     for attr in ["sum"]:
         f = getattr(df.groupby("group"), attr)
         result = f()
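Note on patch 2: the `"type does not support"` string match routes NumPy-level dtype refusals (e.g. `prod` on timedelta64) straight to block exclusion, rather than retrying through the python aggregator, which could only fail again. A runnable sketch of that control flow, with hypothetical helper names (`agg_block_values` and `agg_or_drop` are illustrations, not pandas API):

    import numpy as np


    def agg_block_values(values, how):
        # stand-in for the cython aggregator: refuse timedelta64 prod with
        # the same NotImplementedError message the patch matches on
        if how == "prod" and values.dtype.kind == "m":
            raise NotImplementedError(
                "timedelta64 type does not support prod operations"
            )
        return getattr(values, how)(axis=0)


    def agg_or_drop(values, how):
        try:
            return agg_block_values(values, how)
        except NotImplementedError as err:
            if "type does not support" in str(err):
                # the patch does deleted_items.append(locs); continue --
                # here None just signals "drop this block"
                return None
            raise


    print(agg_or_drop(np.arange(6.0).reshape(3, 2), "prod"))      # [ 0. 15.]
    print(agg_or_drop(np.array([1, 2], dtype="m8[ns]"), "prod"))  # None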
From 9b054208d808b62484354ad2f6ff137cacce6713 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 27 Aug 2019 20:00:02 -0700
Subject: [PATCH 3/4] raise for object dtype

---
 pandas/core/groupby/generic.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7598debbd52b9..a0c60b1e1c8db 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -53,7 +53,6 @@
     GroupBy,
     _apply_docs,
     _transform_template,
-    groupby,
 )
 from pandas.core.index import Index, MultiIndex, _all_indexes_same
 import pandas.core.indexes.base as ibase
@@ -180,7 +179,7 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
 
                 # call our grouper again with only this block
                 obj = self.obj[data.items[locs]]
-                s = groupby(obj, self.grouper)
+                s = obj.groupby(self.grouper)
                 try:
                     result = s.aggregate(lambda x: alt(x, axis=self.axis))
                 except TypeError:
@@ -189,24 +188,19 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
                     deleted_items.append(locs)
                     continue
 
+                if is_object_dtype(block.dtype) and how in["prod", "cumprod", "sum"]:
+                    # s.aggregate is not reliable for e.g. `prod` with strings
+                    result = no_result
+                    raise
+
                 if is_categorical_dtype(block.dtype):
-                    # restore e.g. Categorical
-                    # not all dtypes are conserved by agg
+                    # restore Categorical; not all dtypes are conserved by agg
+                    # TODO: will this be right for e.g. sum?
                     result = result.astype(block.dtype)
 
                 assert len(result._data.blocks) == 1
                 result = result._data.blocks[0].values
 
-                # Check that we didn't mess up some corner case
-                # TODO: this isn't a reliable way of doing this
-                grp = obj.loc[s.groups[1]]
-                try:
-                    alt(grp.values, axis=self.axis)
-                except TypeError:
-                    result = no_result
-                    deleted_items.append(locs)
-                    continue
-
             finally:
                 if result is not no_result:
                     # see if we can cast the block back to the original dtype
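Note on patch 3: the object-dtype guard exists because a python-level reduction over strings can "succeed" for the wrong reason -- `sum` concatenates, while `prod` only fails once two strings are actually multiplied. A small plain-NumPy demonstration of the failure mode the comment alludes to:

    import numpy as np

    values = np.array(["a", "b", "c"], dtype=object)
    print(values.sum())  # 'abc' -- reduces fine, but is not a meaningful sum
    try:
        values.prod()  # reduces with *, and str * str is undefined
    except TypeError as err:
        print("prod raised:", err)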
From 3a83d2a51b6b0250faa5e701d9aa13b0696eea6f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 27 Aug 2019 20:04:45 -0700
Subject: [PATCH 4/4] comment, black

---
 pandas/core/groupby/generic.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index a0c60b1e1c8db..28316fa85c066 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -49,11 +49,7 @@
 from pandas.core.frame import DataFrame
 from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs
 from pandas.core.groupby import base
-from pandas.core.groupby.groupby import (
-    GroupBy,
-    _apply_docs,
-    _transform_template,
-)
+from pandas.core.groupby.groupby import GroupBy, _apply_docs, _transform_template
 from pandas.core.index import Index, MultiIndex, _all_indexes_same
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
@@ -188,9 +188,12 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
                     deleted_items.append(locs)
                     continue
 
-                if is_object_dtype(block.dtype) and how in["prod", "cumprod", "sum"]:
+                if is_object_dtype(block.dtype) and how in ["prod", "cumprod", "sum"]:
                     # s.aggregate is not reliable for e.g. `prod` with strings
                     result = no_result
+                    # TODO: why are we raising here and continuing elsewhere?
+                    # (tests.groupby.test_function.test_arg_passthru breaks
+                    # if we continue here)
                     raise
 
                 if is_categorical_dtype(block.dtype):
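Note: end to end, the user-visible behavior the test_arg_passthru changes in patch 2 pin down is presumably along these lines (a sketch inferred from the removed TODO and the new expected_columns, not taken from the diff): min/max over an ordered categorical column should keep that column in the groupby result instead of silently dropping it.

    import pandas as pd

    df = pd.DataFrame(
        {
            "group": [1, 1, 2],
            "category_string": pd.Categorical(list("abc"), ordered=True),
        }
    )
    # per the updated expected_columns, 'category_string' now survives
    result = df.groupby("group").min(numeric_only=False)
    print(result)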