From ded8f72c518c7321b92fe5bffb5ba53eb0aafd41 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 30 Apr 2021 14:36:03 -0700
Subject: [PATCH 1/3] REF: simplify cython_agg_general

---
 pandas/core/groupby/generic.py | 56 ++++++++++++++++++--------------------
 pandas/core/groupby/groupby.py |  4 +--
 2 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ab03cce0d6476..65e81adb9328f 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -345,47 +345,37 @@ def _aggregate_multiple_funcs(self, arg):
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ):
-        output: dict[base.OutputKey, ArrayLike] = {}
-        # Ideally we would be able to enumerate self._iterate_slices and use
-        # the index from enumeration as the key of output, but ohlc in particular
-        # returns a (n x 4) array. Output requires 1D ndarrays as values, so we
-        # need to slice that up into 1D arrays
-        idx = 0
-        for obj in self._iterate_slices():
-            name = obj.name
-            is_numeric = is_numeric_dtype(obj.dtype)
-            if numeric_only and not is_numeric:
-                continue
 
-            objvals = obj._values
+        obj = self._selected_obj
 
-            if isinstance(objvals, Categorical):
-                if self.grouper.ngroups > 0:
-                    # without special-casing, we would raise, then in fallback
-                    # would eventually call agg_series but without re-casting
-                    # to Categorical
-                    # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
-                    res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
-                else:
-                    # equiv: res_values = self._python_agg_general(alt)
-                    res_values = self._python_apply_general(alt, self._selected_obj)
+        is_numeric = is_numeric_dtype(obj.dtype)
+        if numeric_only and not is_numeric:
+            raise DataError("No numeric types to aggregate")
 
-                result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
+        objvals = obj._values
 
+        if isinstance(objvals, Categorical):
+            if self.grouper.ngroups > 0:
+                # without special-casing, we would raise, then in fallback
+                # would eventually call agg_series but without re-casting
+                # to Categorical
+                # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
+                res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
             else:
-                result = self.grouper._cython_operation(
-                    "aggregate", obj._values, how, axis=0, min_count=min_count
-                )
+                # equiv: res_values = self._python_agg_general(alt)
+                res_values = self._python_apply_general(alt, self._selected_obj)
 
-            assert result.ndim == 1
-            key = base.OutputKey(label=name, position=idx)
-            output[key] = result
-            idx += 1
+            result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
 
-        if not output:
-            raise DataError("No numeric types to aggregate")
+        else:
+            result = self.grouper._cython_operation(
+                "aggregate", obj._values, how, axis=0, min_count=min_count
+            )
 
-        return self._wrap_aggregated_output(output)
+        ser = self.obj._constructor(
+            result, index=self.grouper.result_index, name=obj.name
+        )
+        return self._reindex_output(ser)
 
     def _wrap_aggregated_output(
         self,
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 7fe9d7cb49eb5..29e76d8231562 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1282,9 +1282,7 @@ def _agg_general(
             except DataError:
                 pass
             except NotImplementedError as err:
-                if "function is not implemented for this dtype" in str(
-                    err
-                ) or "category dtype not supported" in str(err):
+                if "function is not implemented for this dtype" in str(err):
                     # raised in _get_cython_function, in some cases can
                     # be trimmed by implementing cython funcs for more dtypes
                     pass
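[Illustration, not part of the patch series] Patch 1 above makes SeriesGroupBy._cython_agg_general work directly on the single selected series and wrap the cython result in a Series indexed by self.grouper.result_index, while keeping the Categorical special case that re-casts the pure-Python fallback result to the original dtype. A minimal public-API sketch of the behaviour that special case preserves; the example data and the printed values are assumptions, not taken from the patch:

import pandas as pd

# Ordered Categorical values cannot go through the cython aggregation kernels,
# so SeriesGroupBy falls back to the pure-Python path and re-casts the result.
ser = pd.Series(
    pd.Categorical(["a", "b", "a", "c"], categories=["a", "b", "c"], ordered=True),
    name="vals",
)
key = [1, 1, 2, 2]

result = ser.groupby(key).min()
# With the re-cast described in the patch comment, the result is expected to
# keep the ordered categorical dtype and be indexed by the group labels 1 and 2.
print(result.dtype)
print(result.index.tolist())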
From 9b9ae13efc37a7f64d0c1eed2ede38093847ae40 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 30 Apr 2021 14:59:47 -0700
Subject: [PATCH 2/3] remove unnecessary consolidate

---
 pandas/core/groupby/groupby.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 29e76d8231562..8640298ac39cd 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1815,9 +1815,7 @@ def describe(self, **kwargs):
         result = self.apply(lambda x: x.describe(**kwargs))
         if self.axis == 1:
             return result.T
-        # FIXME: not being consolidated breaks
-        # test_describe_with_duplicate_output_column_names
-        return result._consolidate().unstack()
+        return result.unstack()
 
     @final
     def resample(self, rule, *args, **kwargs):
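[Illustration, not part of the patch series] Patch 2 above drops the _consolidate() workaround from GroupBy.describe, which builds the per-group statistics via apply and then unstacks them into columns. A rough public-API sketch of that apply/unstack shape, using made-up data:

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})

# Same shape of operation as the patched method body:
# result = self.apply(lambda x: x.describe(**kwargs)); return result.unstack()
stacked = df.groupby("key")[["val"]].apply(lambda x: x.describe())
wide = stacked.unstack()

# The describe() statistics become the inner column level, one row per group.
print(wide.index.tolist())   # ['a', 'b']
print(wide.columns.nlevels)  # 2: (original column, statistic)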
From c2ba234ce06896032407ad2776a9a2e411b4d97c Mon Sep 17 00:00:00 2001
From: Brock
Date: Sun, 2 May 2021 13:02:04 -0700
Subject: [PATCH 3/3] revert everything but describe

---
 pandas/core/groupby/generic.py        | 56 ++++++++++++++++++-----------
 pandas/core/groupby/groupby.py        |  4 +-
 pandas/tests/groupby/test_function.py |  1 +
 3 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e716e05850dbd..b9f1ca0710872 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -345,37 +345,47 @@ def _aggregate_multiple_funcs(self, arg):
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ):
+        output: dict[base.OutputKey, ArrayLike] = {}
+        # Ideally we would be able to enumerate self._iterate_slices and use
+        # the index from enumeration as the key of output, but ohlc in particular
+        # returns a (n x 4) array. Output requires 1D ndarrays as values, so we
+        # need to slice that up into 1D arrays
+        idx = 0
+        for obj in self._iterate_slices():
+            name = obj.name
+            is_numeric = is_numeric_dtype(obj.dtype)
+            if numeric_only and not is_numeric:
+                continue
 
-        obj = self._selected_obj
+            objvals = obj._values
 
-        is_numeric = is_numeric_dtype(obj.dtype)
-        if numeric_only and not is_numeric:
-            raise DataError("No numeric types to aggregate")
+            if isinstance(objvals, Categorical):
+                if self.grouper.ngroups > 0:
+                    # without special-casing, we would raise, then in fallback
+                    # would eventually call agg_series but without re-casting
+                    # to Categorical
+                    # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
+                    res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
 
-        objvals = obj._values
+                else:
+                    # equiv: res_values = self._python_agg_general(alt)
+                    res_values = self._python_apply_general(alt, self._selected_obj)
 
-        if isinstance(objvals, Categorical):
-            if self.grouper.ngroups > 0:
-                # without special-casing, we would raise, then in fallback
-                # would eventually call agg_series but without re-casting
-                # to Categorical
-                # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
-                res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
+                result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
             else:
-                # equiv: res_values = self._python_agg_general(alt)
-                res_values = self._python_apply_general(alt, self._selected_obj)
+                result = self.grouper._cython_operation(
+                    "aggregate", obj._values, how, axis=0, min_count=min_count
+                )
 
-            result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
+            assert result.ndim == 1
+            key = base.OutputKey(label=name, position=idx)
+            output[key] = result
+            idx += 1
 
-        else:
-            result = self.grouper._cython_operation(
-                "aggregate", obj._values, how, axis=0, min_count=min_count
-            )
+        if not output:
+            raise DataError("No numeric types to aggregate")
 
-        ser = self.obj._constructor(
-            result, index=self.grouper.result_index, name=obj.name
-        )
-        return self._reindex_output(ser)
+        return self._wrap_aggregated_output(output)
 
     def _wrap_aggregated_output(
         self,
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8640298ac39cd..36c7f53d23098 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1282,7 +1282,9 @@ def _agg_general(
             except DataError:
                 pass
             except NotImplementedError as err:
-                if "function is not implemented for this dtype" in str(err):
+                if "function is not implemented for this dtype" in str(
+                    err
+                ) or "category dtype not supported" in str(err):
                     # raised in _get_cython_function, in some cases can
                     # be trimmed by implementing cython funcs for more dtypes
                     pass
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 40f8135637292..163303168c240 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1077,6 +1077,7 @@ def test_describe_with_duplicate_output_column_names(as_index):
             "c": [10, 20, 30, 40, 50, 60],
         },
         columns=["a", "b", "b"],
+        copy=False,
    )
 
    expected = (
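[Illustration, not part of the patch series; the test's expected frame is cut off above] Patch 3 adds copy=False to the test's DataFrame constructor. The test builds a frame whose columns= relabelling yields a duplicate column name, which is exactly the case the un-consolidated describe() path from patch 2 has to keep handling. A sketch with made-up values (the real test uses longer columns and an as_index fixture):

import pandas as pd

# Selecting "b" twice via ``columns=`` gives duplicate column names; "c" is
# dropped because it is not in the requested columns.
df = pd.DataFrame(
    {"a": [99, 99, 88], "b": [1, 2, 3], "c": [10, 20, 30]},
    columns=["a", "b", "b"],
    copy=False,
)
print(df.columns.tolist())  # ['a', 'b', 'b']

# This is the frame shape that GroupBy.describe (patch 2) must handle without
# the removed _consolidate() call.
print(df.groupby("a").describe().shape)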