From f8acf9a6a548dfd8d90b944cade338c7f83648b8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 2 May 2021 14:46:10 -0700
Subject: [PATCH 1/3] REF: consolidate casting in groupby agg_series

---
 pandas/_libs/reduction.pyx     |  8 +-------
 pandas/core/groupby/generic.py |  3 +--
 pandas/core/groupby/ops.py     | 18 ++++++++++--------
 pandas/core/series.py          |  2 +-
 4 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 191967585c431..1e56cdbb614ad 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -21,10 +21,7 @@ from pandas._libs.util cimport (
     set_array_not_contiguous,
 )
 
-from pandas._libs.lib import (
-    is_scalar,
-    maybe_convert_objects,
-)
+from pandas._libs.lib import is_scalar
 
 
 cpdef check_result_array(object obj):
@@ -183,7 +180,6 @@ cdef class SeriesBinGrouper(_BaseGrouper):
             islider.reset()
             vslider.reset()
 
-        result = maybe_convert_objects(result)
         return result, counts
 
 
@@ -284,8 +280,6 @@ cdef class SeriesGrouper(_BaseGrouper):
         #  have result initialized by this point.
         assert initialized, "`result` has not been initialized."
 
-        result = maybe_convert_objects(result)
-
         return result, counts
 
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b9f1ca0710872..ba7c409be222d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -364,8 +364,7 @@ def _cython_agg_general(
                     # without special-casing, we would raise, then in fallback
                     # would eventually call agg_series but without re-casting
                     # to Categorical
-                    # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
-                    res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
+                    res_values, _ = self.grouper.agg_series(obj, alt)
                 else:
                     # equiv: res_values = self._python_agg_general(alt)
                     res_values = self._python_apply_general(alt, self._selected_obj)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 9edbeb412026d..d4b6555515054 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -972,20 +972,25 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
 
         if len(obj) == 0:
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
         elif is_extension_array_dtype(obj.dtype):
             # _aggregate_series_fast would raise TypeError when
             #  calling libreduction.Slider
             # In the datetime64tz case it would incorrectly cast to tz-naive
             # TODO: can we get a performant workaround for EAs backed by ndarray?
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
         elif obj.index._has_complex_internals:
             # Preempt TypeError in _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
-        return self._aggregate_series_fast(obj, func)
+        else:
+            result, counts = self._aggregate_series_fast(obj, func)
+
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+        out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        return out, counts
 
     def _aggregate_series_fast(
         self, obj: Series, func: F
@@ -1033,10 +1038,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             counts[i] = group.shape[0]
             result[i] = res
 
-        npvalues = lib.maybe_convert_objects(result, try_float=False)
-        out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
-
-        return out, counts
+        return result, counts
 
 
 class BinGrouper(BaseGrouper):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6a3e997a58754..f3f4e49b810bd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3086,7 +3086,7 @@ def combine(self, other, func, fill_value=None) -> Series:
                 new_values[:] = [func(lv, other) for lv in self._values]
             new_name = self.name
 
-        # try_float=False is to match _aggregate_series_pure_python
+        # try_float=False is to match agg_series
         npvalues = lib.maybe_convert_objects(new_values, try_float=False)
         res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
         return self._constructor(res_values, index=new_index, name=new_name)

From 5cde819b0403ee106c29c7b0d747a24486067f82 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 2 May 2021 15:41:50 -0700
Subject: [PATCH 2/3] update tests

---
 pandas/core/groupby/generic.py           | 7 ++++++-
 pandas/tests/groupby/test_bin_groupby.py | 6 +++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ba7c409be222d..0fbc146c4cfb0 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -367,7 +367,12 @@ def _cython_agg_general(
                     res_values, _ = self.grouper.agg_series(obj, alt)
                 else:
                     # equiv: res_values = self._python_agg_general(alt)
-                    res_values = self._python_apply_general(alt, self._selected_obj)
+                    # error: Incompatible types in assignment (expression has
+                    # type "Union[DataFrame, Series]", variable has type
+                    # "Union[ExtensionArray, ndarray]")
+                    res_values = self._python_apply_general(  # type: ignore[assignment]
+                        alt, self._selected_obj
+                    )
 
                 result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
 
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 13fddad30eeba..aa126ae801f1e 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -20,7 +20,7 @@ def test_series_grouper():
     grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2)
     result, counts = grouper.get_result()
 
-    expected = np.array([obj[3:6].mean(), obj[6:].mean()])
+    expected = np.array([obj[3:6].mean(), obj[6:].mean()], dtype=object)
     tm.assert_almost_equal(result, expected)
 
     exp_counts = np.array([3, 4], dtype=np.int64)
@@ -36,7 +36,7 @@ def test_series_grouper_result_length_difference():
     grouper = libreduction.SeriesGrouper(obj, lambda x: all(x > 0), labels, 2)
     result, counts = grouper.get_result()
 
-    expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)])
+    expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)], dtype=object)
     tm.assert_equal(result, expected)
 
     exp_counts = np.array([3, 4], dtype=np.int64)
@@ -61,7 +61,7 @@ def test_series_bin_grouper():
     grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
     result, counts = grouper.get_result()
 
-    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
+    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
     tm.assert_almost_equal(result, expected)
 
     exp_counts = np.array([3, 3, 4], dtype=np.int64)

From e885aa4de92c1af0463f085e4110148251d8d429 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 2 May 2021 19:43:43 -0700
Subject: [PATCH 3/3] conditional cast_back

---
 pandas/core/groupby/ops.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d4b6555515054..30be9968ad91f 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -970,6 +970,7 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
         # Caller is responsible for checking ngroups != 0
         assert self.ngroups != 0
 
+        cast_back = True
         if len(obj) == 0:
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
             result, counts = self._aggregate_series_pure_python(obj, func)
@@ -987,9 +988,14 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
 
         else:
             result, counts = self._aggregate_series_fast(obj, func)
+            cast_back = False
 
         npvalues = lib.maybe_convert_objects(result, try_float=False)
-        out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        if cast_back:
+            # TODO: Is there a documented reason why we dont always cast_back?
+            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        else:
+            out = npvalues
         return out, counts
 
     def _aggregate_series_fast(