diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index d0f85b75a629e..8dcd01b14535f 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -21,10 +21,7 @@ from pandas._libs.util cimport (
     set_array_not_contiguous,
 )
 
-from pandas._libs.lib import (
-    is_scalar,
-    maybe_convert_objects,
-)
+from pandas._libs.lib import is_scalar
 
 
 cpdef check_result_array(object obj):
@@ -185,7 +182,6 @@ cdef class SeriesBinGrouper(_BaseGrouper):
             islider.reset()
             vslider.reset()
 
-        result = maybe_convert_objects(result)
         return result, counts
 
 
@@ -288,8 +284,6 @@ cdef class SeriesGrouper(_BaseGrouper):
         #  have result initialized by this point.
         assert initialized, "`result` has not been initialized."
 
-        result = maybe_convert_objects(result)
-
         return result, counts
 
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 9edbeb412026d..30be9968ad91f 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -970,22 +970,33 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
         # Caller is responsible for checking ngroups != 0
         assert self.ngroups != 0
 
+        cast_back = True
         if len(obj) == 0:
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
         elif is_extension_array_dtype(obj.dtype):
             # _aggregate_series_fast would raise TypeError when
             #  calling libreduction.Slider
             # In the datetime64tz case it would incorrectly cast to tz-naive
             # TODO: can we get a performant workaround for EAs backed by ndarray?
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
         elif obj.index._has_complex_internals:
             # Preempt TypeError in _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
+            result, counts = self._aggregate_series_pure_python(obj, func)
 
-        return self._aggregate_series_fast(obj, func)
+        else:
+            result, counts = self._aggregate_series_fast(obj, func)
+            cast_back = False
+
+        npvalues = lib.maybe_convert_objects(result, try_float=False)
+        if cast_back:
+            # TODO: Is there a documented reason why we don't always cast_back?
+            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
+        else:
+            out = npvalues
+        return out, counts
 
     def _aggregate_series_fast(
         self, obj: Series, func: F
@@ -1033,10 +1044,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             counts[i] = group.shape[0]
             result[i] = res
 
-        npvalues = lib.maybe_convert_objects(result, try_float=False)
-        out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
-
-        return out, counts
+        return result, counts
 
 
 class BinGrouper(BaseGrouper):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a07a685c2ffde..240f678960969 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3086,7 +3086,7 @@ def combine(self, other, func, fill_value=None) -> Series:
                 new_values[:] = [func(lv, other) for lv in self._values]
             new_name = self.name
 
-        # try_float=False is to match _aggregate_series_pure_python
+        # try_float=False is to match agg_series
         npvalues = lib.maybe_convert_objects(new_values, try_float=False)
         res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
         return self._constructor(res_values, index=new_index, name=new_name)
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 13fddad30eeba..aa126ae801f1e 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -20,7 +20,7 @@ def test_series_grouper():
     grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2)
     result, counts = grouper.get_result()
 
-    expected = np.array([obj[3:6].mean(), obj[6:].mean()])
+    expected = np.array([obj[3:6].mean(), obj[6:].mean()], dtype=object)
     tm.assert_almost_equal(result, expected)
 
     exp_counts = np.array([3, 4], dtype=np.int64)
@@ -36,7 +36,7 @@ def test_series_grouper_result_length_difference():
     grouper = libreduction.SeriesGrouper(obj, lambda x: all(x > 0), labels, 2)
     result, counts = grouper.get_result()
 
-    expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)])
+    expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)], dtype=object)
     tm.assert_equal(result, expected)
 
     exp_counts = np.array([3, 4], dtype=np.int64)
@@ -61,7 +61,7 @@ def test_series_bin_grouper():
     grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
     result, counts = grouper.get_result()
 
-    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
+    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
     tm.assert_almost_equal(result, expected)
 
     exp_counts = np.array([3, 3, 4], dtype=np.int64)