diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index d0f85b75a629e..8dcd01b14535f 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -21,10 +21,7 @@ from pandas._libs.util cimport ( set_array_not_contiguous, ) -from pandas._libs.lib import ( - is_scalar, - maybe_convert_objects, -) +from pandas._libs.lib import is_scalar cpdef check_result_array(object obj): @@ -185,7 +182,6 @@ cdef class SeriesBinGrouper(_BaseGrouper): islider.reset() vslider.reset() - result = maybe_convert_objects(result) return result, counts @@ -288,8 +284,6 @@ cdef class SeriesGrouper(_BaseGrouper): # have result initialized by this point. assert initialized, "`result` has not been initialized." - result = maybe_convert_objects(result) - return result, counts diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 9edbeb412026d..30be9968ad91f 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -970,22 +970,33 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]: # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 + cast_back = True if len(obj) == 0: # SeriesGrouper would raise if we were to call _aggregate_series_fast - return self._aggregate_series_pure_python(obj, func) + result, counts = self._aggregate_series_pure_python(obj, func) elif is_extension_array_dtype(obj.dtype): # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider # In the datetime64tz case it would incorrectly cast to tz-naive # TODO: can we get a performant workaround for EAs backed by ndarray? - return self._aggregate_series_pure_python(obj, func) + result, counts = self._aggregate_series_pure_python(obj, func) elif obj.index._has_complex_internals: # Preempt TypeError in _aggregate_series_fast - return self._aggregate_series_pure_python(obj, func) + result, counts = self._aggregate_series_pure_python(obj, func) - return self._aggregate_series_fast(obj, func) + else: + result, counts = self._aggregate_series_fast(obj, func) + cast_back = False + + npvalues = lib.maybe_convert_objects(result, try_float=False) + if cast_back: + # TODO: Is there a documented reason why we dont always cast_back? + out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) + else: + out = npvalues + return out, counts def _aggregate_series_fast( self, obj: Series, func: F @@ -1033,10 +1044,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): counts[i] = group.shape[0] result[i] = res - npvalues = lib.maybe_convert_objects(result, try_float=False) - out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) - - return out, counts + return result, counts class BinGrouper(BaseGrouper): diff --git a/pandas/core/series.py b/pandas/core/series.py index a07a685c2ffde..240f678960969 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3086,7 +3086,7 @@ def combine(self, other, func, fill_value=None) -> Series: new_values[:] = [func(lv, other) for lv in self._values] new_name = self.name - # try_float=False is to match _aggregate_series_pure_python + # try_float=False is to match agg_series npvalues = lib.maybe_convert_objects(new_values, try_float=False) res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) return self._constructor(res_values, index=new_index, name=new_name) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 13fddad30eeba..aa126ae801f1e 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -20,7 +20,7 @@ def test_series_grouper(): grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2) result, counts = grouper.get_result() - expected = np.array([obj[3:6].mean(), obj[6:].mean()]) + expected = np.array([obj[3:6].mean(), obj[6:].mean()], dtype=object) tm.assert_almost_equal(result, expected) exp_counts = np.array([3, 4], dtype=np.int64) @@ -36,7 +36,7 @@ def test_series_grouper_result_length_difference(): grouper = libreduction.SeriesGrouper(obj, lambda x: all(x > 0), labels, 2) result, counts = grouper.get_result() - expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)]) + expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)], dtype=object) tm.assert_equal(result, expected) exp_counts = np.array([3, 4], dtype=np.int64) @@ -61,7 +61,7 @@ def test_series_bin_grouper(): grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins) result, counts = grouper.get_result() - expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) + expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object) tm.assert_almost_equal(result, expected) exp_counts = np.array([3, 3, 4], dtype=np.int64)