pandas-dev · jreback · Sep 27, 2021 · Sep 24, 2021 · Sep 24, 2021 · Sep 24, 2021
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -48,7 +48,6 @@
     is_dict_like,
     is_integer_dtype,
     is_interval_dtype,
-    is_numeric_dtype,
     is_scalar,
 )
 from pandas.core.dtypes.missing import (
@@ -78,6 +77,7 @@
     _apply_docs,
     _transform_template,
     group_selection_context,
+    warn_dropping_nuisance_columns_deprecated,
 )
 from pandas.core.indexes.api import (
     Index,
@@ -334,43 +334,6 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame:
         output = self._reindex_output(output)
         return output
 
-    def _cython_agg_general(
-        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
-    ):
-
-        obj = self._selected_obj
-        objvals = obj._values
-        data = obj._mgr
-
-        if numeric_only and not is_numeric_dtype(obj.dtype):
-            # GH#41291 match Series behavior
-            raise NotImplementedError(
-                f"{type(self).__name__}.{how} does not implement numeric_only."
-            )
-
-        # This is overkill because it is only called once, but is here to
-        #  mirror the array_func used in DataFrameGroupBy._cython_agg_general
-        def array_func(values: ArrayLike) -> ArrayLike:
-            try:
-                result = self.grouper._cython_operation(
-                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-                # TODO: shouldn't min_count matter?
-                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
-
-            return result
-
-        result = array_func(objvals)
-
-        ser = self.obj._constructor(
-            result, index=self.grouper.result_index, name=obj.name
-        )
-        return self._reindex_output(ser)
-
     def _indexed_output_to_ndframe(
         self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> Series:
@@ -970,46 +933,6 @@ def _iterate_slices(self) -> Iterable[Series]:
 
                 yield values
 
-    def _cython_agg_general(
-        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
-    ) -> DataFrame:
-        # Note: we never get here with how="ohlc"; that goes through SeriesGroupBy
-
-        data: Manager2D = self._get_data_to_aggregate()
-
-        if numeric_only:
-            data = data.get_numeric_data(copy=False)
-
-        def array_func(values: ArrayLike) -> ArrayLike:
-            try:
-                result = self.grouper._cython_operation(
-                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-                # TODO: shouldn't min_count matter?
-                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
-
-            return result
-
-        # TypeError -> we may have an exception in trying to aggregate
-        #  continue and exclude the block
-        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
-
-        if len(new_mgr) < len(data):
-            warnings.warn(
-                f"Dropping invalid columns in {type(self).__name__}.{how} "
-                "is deprecated. In a future version, a TypeError will be raised. "
-                f"Before calling .{how}, select only columns which should be "
-                "valid for the function.",
-                FutureWarning,
-                stacklevel=4,
-            )
-
-        return self._wrap_agged_manager(new_mgr)
-
     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
             raise AssertionError("Number of keys must be 1")
@@ -1195,14 +1118,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         res_mgr.set_axis(1, mgr.axes[1])
 
         if len(res_mgr) < len(mgr):
-            warnings.warn(
-                f"Dropping invalid columns in {type(self).__name__}.{how} "
-                "is deprecated. In a future version, a TypeError will be raised. "
-                f"Before calling .{how}, select only columns which should be "
-                "valid for the transforming function.",
-                FutureWarning,
-                stacklevel=4,
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), how)
 
         res_df = self.obj._constructor(res_mgr)
         if self.axis == 1:
@@ -1314,14 +1230,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
                 # e.g. trying to call nanmean with string values
-                warnings.warn(
-                    f"Dropping invalid columns in {type(self).__name__}.transform "
-                    "is deprecated. In a future version, a TypeError will be raised. "
-                    "Before calling .transform, select only columns which should be "
-                    "valid for the transforming function.",
-                    FutureWarning,
-                    stacklevel=5,
-                )
+                warn_dropping_nuisance_columns_deprecated(type(self), "transform")
             else:
                 inds.append(i)
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1422,14 +1422,7 @@ def _python_agg_general(self, func, *args, **kwargs):
                 # if this function is invalid for this dtype, we will ignore it.
                 result = self.grouper.agg_series(obj, f)
             except TypeError:
-                warnings.warn(
-                    f"Dropping invalid columns in {type(self).__name__}.agg "
-                    "is deprecated. In a future version, a TypeError will be raised. "
-                    "Before calling .agg, select only columns which should be "
-                    "valid for the aggregating function.",
-                    FutureWarning,
-                    stacklevel=3,
-                )
+                warn_dropping_nuisance_columns_deprecated(type(self), "agg")
                 continue
 
             key = base.OutputKey(label=name, position=idx)
@@ -1500,10 +1493,80 @@ def _agg_py_fallback(
         # test_groupby_duplicate_columns with object dtype values
         return ensure_block_shape(res_values, ndim=ndim)
 
+    @final
     def _cython_agg_general(
         self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
     ):
-        raise AbstractMethodError(self)
+        """
+        Aggregate the data of each group with the cython operation ``how``.
+
+        Values are reduced through ``self.grouper._cython_operation``; when
+        that raises NotImplementedError, ``self._agg_py_fallback`` is called
+        with ``alt`` instead.
+
+        Parameters
+        ----------
+        how : str
+            Name of the aggregation handed to the cython operation.
+        alt : Callable
+            Forwarded to ``_agg_py_fallback`` when the cython path is not
+            implemented.
+        numeric_only : bool
+            If True, 1D data of non-numeric dtype raises, while any
+            other data is narrowed with ``get_numeric_data``.
+        min_count : int, default -1
+            Forwarded to the cython operation only.
+
+        Returns
+        -------
+        The output of ``_wrap_agged_manager``. For 1D data its index is set
+        to ``self.grouper.result_index`` and it is then passed through
+        ``_reindex_output``.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``numeric_only`` is True and the 1D data has non-numeric dtype.
+        """
+        # Note: DataFrameGroupBy never reaches this with how="ohlc";
+        #  that case is routed through SeriesGroupBy
+        mgr = self._get_data_to_aggregate()
+        is_ser = mgr.ndim == 1
+
+        if numeric_only:
+            if not is_ser:
+                mgr = mgr.get_numeric_data(copy=False)
+            elif not is_numeric_dtype(self._selected_obj.dtype):
+                # GH#41291 match Series behavior
+                raise NotImplementedError(
+                    f"{type(self).__name__}.{how} does not implement numeric_only."
+                )
+
+        ndim = mgr.ndim
+
+        def array_func(values: ArrayLike) -> ArrayLike:
+            try:
+                return self.grouper._cython_operation(
+                    "aggregate", values, how, axis=ndim - 1, min_count=min_count
+                )
+            except NotImplementedError:
+                # typically numeric_only=False with a function the cython
+                #  path does not support; aggregate in python instead
+                # TODO: shouldn't min_count matter?
+                return self._agg_py_fallback(values, ndim=ndim, alt=alt)
+
+        # TypeError -> aggregating a block may raise; with ignore_failures
+        #  we continue and exclude that block
+        agged_mgr = mgr.grouped_reduce(array_func, ignore_failures=True)
+
+        if not is_ser:
+            if len(agged_mgr) < len(mgr):
+                warn_dropping_nuisance_columns_deprecated(type(self), how)
+            return self._wrap_agged_manager(agged_mgr)
+
+        ser = self._wrap_agged_manager(agged_mgr)
+        ser.index = self.grouper.result_index
+        return self._reindex_output(ser)
 
     def _cython_transform(
         self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
@@ -1769,8 +1804,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
         # _wrap_agged_manager() returns. GH 35028
         with com.temp_setattr(self, "observed", True):
             result = self._wrap_agged_manager(new_mgr)
-            if result.ndim == 1:
-                result.index = self.grouper.result_index
+
+        if result.ndim == 1:
+            result.index = self.grouper.result_index
 
         return self._reindex_output(result, fill_value=0)
 
@@ -2709,18 +2745,15 @@ def blk_func(values: ArrayLike) -> ArrayLike:
 
         res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
         if not is_ser and len(res_mgr.items) != len(mgr.items):
-            warnings.warn(
-                "Dropping invalid columns in "
-                f"{type(self).__name__}.quantile is deprecated. "
-                "In a future version, a TypeError will be raised. "
-                "Before calling .quantile, select only columns which "
-                "should be valid for the function.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), "quantile")
+
             if len(res_mgr.items) == 0:
                 # re-call grouped_reduce to get the desired exception message
                 mgr.grouped_reduce(blk_func, ignore_failures=False)
+                # grouped_reduce _should_ raise, so this should not be reached
+                raise TypeError(  # pragma: no cover
+                    "All columns were dropped in grouped_reduce"
+                )
 
         if is_ser:
             res = self._wrap_agged_manager(res_mgr)
@@ -3150,30 +3183,20 @@ def blk_func(values: ArrayLike) -> ArrayLike:
 
         if not is_ser and len(res_mgr.items) != len(mgr.items):
             howstr = how.replace("group_", "")
-            warnings.warn(
-                "Dropping invalid columns in "
-                f"{type(self).__name__}.{howstr} is deprecated. "
-                "In a future version, a TypeError will be raised. "
-                f"Before calling .{howstr}, select only columns which "
-                "should be valid for the function.",
-                FutureWarning,
-                stacklevel=3,
-            )
+            warn_dropping_nuisance_columns_deprecated(type(self), howstr)
+
             if len(res_mgr.items) == 0:
                 # We re-call grouped_reduce to get the right exception message
-                try:
-                    mgr.grouped_reduce(blk_func, ignore_failures=False)
-                except Exception as err:
-                    error_msg = str(err)
-                    raise TypeError(error_msg)
-                # We should never get here
-                raise TypeError("All columns were dropped in grouped_reduce")
+                mgr.grouped_reduce(blk_func, ignore_failures=False)
+                # grouped_reduce _should_ raise, so this should not be reached
+                raise TypeError(  # pragma: no cover
+                    "All columns were dropped in grouped_reduce"
+                )
 
         if is_ser:
             out = self._wrap_agged_manager(res_mgr)
-            out.index = self.grouper.result_index
         else:
-            out = type(obj)(res_mgr)
+            out = obj._constructor(res_mgr)
 
         return self._wrap_aggregated_output(out)
 
@@ -3627,3 +3650,24 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
     else:
         mi = MultiIndex.from_product([idx, qs])
     return mi
+
+
+def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
+    """
+    Emit the FutureWarning for a groupby method that dropped invalid columns.
+
+    Parameters
+    ----------
+    cls : type
+        Class whose ``__name__`` is shown in the message.
+    how : str
+        Method name shown in the message.
+    """
+    klass = cls.__name__
+    msg = (
+        f"Dropping invalid columns in {klass}.{how} is deprecated. "
+        "In a future version, a TypeError will be raised. "
+        f"Before calling .{how}, select only columns which should be "
+        "valid for the function."
+    )
+    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())