diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f0408db7f4ef8..b1f7f56aaad8c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -48,7 +48,6 @@ is_dict_like, is_integer_dtype, is_interval_dtype, - is_numeric_dtype, is_scalar, ) from pandas.core.dtypes.missing import ( @@ -78,6 +77,7 @@ _apply_docs, _transform_template, group_selection_context, + warn_dropping_nuisance_columns_deprecated, ) from pandas.core.indexes.api import ( Index, @@ -334,43 +334,6 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: output = self._reindex_output(output) return output - def _cython_agg_general( - self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1 - ): - - obj = self._selected_obj - objvals = obj._values - data = obj._mgr - - if numeric_only and not is_numeric_dtype(obj.dtype): - # GH#41291 match Series behavior - raise NotImplementedError( - f"{type(self).__name__}.{how} does not implement numeric_only." - ) - - # This is overkill because it is only called once, but is here to - # mirror the array_func used in DataFrameGroupBy._cython_agg_general - def array_func(values: ArrayLike) -> ArrayLike: - try: - result = self.grouper._cython_operation( - "aggregate", values, how, axis=data.ndim - 1, min_count=min_count - ) - except NotImplementedError: - # generally if we have numeric_only=False - # and non-applicable functions - # try to python agg - # TODO: shouldn't min_count matter? - result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) - - return result - - result = array_func(objvals) - - ser = self.obj._constructor( - result, index=self.grouper.result_index, name=obj.name - ) - return self._reindex_output(ser) - def _indexed_output_to_ndframe( self, output: Mapping[base.OutputKey, ArrayLike] ) -> Series: @@ -970,46 +933,6 @@ def _iterate_slices(self) -> Iterable[Series]: yield values - def _cython_agg_general( - self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1 - ) -> DataFrame: - # Note: we never get here with how="ohlc"; that goes through SeriesGroupBy - - data: Manager2D = self._get_data_to_aggregate() - - if numeric_only: - data = data.get_numeric_data(copy=False) - - def array_func(values: ArrayLike) -> ArrayLike: - try: - result = self.grouper._cython_operation( - "aggregate", values, how, axis=data.ndim - 1, min_count=min_count - ) - except NotImplementedError: - # generally if we have numeric_only=False - # and non-applicable functions - # try to python agg - # TODO: shouldn't min_count matter? - result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) - - return result - - # TypeError -> we may have an exception in trying to aggregate - # continue and exclude the block - new_mgr = data.grouped_reduce(array_func, ignore_failures=True) - - if len(new_mgr) < len(data): - warnings.warn( - f"Dropping invalid columns in {type(self).__name__}.{how} " - "is deprecated. In a future version, a TypeError will be raised. " - f"Before calling .{how}, select only columns which should be " - "valid for the function.", - FutureWarning, - stacklevel=4, - ) - - return self._wrap_agged_manager(new_mgr) - def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: if self.grouper.nkeys != 1: raise AssertionError("Number of keys must be 1") @@ -1195,14 +1118,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr.set_axis(1, mgr.axes[1]) if len(res_mgr) < len(mgr): - warnings.warn( - f"Dropping invalid columns in {type(self).__name__}.{how} " - "is deprecated. In a future version, a TypeError will be raised. " - f"Before calling .{how}, select only columns which should be " - "valid for the transforming function.", - FutureWarning, - stacklevel=4, - ) + warn_dropping_nuisance_columns_deprecated(type(self), how) res_df = self.obj._constructor(res_mgr) if self.axis == 1: @@ -1314,14 +1230,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: output[i] = sgb.transform(wrapper) except TypeError: # e.g. trying to call nanmean with string values - warnings.warn( - f"Dropping invalid columns in {type(self).__name__}.transform " - "is deprecated. In a future version, a TypeError will be raised. " - "Before calling .transform, select only columns which should be " - "valid for the transforming function.", - FutureWarning, - stacklevel=5, - ) + warn_dropping_nuisance_columns_deprecated(type(self), "transform") else: inds.append(i) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b41935902b9cf..3720da80ad34d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1422,14 +1422,7 @@ def _python_agg_general(self, func, *args, **kwargs): # if this function is invalid for this dtype, we will ignore it. result = self.grouper.agg_series(obj, f) except TypeError: - warnings.warn( - f"Dropping invalid columns in {type(self).__name__}.agg " - "is deprecated. In a future version, a TypeError will be raised. " - "Before calling .agg, select only columns which should be " - "valid for the aggregating function.", - FutureWarning, - stacklevel=3, - ) + warn_dropping_nuisance_columns_deprecated(type(self), "agg") continue key = base.OutputKey(label=name, position=idx) @@ -1500,10 +1493,52 @@ def _agg_py_fallback( # test_groupby_duplicate_columns with object dtype values return ensure_block_shape(res_values, ndim=ndim) + @final def _cython_agg_general( self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1 ): - raise AbstractMethodError(self) + # Note: we never get here with how="ohlc" for DataFrameGroupBy; + # that goes through SeriesGroupBy + + data = self._get_data_to_aggregate() + is_ser = data.ndim == 1 + + if numeric_only: + if is_ser and not is_numeric_dtype(self._selected_obj.dtype): + # GH#41291 match Series behavior + raise NotImplementedError( + f"{type(self).__name__}.{how} does not implement numeric_only." + ) + elif not is_ser: + data = data.get_numeric_data(copy=False) + + def array_func(values: ArrayLike) -> ArrayLike: + try: + result = self.grouper._cython_operation( + "aggregate", values, how, axis=data.ndim - 1, min_count=min_count + ) + except NotImplementedError: + # generally if we have numeric_only=False + # and non-applicable functions + # try to python agg + # TODO: shouldn't min_count matter? + result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) + + return result + + # TypeError -> we may have an exception in trying to aggregate + # continue and exclude the block + new_mgr = data.grouped_reduce(array_func, ignore_failures=True) + + if not is_ser and len(new_mgr) < len(data): + warn_dropping_nuisance_columns_deprecated(type(self), how) + + res = self._wrap_agged_manager(new_mgr) + if is_ser: + res.index = self.grouper.result_index + return self._reindex_output(res) + else: + return res def _cython_transform( self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs @@ -1769,8 +1804,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: # _wrap_agged_manager() returns. GH 35028 with com.temp_setattr(self, "observed", True): result = self._wrap_agged_manager(new_mgr) - if result.ndim == 1: - result.index = self.grouper.result_index + + if result.ndim == 1: + result.index = self.grouper.result_index return self._reindex_output(result, fill_value=0) @@ -2709,18 +2745,15 @@ def blk_func(values: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) if not is_ser and len(res_mgr.items) != len(mgr.items): - warnings.warn( - "Dropping invalid columns in " - f"{type(self).__name__}.quantile is deprecated. " - "In a future version, a TypeError will be raised. " - "Before calling .quantile, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=find_stack_level(), - ) + warn_dropping_nuisance_columns_deprecated(type(self), "quantile") + if len(res_mgr.items) == 0: # re-call grouped_reduce to get the desired exception message mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) if is_ser: res = self._wrap_agged_manager(res_mgr) @@ -3150,30 +3183,20 @@ def blk_func(values: ArrayLike) -> ArrayLike: if not is_ser and len(res_mgr.items) != len(mgr.items): howstr = how.replace("group_", "") - warnings.warn( - "Dropping invalid columns in " - f"{type(self).__name__}.{howstr} is deprecated. " - "In a future version, a TypeError will be raised. " - f"Before calling .{howstr}, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=3, - ) + warn_dropping_nuisance_columns_deprecated(type(self), howstr) + if len(res_mgr.items) == 0: # We re-call grouped_reduce to get the right exception message - try: - mgr.grouped_reduce(blk_func, ignore_failures=False) - except Exception as err: - error_msg = str(err) - raise TypeError(error_msg) - # We should never get here - raise TypeError("All columns were dropped in grouped_reduce") + mgr.grouped_reduce(blk_func, ignore_failures=False) + # grouped_reduce _should_ raise, so this should not be reached + raise TypeError( # pragma: no cover + "All columns were dropped in grouped_reduce" + ) if is_ser: out = self._wrap_agged_manager(res_mgr) - out.index = self.grouper.result_index else: - out = type(obj)(res_mgr) + out = obj._constructor(res_mgr) return self._wrap_aggregated_output(out) @@ -3627,3 +3650,15 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde else: mi = MultiIndex.from_product([idx, qs]) return mi + + +def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + )