diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 19fba398feb08..67188d91bca70 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -154,9 +154,6 @@ def _get_data_to_aggregate( ) return single - def _iterate_slices(self) -> Iterable[Series]: - yield self._selected_obj - _agg_examples_doc = dedent( """ Examples @@ -408,7 +405,9 @@ def _aggregate_named(self, func, *args, **kwargs): result = {} initialized = False - for name, group in self: + for name, group in self.grouper.get_iterator( + self._selected_obj, axis=self.axis + ): object.__setattr__(group, "name", name) output = func(group, *args, **kwargs) @@ -568,7 +567,11 @@ def true_and_notna(x) -> bool: try: indices = [ - self._get_index(name) for name, group in self if true_and_notna(group) + self._get_index(name) + for name, group in self.grouper.get_iterator( + self._selected_obj, axis=self.axis + ) + if true_and_notna(group) ] except (ValueError, TypeError) as err: raise TypeError("the filter must return a boolean result") from err @@ -1850,29 +1853,33 @@ def _indexed_output_to_ndframe( def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: return self.obj._constructor(mgr) - def _iterate_column_groupbys(self, obj: DataFrame): - for i, colname in enumerate(obj.columns): - yield colname, SeriesGroupBy( + def _apply_to_column_groupbys(self, func) -> DataFrame: + from pandas.core.reshape.concat import concat + + obj = self._obj_with_exclusions + columns = obj.columns + sgbs = [ + SeriesGroupBy( obj.iloc[:, i], selection=colname, grouper=self.grouper, exclusions=self.exclusions, observed=self.observed, ) - - def _apply_to_column_groupbys(self, func, obj: DataFrame) -> DataFrame: - from pandas.core.reshape.concat import concat - - columns = obj.columns - results = [ - func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj) + for i, colname in enumerate(obj.columns) ] + results = [func(sgb) for sgb in sgbs] if not len(results): # concat would raise - return DataFrame([], columns=columns, index=self.grouper.result_index) + res_df = DataFrame([], columns=columns, index=self.grouper.result_index) else: - return concat(results, keys=columns, axis=1) + res_df = concat(results, keys=columns, axis=1) + + if not self.as_index: + res_df.index = default_index(len(res_df)) + res_df = self._insert_inaxis_grouper(res_df) + return res_df def nunique(self, dropna: bool = True) -> DataFrame: """ @@ -1925,16 +1932,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: lambda sgb: sgb.nunique(dropna), self._obj_with_exclusions, is_agg=True ) - obj = self._obj_with_exclusions - results = self._apply_to_column_groupbys( - lambda sgb: sgb.nunique(dropna), obj=obj - ) - - if not self.as_index: - results.index = default_index(len(results)) - results = self._insert_inaxis_grouper(results) - - return results + return self._apply_to_column_groupbys(lambda sgb: sgb.nunique(dropna)) def idxmax( self, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dee68c01587b1..43354fc589760 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -19,7 +19,6 @@ class providing the base-class of operations. TYPE_CHECKING, Callable, Hashable, - Iterable, Iterator, List, Literal, @@ -990,12 +989,6 @@ def curried(x): result = self._set_result_index_ordered(result) return result - # ----------------------------------------------------------------- - # Selection - - def _iterate_slices(self) -> Iterable[Series]: - raise AbstractMethodError(self) - # ----------------------------------------------------------------- # Dispatch/Wrapping @@ -1398,7 +1391,7 @@ def _python_apply_general( Series or DataFrame data after applying f """ - values, mutated = self.grouper.apply(f, data, self.axis) + values, mutated = self.grouper.apply_groupwise(f, data, self.axis) if not_indexed_same is None: not_indexed_same = mutated @@ -2459,7 +2452,6 @@ def ohlc(self) -> DataFrame: Open, high, low and close values within each group. """ if self.obj.ndim == 1: - # self._iterate_slices() yields only self._selected_obj obj = self._selected_obj is_numeric = is_numeric_dtype(obj.dtype) @@ -2476,12 +2468,7 @@ def ohlc(self) -> DataFrame: ) return self._reindex_output(result) - result = self._apply_to_column_groupbys( - lambda x: x.ohlc(), self._obj_with_exclusions - ) - if not self.as_index: - result = self._insert_inaxis_grouper(result) - result.index = default_index(len(result)) + result = self._apply_to_column_groupbys(lambda sgb: sgb.ohlc()) return result @doc(DataFrame.describe) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 52b8301554c96..726d75d705344 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -748,7 +748,7 @@ def group_keys_seq(self): return get_flattened_list(ids, ngroups, self.levels, self.codes) @final - def apply( + def apply_groupwise( self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0 ) -> tuple[list, bool]: mutated = False