diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 28c44ff64c3e1..d967558b93954 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -26,10 +26,7 @@ import numpy as np -from pandas._libs import ( - lib, - reduction as libreduction, -) +from pandas._libs import reduction as libreduction from pandas._typing import ( ArrayLike, FrameOrSeries, @@ -161,6 +158,19 @@ def pinner(cls): class SeriesGroupBy(GroupBy[Series]): _apply_allowlist = base.series_apply_allowlist + def _wrap_agged_manager(self, mgr: Manager2D) -> Series: + single = mgr.iget(0) + ser = self.obj._constructor(single, name=self.obj.name) + ser.index = self.grouper.result_index + return ser + + def _get_data_to_aggregate(self) -> Manager2D: + obj = self._obj_with_exclusions + df = obj.to_frame() + df.columns = [obj.name] # in case name is None, we need to overwrite [0] + + return df._mgr + def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj @@ -768,30 +778,6 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: out = ensure_int64(out) return self.obj._constructor(out, index=mi, name=self.obj.name) - def count(self) -> Series: - """ - Compute count of group, excluding missing values. - - Returns - ------- - Series - Count of values within each group. - """ - ids, _, ngroups = self.grouper.group_info - val = self.obj._values - - mask = (ids != -1) & ~isna(val) - minlength = ngroups or 0 - out = np.bincount(ids[mask], minlength=minlength) - - result = self.obj._constructor( - out, - index=self.grouper.result_index, - name=self.obj.name, - dtype="int64", - ) - return self._reindex_output(result, fill_value=0) - @doc(Series.nlargest) def nlargest(self, n: int = 5, keep: str = "first"): f = partial(Series.nlargest, n=n, keep=keep) @@ -1583,40 +1569,6 @@ def _apply_to_column_groupbys(self, func, obj: FrameOrSeries) -> DataFrame: else: return concat(results, keys=columns, axis=1) - def count(self) -> DataFrame: - """ - Compute count of group, excluding missing values. - - Returns - ------- - DataFrame - Count of values within each group. - """ - data = self._get_data_to_aggregate() - ids, _, ngroups = self.grouper.group_info - mask = ids != -1 - - def hfunc(bvalues: ArrayLike) -> ArrayLike: - # TODO(2DEA): reshape would not be necessary with 2D EAs - if bvalues.ndim == 1: - # EA - masked = mask & ~isna(bvalues).reshape(1, -1) - else: - masked = mask & ~isna(bvalues) - - counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) - return counted - - new_mgr = data.grouped_reduce(hfunc) - - # If we are grouping on categoricals we want unobserved categories to - # return zero, rather than the default of NaN which the reindexing in - # _wrap_agged_manager() returns. GH 35028 - with com.temp_setattr(self, "observed", True): - result = self._wrap_agged_manager(new_mgr) - - return self._reindex_output(result, fill_value=0) - def nunique(self, dropna: bool = True) -> DataFrame: """ Return DataFrame with counts of unique elements in each position. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a969b9d0ee160..2d2287a351030 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1720,9 +1720,10 @@ def all(self, skipna: bool = True): """ return self._bool_agg("all", skipna) + @final @Substitution(name="groupby") @Appender(_common_see_also) - def count(self): + def count(self) -> Series | DataFrame: """ Compute count of group, excluding missing values. @@ -1731,8 +1732,30 @@ def count(self): Series or DataFrame Count of values within each group. """ - # defined here for API doc - raise NotImplementedError + data = self._get_data_to_aggregate() + ids, _, ngroups = self.grouper.group_info + mask = ids != -1 + + def hfunc(bvalues: ArrayLike) -> ArrayLike: + # TODO(2DEA): reshape would not be necessary with 2D EAs + if bvalues.ndim == 1: + # EA + masked = mask & ~isna(bvalues).reshape(1, -1) + else: + masked = mask & ~isna(bvalues) + + counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) + return counted + + new_mgr = data.grouped_reduce(hfunc) + + # If we are grouping on categoricals we want unobserved categories to + # return zero, rather than the default of NaN which the reindexing in + # _wrap_agged_manager() returns. GH 35028 + with com.temp_setattr(self, "observed", True): + result = self._wrap_agged_manager(new_mgr) + + return self._reindex_output(result, fill_value=0) @final @Substitution(name="groupby")