diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 815f9936057f4..aec2037d044b8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -380,10 +380,16 @@ def _wrap_applied_output( """ if len(values) == 0: # GH #6265 + if is_transform: + # GH#47787 see test_group_on_empty_multiindex + res_index = data.index + else: + res_index = self.grouper.result_index + return self.obj._constructor( [], name=self.obj.name, - index=self.grouper.result_index, + index=res_index, dtype=data.dtype, ) assert values is not None @@ -1136,14 +1142,12 @@ def cov( @property @doc(Series.is_monotonic_increasing.__doc__) def is_monotonic_increasing(self) -> Series: - result = self._op_via_apply("is_monotonic_increasing") - return result + return self.apply(lambda ser: ser.is_monotonic_increasing) @property @doc(Series.is_monotonic_decreasing.__doc__) def is_monotonic_decreasing(self) -> Series: - result = self._op_via_apply("is_monotonic_decreasing") - return result + return self.apply(lambda ser: ser.is_monotonic_decreasing) @doc(Series.hist.__doc__) def hist( @@ -1181,8 +1185,7 @@ def hist( @property @doc(Series.dtype.__doc__) def dtype(self) -> Series: - result = self._op_via_apply("dtype") - return result + return self.apply(lambda ser: ser.dtype) @doc(Series.unique.__doc__) def unique(self) -> Series: @@ -1428,9 +1431,13 @@ def _wrap_applied_output( ): if len(values) == 0: - result = self.obj._constructor( - index=self.grouper.result_index, columns=data.columns - ) + if is_transform: + # GH#47787 see test_group_on_empty_multiindex + res_index = data.index + else: + res_index = self.grouper.result_index + + result = self.obj._constructor(index=res_index, columns=data.columns) result = result.astype(data.dtypes, copy=False) return result @@ -1719,18 +1726,11 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: # iterate through columns, see test_transform_exclude_nuisance # gets here with non-unique columns output = {} - inds = [] for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)): output[i] = sgb.transform(wrapper) - inds.append(i) - - if not output: - raise TypeError("Transform function invalid for data types") - - columns = obj.columns.take(inds) result = self.obj._constructor(output, index=obj.index) - result.columns = columns + result.columns = obj.columns return result def filter(self, func, dropna: bool = True, *args, **kwargs): @@ -2677,8 +2677,8 @@ def hist( @property @doc(DataFrame.dtypes.__doc__) def dtypes(self) -> Series: - result = self._op_via_apply("dtypes") - return result + # error: Incompatible return value type (got "DataFrame", expected "Series") + return self.apply(lambda df: df.dtypes) # type: ignore[return-value] @doc(DataFrame.corrwith.__doc__) def corrwith( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fd9a06a06cfa7..be22b05cbe1ab 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -956,9 +956,6 @@ def __getattr__(self, attr: str): def _op_via_apply(self, name: str, *args, **kwargs): """Compute the result of an operation by using GroupBy's apply.""" f = getattr(type(self._obj_with_exclusions), name) - if not callable(f): - return self.apply(lambda self: getattr(self, name)) - sig = inspect.signature(f) # a little trickery for aggregation functions that need an axis @@ -980,9 +977,6 @@ def curried(x): return self.apply(curried) is_transform = name in base.transformation_kernels - # Transform needs to keep the same schema, including when empty - if is_transform and self._obj_with_exclusions.empty: - return self._obj_with_exclusions result = self._python_apply_general( curried, self._obj_with_exclusions, @@ -1105,6 +1099,7 @@ def _set_result_index_ordered( return result + @final def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame: if isinstance(result, Series): result = result.to_frame() @@ -1131,7 +1126,7 @@ def _indexed_output_to_ndframe( @final def _wrap_aggregated_output( self, - output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike], + result: Series | DataFrame, qs: npt.NDArray[np.float64] | None = None, ): """ @@ -1139,22 +1134,14 @@ def _wrap_aggregated_output( Parameters ---------- - output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike] - Data to wrap. + result : Series, DataFrame Returns ------- Series or DataFrame """ - - if isinstance(output, (Series, DataFrame)): - # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce, - # in which case our columns are already set correctly. - # ATM we do not get here for SeriesGroupBy; when we do, we will - # need to require that result.name already match self.obj.name - result = output - else: - result = self._indexed_output_to_ndframe(output) + # ATM we do not get here for SeriesGroupBy; when we do, we will + # need to require that result.name already match self.obj.name if not self.as_index: # `not self.as_index` is only relevant for DataFrameGroupBy, @@ -1183,36 +1170,6 @@ def _wrap_aggregated_output( return self._reindex_output(result, qs=qs) - @final - def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, ArrayLike] - ) -> Series | DataFrame: - """ - Wraps the output of GroupBy transformations into the expected result. - - Parameters - ---------- - output : Mapping[base.OutputKey, ArrayLike] - Data to wrap. - - Returns - ------- - Series or DataFrame - Series for SeriesGroupBy, DataFrame for DataFrameGroupBy - """ - if isinstance(output, (Series, DataFrame)): - result = output - else: - result = self._indexed_output_to_ndframe(output) - - if self.axis == 1: - # Only relevant for DataFrameGroupBy - result = result.T - result.columns = self.obj.columns - - result.index = self.obj.index - return result - def _wrap_applied_output( self, data, @@ -1456,7 +1413,8 @@ def _python_agg_general(self, func, *args, **kwargs): output: dict[base.OutputKey, ArrayLike] = {} if self.ngroups == 0: - # agg_series below assumes ngroups > 0 + # e.g. test_evaluate_with_empty_groups different path gets different + # result dtype in empty case. return self._python_apply_general(f, self._selected_obj, is_agg=True) for idx, obj in enumerate(self._iterate_slices()): @@ -1466,9 +1424,11 @@ def _python_agg_general(self, func, *args, **kwargs): output[key] = result if not output: + # e.g. test_groupby_crash_on_nunique, test_margins_no_values_no_cols return self._python_apply_general(f, self._selected_obj) - return self._wrap_aggregated_output(output) + res = self._indexed_output_to_ndframe(output) + return self._wrap_aggregated_output(res) @final def _agg_general( @@ -1850,6 +1810,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: # If we are grouping on categoricals we want unobserved categories to # return zero, rather than the default of NaN which the reindexing in # _wrap_agged_manager() returns. GH 35028 + # e.g. test_dataframe_groupby_on_2_categoricals_when_observed_is_false with com.temp_setattr(self, "observed", True): result = self._wrap_agged_manager(new_mgr) @@ -2577,6 +2538,7 @@ def ohlc(self) -> DataFrame: ) return self._reindex_output(result) + # TODO: 2023-02-05 all tests that get here have self.as_index return self._apply_to_column_groupbys( lambda x: x.ohlc(), self._obj_with_exclusions ) @@ -2854,7 +2816,13 @@ def blk_func(values: ArrayLike) -> ArrayLike: if isinstance(new_obj, Series): new_obj.name = obj.name - return self._wrap_transformed_output(new_obj) + if self.axis == 1: + # Only relevant for DataFrameGroupBy + new_obj = new_obj.T + new_obj.columns = self.obj.columns + + new_obj.index = self.obj.index + return new_obj @final @Substitution(name="groupby") diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bff61ec135d74..14ca9066dae77 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1028,7 +1028,6 @@ def _aggregate_series_pure_python( ) -> npt.NDArray[np.object_]: ids, _, ngroups = self.group_info - counts = np.zeros(ngroups, dtype=int) result = np.empty(ngroups, dtype="O") initialized = False @@ -1044,7 +1043,6 @@ def _aggregate_series_pure_python( libreduction.check_result_array(res, group.dtype) initialized = True - counts[i] = group.shape[0] result[i] = res return result diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0cb2765b439bc..01f0ddd1627c7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -435,6 +435,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): try: if isinstance(obj, ABCDataFrame) and callable(how): # Check if the function is reducing or not. + # e.g. test_resample_apply_with_additional_args result = grouped._aggregate_item_by_item(how, *args, **kwargs) else: result = grouped.aggregate(how, *args, **kwargs)