From c2fae931e58ee5433ab3997133d60025327419a2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 4 Feb 2023 20:02:53 -0800
Subject: [PATCH 1/6] REF: avoid handling corner cases in op_via_apply

---
 pandas/core/groupby/generic.py | 30 ++++++++++++++++++------------
 pandas/core/groupby/groupby.py |  6 ------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 815f9936057f4..0501ead7c7928 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -380,10 +380,16 @@ def _wrap_applied_output(
         """
         if len(values) == 0:
             # GH #6265
+            if is_transform:
+                # GH#47787 see test_group_on_empty_multiindex
+                res_index = data.index
+            else:
+                res_index = self.grouper.result_index
+
             return self.obj._constructor(
                 [],
                 name=self.obj.name,
-                index=self.grouper.result_index,
+                index=res_index,
                 dtype=data.dtype,
             )
         assert values is not None
@@ -1136,14 +1142,12 @@ def cov(
     @property
     @doc(Series.is_monotonic_increasing.__doc__)
     def is_monotonic_increasing(self) -> Series:
-        result = self._op_via_apply("is_monotonic_increasing")
-        return result
+        return self.apply(lambda ser: ser.is_monotonic_increasing)
 
     @property
     @doc(Series.is_monotonic_decreasing.__doc__)
     def is_monotonic_decreasing(self) -> Series:
-        result = self._op_via_apply("is_monotonic_decreasing")
-        return result
+        return self.apply(lambda ser: ser.is_monotonic_decreasing)
 
     @doc(Series.hist.__doc__)
     def hist(
@@ -1181,8 +1185,7 @@ def hist(
     @property
     @doc(Series.dtype.__doc__)
     def dtype(self) -> Series:
-        result = self._op_via_apply("dtype")
-        return result
+        return self.apply(lambda ser: ser.dtype)
 
     @doc(Series.unique.__doc__)
     def unique(self) -> Series:
@@ -1428,9 +1431,13 @@ def _wrap_applied_output(
     ):
 
         if len(values) == 0:
-            result = self.obj._constructor(
-                index=self.grouper.result_index, columns=data.columns
-            )
+            if is_transform:
+                # GH#47787 see test_group_on_empty_multiindex
+                res_index = data.index
+            else:
+                res_index = self.grouper.result_index
+
+            result = self.obj._constructor(index=res_index, columns=data.columns)
             result = result.astype(data.dtypes, copy=False)
             return result
 
@@ -2677,8 +2684,7 @@ def hist(
     @property
     @doc(DataFrame.dtypes.__doc__)
     def dtypes(self) -> Series:
-        result = self._op_via_apply("dtypes")
-        return result
+        return self.apply(lambda df: df.dtypes)
 
     @doc(DataFrame.corrwith.__doc__)
     def corrwith(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index fd9a06a06cfa7..d4c2013cc578e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -956,9 +956,6 @@ def __getattr__(self, attr: str):
     def _op_via_apply(self, name: str, *args, **kwargs):
         """Compute the result of an operation by using GroupBy's apply."""
         f = getattr(type(self._obj_with_exclusions), name)
-        if not callable(f):
-            return self.apply(lambda self: getattr(self, name))
-
         sig = inspect.signature(f)
 
         # a little trickery for aggregation functions that need an axis
@@ -980,9 +977,6 @@ def curried(x):
             return self.apply(curried)
 
         is_transform = name in base.transformation_kernels
-        # Transform needs to keep the same schema, including when empty
-        if is_transform and self._obj_with_exclusions.empty:
-            return self._obj_with_exclusions
         result = self._python_apply_general(
             curried,
             self._obj_with_exclusions,

From 0ee4baea8946c5c3066e804e9ba079020a046d50 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 5 Feb 2023 11:48:02 -0800
Subject: [PATCH 2/6] simplify _wrap_aggregated_output

---
 pandas/core/groupby/groupby.py | 24 ++++++++++--------------
 pandas/core/groupby/ops.py     |  2 --
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d4c2013cc578e..c5923715b2114 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1125,7 +1125,7 @@ def _indexed_output_to_ndframe(
     @final
     def _wrap_aggregated_output(
         self,
-        output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike],
+        result: Series | DataFrame,
         qs: npt.NDArray[np.float64] | None = None,
     ):
         """
@@ -1133,22 +1133,14 @@ def _wrap_aggregated_output(
 
         Parameters
         ----------
-        output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike]
-           Data to wrap.
+        result : Series or DataFrame
 
         Returns
         -------
         Series or DataFrame
         """
-
-        if isinstance(output, (Series, DataFrame)):
-            # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce,
-            #  in which case our columns are already set correctly.
-            # ATM we do not get here for SeriesGroupBy; when we do, we will
-            #  need to require that result.name already match self.obj.name
-            result = output
-        else:
-            result = self._indexed_output_to_ndframe(output)
+        # ATM we do not get here for SeriesGroupBy; when we do, we will
+        #  need to require that result.name already match self.obj.name
 
         if not self.as_index:
             # `not self.as_index` is only relevant for DataFrameGroupBy,
@@ -1450,7 +1442,8 @@ def _python_agg_general(self, func, *args, **kwargs):
         output: dict[base.OutputKey, ArrayLike] = {}
 
         if self.ngroups == 0:
-            # agg_series below assumes ngroups > 0
+            # e.g. test_evaluate_with_empty_groups; a different path would give a
+            #  different result dtype in the empty case.
             return self._python_apply_general(f, self._selected_obj, is_agg=True)
 
         for idx, obj in enumerate(self._iterate_slices()):
@@ -1460,9 +1453,11 @@ def _python_agg_general(self, func, *args, **kwargs):
             output[key] = result
 
         if not output:
+            # e.g. test_groupby_crash_on_nunique, test_margins_no_values_no_cols
             return self._python_apply_general(f, self._selected_obj)
 
-        return self._wrap_aggregated_output(output)
+        result = self._indexed_output_to_ndframe(output)
+        return self._wrap_aggregated_output(result)
 
     @final
     def _agg_general(
@@ -2571,6 +2566,7 @@ def ohlc(self) -> DataFrame:
             )
             return self._reindex_output(result)
 
+        # TODO: 2023-02-05 all tests that get here have self.as_index
         return self._apply_to_column_groupbys(
             lambda x: x.ohlc(), self._obj_with_exclusions
         )
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index bff61ec135d74..14ca9066dae77 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -1028,7 +1028,6 @@ def _aggregate_series_pure_python(
     ) -> npt.NDArray[np.object_]:
         ids, _, ngroups = self.group_info
 
-        counts = np.zeros(ngroups, dtype=int)
         result = np.empty(ngroups, dtype="O")
         initialized = False
 
@@ -1044,7 +1043,6 @@ def _aggregate_series_pure_python(
                 libreduction.check_result_array(res, group.dtype)
                 initialized = True
 
-            counts[i] = group.shape[0]
             result[i] = res
 
         return result

From 1b9a2891f5f7e22a8775d0eaaac08dae84ee96cd Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 5 Feb 2023 16:17:42 -0800
Subject: [PATCH 3/6] REF: remove _wrap_transformed_output

---
 pandas/core/groupby/generic.py |  7 +-----
 pandas/core/groupby/groupby.py | 39 +++++++---------------------------
 pandas/core/resample.py        |  1 +
 3 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 0501ead7c7928..fe21e27bbce5c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1731,13 +1731,8 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
             output[i] = sgb.transform(wrapper)
             inds.append(i)
 
-        if not output:
-            raise TypeError("Transform function invalid for data types")
-
-        columns = obj.columns.take(inds)
-
         result = self.obj._constructor(output, index=obj.index)
-        result.columns = columns
+        result.columns = obj.columns
         return result
 
     def filter(self, func, dropna: bool = True, *args, **kwargs):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c5923715b2114..d9bb2844d798d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1169,36 +1169,6 @@ def _wrap_aggregated_output(
 
         return self._reindex_output(result, qs=qs)
 
-    @final
-    def _wrap_transformed_output(
-        self, output: Mapping[base.OutputKey, ArrayLike]
-    ) -> Series | DataFrame:
-        """
-        Wraps the output of GroupBy transformations into the expected result.
-
-        Parameters
-        ----------
-        output : Mapping[base.OutputKey, ArrayLike]
-            Data to wrap.
-
-        Returns
-        -------
-        Series or DataFrame
-            Series for SeriesGroupBy, DataFrame for DataFrameGroupBy
-        """
-        if isinstance(output, (Series, DataFrame)):
-            result = output
-        else:
-            result = self._indexed_output_to_ndframe(output)
-
-        if self.axis == 1:
-            # Only relevant for DataFrameGroupBy
-            result = result.T
-            result.columns = self.obj.columns
-
-        result.index = self.obj.index
-        return result
-
     def _wrap_applied_output(
         self,
         data,
@@ -1839,6 +1809,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
         # If we are grouping on categoricals we want unobserved categories to
         # return zero, rather than the default of NaN which the reindexing in
         # _wrap_agged_manager() returns. GH 35028
+        # e.g. test_dataframe_groupby_on_2_categoricals_when_observed_is_false
         with com.temp_setattr(self, "observed", True):
             result = self._wrap_agged_manager(new_mgr)
 
@@ -2844,7 +2815,13 @@ def blk_func(values: ArrayLike) -> ArrayLike:
         if isinstance(new_obj, Series):
             new_obj.name = obj.name
 
-        return self._wrap_transformed_output(new_obj)
+        if self.axis == 1:
+            # Only relevant for DataFrameGroupBy
+            new_obj = new_obj.T
+            new_obj.columns = self.obj.columns
+
+        new_obj.index = self.obj.index
+        return new_obj
 
     @final
     @Substitution(name="groupby")
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 0cb2765b439bc..01f0ddd1627c7 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -435,6 +435,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
         try:
             if isinstance(obj, ABCDataFrame) and callable(how):
                 # Check if the function is reducing or not.
+                # e.g. test_resample_apply_with_additional_args
                 result = grouped._aggregate_item_by_item(how, *args, **kwargs)
             else:
                 result = grouped.aggregate(how, *args, **kwargs)

From fca5cc354b8578d4394904b92bd178b87ade9737 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 5 Feb 2023 16:41:39 -0800
Subject: [PATCH 4/6] TYP: mark _insert_inaxis_grouper as final

---
 pandas/core/groupby/groupby.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d9bb2844d798d..04a0840f8adb4 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1099,6 +1099,7 @@ def _set_result_index_ordered(
 
         return result
 
+    @final
     def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
         if isinstance(result, Series):
             result = result.to_frame()

From cb499e9e450fd18dfc2b661e9f6aaab5580d8513 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 5 Feb 2023 18:41:45 -0800
Subject: [PATCH 5/6] mypy fixup

---
 pandas/core/groupby/generic.py | 3 ++-
 pandas/core/groupby/groupby.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index fe21e27bbce5c..20a1cb6e5ee3c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -2679,7 +2679,8 @@ def hist(
     @property
     @doc(DataFrame.dtypes.__doc__)
     def dtypes(self) -> Series:
-        return self.apply(lambda df: df.dtypes)
+        # error: Incompatible return value type (got "DataFrame", expected "Series")
+        return self.apply(lambda df: df.dtypes)  # type: ignore[return-value]
 
     @doc(DataFrame.corrwith.__doc__)
     def corrwith(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 04a0840f8adb4..be22b05cbe1ab 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1427,8 +1427,8 @@ def _python_agg_general(self, func, *args, **kwargs):
             # e.g. test_groupby_crash_on_nunique, test_margins_no_values_no_cols
             return self._python_apply_general(f, self._selected_obj)
 
-        result = self._indexed_output_to_ndframe(output)
-        return self._wrap_aggregated_output(result)
+        res = self._indexed_output_to_ndframe(output)
+        return self._wrap_aggregated_output(res)
 
     @final
     def _agg_general(

From 5c9d6ec0490191783421e2a87fa6d1a663d51873 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 5 Feb 2023 19:00:06 -0800
Subject: [PATCH 6/6] CLN: remove unused inds list in _transform_item_by_item

---
 pandas/core/groupby/generic.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 20a1cb6e5ee3c..aec2037d044b8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1726,10 +1726,8 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
         # iterate through columns, see test_transform_exclude_nuisance
         #  gets here with non-unique columns
         output = {}
-        inds = []
         for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
             output[i] = sgb.transform(wrapper)
-            inds.append(i)
 
         result = self.obj._constructor(output, index=obj.index)
         result.columns = obj.columns