From 1b21a6dd734669409886ab3b795c851ba62b4af8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Sep 2021 10:29:17 -0700 Subject: [PATCH 1/2] REF: dont pass keys through wrap_applied_output --- pandas/core/groupby/generic.py | 23 +++++++++++------------ pandas/core/groupby/groupby.py | 14 +++++++------- pandas/core/groupby/ops.py | 13 +++++++++---- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7af32d70c00bc..4380756be65da 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -399,7 +399,6 @@ def _wrap_transformed_output( def _wrap_applied_output( self, data: Series, - keys: Index, values: list[Any] | None, not_indexed_same: bool = False, ) -> DataFrame | Series: @@ -410,8 +409,6 @@ def _wrap_applied_output( ---------- data : Series Input data for groupby operation. - keys : Index - Keys of groups that Series was grouped by. values : Optional[List[Any]] Applied output for each group. 
not_indexed_same : bool, default False @@ -421,6 +418,8 @@ def _wrap_applied_output( ------- DataFrame or Series """ + keys = self.grouper.group_keys_seq + if len(keys) == 0: # GH #6265 return self.obj._constructor( @@ -442,7 +441,7 @@ def _wrap_applied_output( res_ser.name = self.obj.name return res_ser elif isinstance(values[0], (Series, DataFrame)): - return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + return self._concat_objects(values, not_indexed_same=not_indexed_same) else: # GH #6265 #24880 result = self.obj._constructor( @@ -1130,7 +1129,9 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: res_df.columns = obj.columns return res_df - def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): + def _wrap_applied_output(self, data, values, not_indexed_same=False): + keys = self.grouper.group_keys_seq + if len(keys) == 0: result = self.obj._constructor( index=self.grouper.result_index, columns=data.columns @@ -1145,7 +1146,7 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): # GH9684 - All values are None, return an empty frame. 
return self.obj._constructor() elif isinstance(first_not_none, DataFrame): - return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + return self._concat_objects(values, not_indexed_same=not_indexed_same) key_index = self.grouper.result_index if self.as_index else None @@ -1173,12 +1174,11 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): else: # values are Series return self._wrap_applied_output_series( - keys, values, not_indexed_same, first_not_none, key_index + values, not_indexed_same, first_not_none, key_index ) def _wrap_applied_output_series( self, - keys, values: list[Series], not_indexed_same: bool, first_not_none, @@ -1201,6 +1201,7 @@ def _wrap_applied_output_series( # assign the name to this series if singular_series: + keys = self.grouper.group_keys_seq values[0].name = keys[0] # GH2893 @@ -1209,9 +1210,7 @@ def _wrap_applied_output_series( # if any of the sub-series are not indexed the same # OR we don't have a multi-index and we have only a # single values - return self._concat_objects( - keys, values, not_indexed_same=not_indexed_same - ) + return self._concat_objects(values, not_indexed_same=not_indexed_same) # still a series # path added as of GH 5545 @@ -1222,7 +1221,7 @@ def _wrap_applied_output_series( if not all_indexed_same: # GH 8467 - return self._concat_objects(keys, values, not_indexed_same=True) + return self._concat_objects(values, not_indexed_same=True) # Combine values # vstack+constructor is faster than concat and handles MI-columns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b3e5605d4a2d1..ac88da3f0d47e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -998,7 +998,7 @@ def _iterate_slices(self) -> Iterable[Series]: # Dispatch/Wrapping @final - def _concat_objects(self, keys, values, not_indexed_same: bool = False): + def _concat_objects(self, values, not_indexed_same: bool = False): from 
pandas.core.reshape.concat import concat def reset_identity(values): @@ -1035,7 +1035,7 @@ def reset_identity(values): if self.as_index: # possible MI return case - group_keys = keys + group_keys = self.grouper.group_keys_seq group_levels = self.grouper.levels group_names = self.grouper.names @@ -1146,7 +1146,7 @@ def _wrap_aggregated_output( def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]): raise AbstractMethodError(self) - def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False): + def _wrap_applied_output(self, data, values, not_indexed_same: bool = False): raise AbstractMethodError(self) def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: @@ -1182,7 +1182,7 @@ def _group_keys_index(self) -> Index: # The index to use for the result of Groupby Aggregations. # This _may_ be redundant with self.grouper.result_index, but that # has not been conclusively proven yet. - keys = self.grouper._get_group_keys() + keys = self.grouper.group_keys_seq if self.grouper.nkeys > 1: index = MultiIndex.from_tuples(keys, names=self.grouper.names) else: @@ -1223,7 +1223,7 @@ def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) data and indices into a Numba jitted function. 
""" starts, ends, sorted_index, sorted_data = self._numba_prep(func, data) - group_keys = self.grouper._get_group_keys() + group_keys = self.grouper.group_keys_seq numba_transform_func = numba_.generate_numba_transform_func( kwargs, func, engine_kwargs @@ -1360,13 +1360,13 @@ def _python_apply_general( Series or DataFrame data after applying f """ - keys, values, mutated = self.grouper.apply(f, data, self.axis) + values, mutated = self.grouper.apply(f, data, self.axis) if not_indexed_same is None: not_indexed_same = mutated or self.mutated return self._wrap_applied_output( - data, keys, values, not_indexed_same=not_indexed_same + data, values, not_indexed_same=not_indexed_same ) @final diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e35f5331195fa..8090758d9cdc3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -690,7 +690,7 @@ def get_iterator( for each group """ splitter = self._get_splitter(data, axis=axis) - keys = self._get_group_keys() + keys = self.group_keys_seq for key, group in zip(keys, splitter): yield key, group.__finalize__(data, method="groupby") @@ -726,10 +726,15 @@ def _get_group_keys(self): return get_flattened_list(ids, ngroups, self.levels, self.codes) @final - def apply(self, f: F, data: FrameOrSeries, axis: int = 0): + @cache_readonly + def group_keys_seq(self): + return self._get_group_keys() + + @final + def apply(self, f: F, data: FrameOrSeries, axis: int = 0) -> tuple[list, bool]: mutated = self.mutated splitter = self._get_splitter(data, axis=axis) - group_keys = self._get_group_keys() + group_keys = self.group_keys_seq result_values = [] # This calls DataSplitter.__iter__ @@ -745,7 +750,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): mutated = True result_values.append(res) - return group_keys, result_values, mutated + return result_values, mutated @cache_readonly def indices(self): From 50349651c52ba2389a971b97708389d72baa76fb Mon Sep 17 00:00:00 2001 From: Brock Date: 
Thu, 9 Sep 2021 11:32:51 -0700 Subject: [PATCH 2/2] remove _get_group_keys --- pandas/core/groupby/ops.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8090758d9cdc3..d5569fb5f8a96 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -716,7 +716,8 @@ def _get_grouper(self): return self.groupings[0].grouping_vector @final - def _get_group_keys(self): + @cache_readonly + def group_keys_seq(self): if len(self.groupings) == 1: return self.levels[0] else: @@ -725,11 +726,6 @@ def _get_group_keys(self): # provide "flattened" iterator for multi-group setting return get_flattened_list(ids, ngroups, self.levels, self.codes) - @final - @cache_readonly - def group_keys_seq(self): - return self._get_group_keys() - @final def apply(self, f: F, data: FrameOrSeries, axis: int = 0) -> tuple[list, bool]: mutated = self.mutated