From aa9c9e1a43ad843eeb3cd0366a4c119e5b9d073f Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 28 Dec 2022 13:59:14 -0500
Subject: [PATCH 01/18] REF: groupby Series selection with as_index=False

---
 pandas/core/apply.py                 | 78 +++++++++++++++++---------
 pandas/core/base.py                  | 13 ++---
 pandas/core/groupby/generic.py       | 84 ++++++++++++++++------------
 pandas/core/groupby/groupby.py       | 63 ++++++++++++++-------
 pandas/core/groupby/ops.py           |  2 +-
 pandas/core/series.py                |  2 +
 pandas/tests/groupby/test_groupby.py |  2 +
 7 files changed, 154 insertions(+), 90 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 02a9444dd4f97..d6de62676028d 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -2,6 +2,7 @@
 
 import abc
 from collections import defaultdict
+from contextlib import nullcontext
 from functools import partial
 import inspect
 from typing import (
@@ -292,6 +293,10 @@ def agg_list_like(self) -> DataFrame | Series:
         -------
         Result of aggregation.
         """
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -312,26 +317,35 @@ def agg_list_like(self) -> DataFrame | Series:
         results = []
         keys = []
 
-        # degenerate case
-        if selected_obj.ndim == 1:
-            for a in arg:
-                colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
-                new_res = colg.aggregate(a)
-                results.append(new_res)
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        if is_groupby:
+            # When as_index=False, we combine all results using indices
+            # and adjust index after
+            context_manager = com.temp_setattr(obj, "as_index", True)
+        else:
+            context_manager = nullcontext()
+        with context_manager:
+            # degenerate case
+            if selected_obj.ndim == 1:
 
-                # make sure we find a good name
-                name = com.get_callable_name(a) or a
-                keys.append(name)
+                for a in arg:
+                    colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
+                    new_res = colg.aggregate(a)
+                    results.append(new_res)
 
-        # multiples
-        else:
-            indices = []
-            for index, col in enumerate(selected_obj):
-                colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-                new_res = colg.aggregate(arg)
-                results.append(new_res)
-                indices.append(index)
-            keys = selected_obj.columns.take(indices)
+                    # make sure we find a good name
+                    name = com.get_callable_name(a) or a
+                    keys.append(name)
+
+            # multiples
+            else:
+                indices = []
+                for index, col in enumerate(selected_obj):
+                    colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
+                    new_res = colg.aggregate(arg)
+                    results.append(new_res)
+                    indices.append(index)
+                keys = selected_obj.columns.take(indices)
 
         try:
             concatenated = concat(results, keys=keys, axis=1, sort=False)
@@ -366,6 +380,10 @@ def agg_dict_like(self) -> DataFrame | Series:
         Result of aggregation.
         """
         from pandas import Index
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
@@ -384,15 +402,23 @@ def agg_dict_like(self) -> DataFrame | Series:
 
         arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
 
-        if selected_obj.ndim == 1:
-            # key only used for output
-            colg = obj._gotitem(selection, ndim=1)
-            results = {key: colg.agg(how) for key, how in arg.items()}
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        if is_groupby:
+            # When as_index=False, we combine all results using indices
+            # and adjust index after
+            context_manager = com.temp_setattr(obj, "as_index", True)
         else:
-            # key used for column selection and output
-            results = {
-                key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
-            }
+            context_manager = nullcontext()
+        with context_manager:
+            if selected_obj.ndim == 1:
+                # key only used for output
+                colg = obj._gotitem(selection, ndim=1)
+                results = {key: colg.agg(how) for key, how in arg.items()}
+            else:
+                # key used for column selection and output
+                results = {
+                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                }
 
         # set the final keys
         keys = list(arg.keys())
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 826583fd26f5d..8559640c1858d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -216,6 +216,9 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj[self._selection_list]
 
+        if isinstance(self.obj, ABCSeries):
+            return self.obj
+
         if len(self.exclusions) > 0:
             # equivalent to `self.obj.drop(self.exclusions, axis=1)
             #  but this avoids consolidating and making a copy
@@ -235,17 +238,11 @@ def __getitem__(self, key):
                 raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
             return self._gotitem(list(key), ndim=2)
 
-        elif not getattr(self, "as_index", False):
-            if key not in self.obj.columns:
-                raise KeyError(f"Column not found: {key}")
-            return self._gotitem(key, ndim=2)
-
         else:
             if key not in self.obj:
                 raise KeyError(f"Column not found: {key}")
-            subset = self.obj[key]
-            ndim = subset.ndim
-            return self._gotitem(key, ndim=ndim, subset=subset)
+            ndim = self.obj[key].ndim
+            return self._gotitem(key, ndim=ndim)
 
     def _gotitem(self, key, ndim: int, subset=None):
         """
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 905c1193713cc..09648e0d3e040 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -248,7 +248,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
             index = self.grouper.result_index
-            return self.obj._constructor(result.ravel(), index=index, name=data.name)
+            result = self.obj._constructor(result.ravel(), index=index, name=data.name)
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
+            return result
 
         relabeling = func is None
         columns = None
@@ -268,6 +272,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 # columns is not narrowed by mypy from relabeling flag
                 assert columns is not None  # for mypy
                 ret.columns = columns
+            if not self.as_index:
+                ret = self._insert_inaxis_grouper(ret)
+                ret.index = default_index(len(ret))
             return ret
 
         else:
@@ -287,23 +294,24 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
                 # result is a dict whose keys are the elements of result_index
                 index = self.grouper.result_index
-                return Series(result, index=index)
+                result = Series(result, index=index)
+                if not self.as_index:
+                    result = self._insert_inaxis_grouper(result)
+                    result.index = default_index(len(result))
+                return result
 
     agg = aggregate
 
     def _aggregate_multiple_funcs(self, arg) -> DataFrame:
         if isinstance(arg, dict):
-
-            # show the deprecation, but only if we
-            # have not shown a higher level one
-            # GH 15931
-            raise SpecificationError("nested renamer is not supported")
-
-        if any(isinstance(x, (tuple, list)) for x in arg):
+            if self.as_index:
+                # GH 15931
+                raise SpecificationError("nested renamer is not supported")
+            else:
+                # GH#50684 - This accidentally worked in 1.x
+                arg = list(arg.items())
+        elif any(isinstance(x, (tuple, list)) for x in arg):
             arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]
-
-            # indicated column order
-            columns = next(zip(*arg))
         else:
             # list of functions / function names
             columns = []
@@ -313,10 +321,13 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame:
             arg = zip(columns, arg)
 
         results: dict[base.OutputKey, DataFrame | Series] = {}
-        for idx, (name, func) in enumerate(arg):
+        with com.temp_setattr(self, "as_index", True):
+            # Combine results using the index, need to adjust index after
+            # if as_index=False (GH#50724)
+            for idx, (name, func) in enumerate(arg):
 
-            key = base.OutputKey(label=name, position=idx)
-            results[key] = self.aggregate(func)
+                key = base.OutputKey(label=name, position=idx)
+                results[key] = self.aggregate(func)
 
         if any(isinstance(x, DataFrame) for x in results.values()):
             from pandas import concat
@@ -396,12 +407,18 @@ def _wrap_applied_output(
             )
             if isinstance(result, Series):
                 result.name = self.obj.name
+            if not self.as_index and not_indexed_same:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
             return result
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
                 data=values, index=self.grouper.result_index, name=self.obj.name
             )
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
             return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
@@ -630,6 +647,9 @@ def nunique(self, dropna: bool = True) -> Series:
                 res[ids[idx]] = out
 
         result = self.obj._constructor(res, index=ri, name=self.obj.name)
+        if not self.as_index:
+            result = self._insert_inaxis_grouper(result)
+            result.index = default_index(len(result))
         return self._reindex_output(result, fill_value=0)
 
     @doc(Series.describe)
@@ -643,12 +663,11 @@ def value_counts(
         ascending: bool = False,
         bins=None,
         dropna: bool = True,
-    ) -> Series:
+    ) -> Series | DataFrame:
         if bins is None:
             result = self._value_counts(
                 normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
             )
-            assert isinstance(result, Series)
             return result
 
         from pandas.core.reshape.merge import get_join_indexers
@@ -786,7 +805,11 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
 
         if is_integer_dtype(out.dtype):
             out = ensure_int64(out)
-        return self.obj._constructor(out, index=mi, name=self.obj.name)
+        result = self.obj._constructor(out, index=mi, name=self.obj.name)
+        if not self.as_index:
+            result.name = "proportion" if normalize else "count"
+            result = result.reset_index()
+        return result
 
     def fillna(
         self,
@@ -1274,7 +1297,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                         result.columns = result.columns.droplevel(-1)
 
         if not self.as_index:
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result.index = default_index(len(result))
 
         return result
@@ -1386,7 +1409,7 @@ def _wrap_applied_output(
                 return self.obj._constructor_sliced(values, index=key_index)
             else:
                 result = self.obj._constructor(values, columns=[self._selection])
-                self._insert_inaxis_grouper_inplace(result)
+                result = self._insert_inaxis_grouper(result)
                 return result
         else:
             # values are Series
@@ -1443,7 +1466,7 @@ def _wrap_applied_output_series(
         result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
         if not self.as_index:
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
 
         return self._reindex_output(result)
 
@@ -1774,7 +1797,9 @@ def _gotitem(self, key, ndim: int, subset=None):
                 subset,
                 level=self.level,
                 grouper=self.grouper,
+                exclusions=self.exclusions,
                 selection=key,
+                as_index=self.as_index,
                 sort=self.sort,
                 group_keys=self.group_keys,
                 observed=self.observed,
@@ -1790,19 +1815,6 @@ def _get_data_to_aggregate(self) -> Manager2D:
         else:
             return obj._mgr
 
-    def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
-        # zip in reverse so we can always insert at loc 0
-        columns = result.columns
-        for name, lev, in_axis in zip(
-            reversed(self.grouper.names),
-            reversed(self.grouper.get_group_levels()),
-            reversed([grp.in_axis for grp in self.grouper.groupings]),
-        ):
-            # GH #28549
-            # When using .apply(-), name will be in columns already
-            if in_axis and name not in columns:
-                result.insert(0, name, lev)
-
     def _indexed_output_to_ndframe(
         self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> DataFrame:
@@ -1825,7 +1837,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
             mgr.set_axis(1, index)
             result = self.obj._constructor(mgr)
 
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result = result._consolidate()
         else:
             index = self.grouper.result_index
@@ -1918,7 +1930,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
 
         if not self.as_index:
             results.index = default_index(len(results))
-            self._insert_inaxis_grouper_inplace(results)
+            results = self._insert_inaxis_grouper(results)
 
         return results
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 431b23023b094..a7e3b4215625b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -123,6 +123,7 @@ class providing the base-class of operations.
     Index,
     MultiIndex,
     RangeIndex,
+    default_index,
 )
 from pandas.core.internals.blocks import ensure_block_shape
 from pandas.core.series import Series
@@ -910,8 +911,6 @@ def __init__(
         self.level = level
 
         if not as_index:
-            if not isinstance(obj, DataFrame):
-                raise TypeError("as_index=False only valid with DataFrame")
             if axis != 0:
                 raise ValueError("as_index=False only valid for axis=0")
 
@@ -1157,6 +1156,24 @@ def _set_result_index_ordered(
 
         return result
 
+    def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
+        if isinstance(result, Series):
+            result = result.to_frame()
+
+        # zip in reverse so we can always insert at loc 0
+        columns = result.columns
+        for name, lev, in_axis in zip(
+            reversed(self.grouper.names),
+            reversed(self.grouper.get_group_levels()),
+            reversed([grp.in_axis for grp in self.grouper.groupings]),
+        ):
+            # GH #28549
+            # When using .apply(-), name will be in columns already
+            if in_axis and name not in columns:
+                result.insert(0, name, lev)
+
+        return result
+
     def _indexed_output_to_ndframe(
         self, result: Mapping[base.OutputKey, ArrayLike]
     ) -> Series | DataFrame:
@@ -1193,7 +1210,7 @@ def _wrap_aggregated_output(
         if not self.as_index:
             # `not self.as_index` is only relevant for DataFrameGroupBy,
             #   enforced in __init__
-            self._insert_inaxis_grouper_inplace(result)
+            result = self._insert_inaxis_grouper(result)
             result = result._consolidate()
             index = Index(range(self.grouper.ngroups))
 
@@ -1613,7 +1630,10 @@ def array_func(values: ArrayLike) -> ArrayLike:
 
         res = self._wrap_agged_manager(new_mgr)
         if is_ser:
-            res.index = self.grouper.result_index
+            if self.as_index:
+                res.index = self.grouper.result_index
+            else:
+                res = self._insert_inaxis_grouper(res)
             return self._reindex_output(res)
         else:
             return res
@@ -1887,7 +1907,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             result = self._wrap_agged_manager(new_mgr)
 
         if result.ndim == 1:
-            result.index = self.grouper.result_index
+            if self.as_index:
+                result.index = self.grouper.result_index
+            else:
+                result = self._insert_inaxis_grouper(result)
 
         return self._reindex_output(result, fill_value=0)
 
@@ -2622,31 +2645,33 @@ def describe(
         exclude=None,
     ) -> NDFrameT:
         with self._group_selection_context():
-            if len(self._selected_obj) == 0:
-                described = self._selected_obj.describe(
+            selected_obj = self._selected_obj
+            if len(selected_obj) == 0:
+                described = selected_obj.describe(
                     percentiles=percentiles, include=include, exclude=exclude
                 )
-                if self._selected_obj.ndim == 1:
+                if selected_obj.ndim == 1:
                     result = described
                 else:
                     result = described.unstack()
                 return result.to_frame().T.iloc[:0]
 
-            result = self._python_apply_general(
-                lambda x: x.describe(
-                    percentiles=percentiles, include=include, exclude=exclude
-                ),
-                self._selected_obj,
-                not_indexed_same=True,
-            )
+            with com.temp_setattr(self, "as_index", True):
+                result = self._python_apply_general(
+                    lambda x: x.describe(
+                        percentiles=percentiles, include=include, exclude=exclude
+                    ),
+                    selected_obj,
+                    not_indexed_same=True,
+                )
             if self.axis == 1:
                 return result.T
 
             # GH#49256 - properly handle the grouping column(s)
-            if self._selected_obj.ndim != 1 or self.as_index:
-                result = result.unstack()
-                if not self.as_index:
-                    self._insert_inaxis_grouper_inplace(result)
+            result = result.unstack()
+            if not self.as_index:
+                result = self._insert_inaxis_grouper(result)
+                result.index = default_index(len(result))
 
             return result
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ea902800cf7e0..f88236b2464c1 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -946,7 +946,7 @@ def result_index(self) -> Index:
 
     @final
     def get_group_levels(self) -> list[ArrayLike]:
-        # Note: only called from _insert_inaxis_grouper_inplace, which
+        # Note: only called from _insert_inaxis_grouper, which
         #  is only called for BaseGrouper, never for BinGrouper
         if len(self.groupings) == 1:
             return [self.groupings[0].group_arraylike]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6b82d48f82ce7..ea6725fde5908 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1977,6 +1977,8 @@ def groupby(
 
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
+        if not as_index:
+            raise TypeError("as_index=False only valid with DataFrame")
         axis = self._get_axis_number(axis)
 
         return SeriesGroupBy(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3baf2d86010f7..c3ce3a1cc84c7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -652,6 +652,8 @@ def test_groupby_as_index_select_column_sum_empty_df():
     left = df.groupby(by="A", as_index=False)["B"].sum(numeric_only=False)
 
     expected = DataFrame(columns=df.columns[:2], index=range(0))
+    # GH#?? - Columns after selection shouldn't retain names
+    expected.columns.names = [None]
     tm.assert_frame_equal(left, expected)
 
 

From 7d00d07bf36468e97a1da910362885ccfb42710b Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sat, 14 Jan 2023 10:46:26 -0500
Subject: [PATCH 02/18] TST: fill in GH#50744 reference in test comment

---
 pandas/tests/groupby/test_groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c3ce3a1cc84c7..9b293f0f1669c 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -652,7 +652,7 @@ def test_groupby_as_index_select_column_sum_empty_df():
     left = df.groupby(by="A", as_index=False)["B"].sum(numeric_only=False)
 
     expected = DataFrame(columns=df.columns[:2], index=range(0))
-    # GH#?? - Columns after selection shouldn't retain names
+    # GH#50744 - Columns after selection shouldn't retain names
     expected.columns.names = [None]
     tm.assert_frame_equal(left, expected)
 

From 41399ad544fbcf3ab281f9264b34b62ecd74141a Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 16 Jan 2023 17:08:22 -0500
Subject: [PATCH 03/18] type-hinting fixes

---
 pandas/core/apply.py           | 3 +++
 pandas/core/groupby/generic.py | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index d6de62676028d..c28da1bc758cd 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -9,6 +9,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    ContextManager,
     DefaultDict,
     Dict,
     Hashable,
@@ -318,6 +319,7 @@ def agg_list_like(self) -> DataFrame | Series:
         keys = []
 
         is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        context_manager: ContextManager
         if is_groupby:
             # When as_index=False, we combine all results using indices
             # and adjust index after
@@ -403,6 +405,7 @@ def agg_dict_like(self) -> DataFrame | Series:
         arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
 
         is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
+        context_manager: ContextManager
         if is_groupby:
             # When as_index=False, we combine all results using indices
             # and adjust index after
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 09648e0d3e040..2340c36d14301 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -594,7 +594,7 @@ def true_and_notna(x) -> bool:
         filtered = self._apply_filter(indices, dropna)
         return filtered
 
-    def nunique(self, dropna: bool = True) -> Series:
+    def nunique(self, dropna: bool = True) -> Series | DataFrame:
         """
         Return number of unique elements in the group.
 
@@ -646,7 +646,9 @@ def nunique(self, dropna: bool = True) -> Series:
                 # GH#21334s
                 res[ids[idx]] = out
 
-        result = self.obj._constructor(res, index=ri, name=self.obj.name)
+        result: Series | DataFrame = self.obj._constructor(
+            res, index=ri, name=self.obj.name
+        )
         if not self.as_index:
             result = self._insert_inaxis_grouper(result)
             result.index = default_index(len(result))

From c26957d49b4b64eaad1201fc8678e38be390d859 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 16 Jan 2023 23:33:29 -0500
Subject: [PATCH 04/18] WIP: debug check of _selected_obj vs _obj_with_exclusions

---
 pandas/core/groupby/groupby.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index a7e3b4215625b..7ea2139e4ba50 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -939,6 +939,19 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
+        with self._group_selection_context():
+            so = self._selected_obj
+            # if self.ndim == 2 and so.ndim == 1:
+            #     so = so.to_frame()
+            owe = self._obj_with_exclusions
+            import pandas._testing as tm
+            print('---')
+            print(owe.head())
+            print('---')
+            print(so.head())
+            print('---')
+            tm.assert_equal(owe, so)
+
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)

From 1860c4dc38fe447895f8a36b1de657e043784d84 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 17 Jan 2023 19:53:58 -0500
Subject: [PATCH 05/18] WIP: positional group selection; disable debug check

---
 pandas/core/groupby/groupby.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 7ea2139e4ba50..bd0c92df7c2d6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -726,7 +726,7 @@ def _selected_obj(self):
 
         if self._selection is None or isinstance(self.obj, Series):
             if self._group_selection is not None:
-                return self.obj[self._group_selection]
+                return self.obj._take(self._group_selection, axis=1, convert_indices=False)
             return self.obj
         else:
             return self.obj[self._selection]
@@ -939,18 +939,18 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
-        with self._group_selection_context():
-            so = self._selected_obj
-            # if self.ndim == 2 and so.ndim == 1:
-            #     so = so.to_frame()
-            owe = self._obj_with_exclusions
-            import pandas._testing as tm
-            print('---')
-            print(owe.head())
-            print('---')
-            print(so.head())
-            print('---')
-            tm.assert_equal(owe, so)
+        # with self._group_selection_context():
+        #     so = self._selected_obj
+        #     # if self.ndim == 2 and so.ndim == 1:
+        #     #     so = so.to_frame()
+        #     owe = self._obj_with_exclusions
+        #     import pandas._testing as tm
+        #     print('---')
+        #     print(owe.head())
+        #     print('---')
+        #     print(so.head())
+        #     print('---')
+        #     tm.assert_equal(owe, so)
 
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
@@ -1037,6 +1037,7 @@ def _set_group_selection(self) -> None:
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
             self._group_selection = ax.difference(Index(groupers), sort=False).tolist()
+            self._group_selection = [idx for idx, label in enumerate(ax) if label not in groupers]
             self._reset_cache("_selected_obj")
 
     @final

From e42e222f6c8f70c567dcae50ec91438228808621 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 18 Jan 2023 16:33:44 -0500
Subject: [PATCH 06/18] WIP: add duplicate-column tests (GH#50806)

---
 pandas/tests/groupby/test_function.py | 21 +++++++++++++++++++++
 pandas/tests/groupby/test_groupby.py  | 11 +++++++++++
 2 files changed, 32 insertions(+)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1e16e353cc1a4..8611b928b5a40 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1254,6 +1254,27 @@ def test_describe_with_duplicate_output_column_names(as_index, keys):
     tm.assert_frame_equal(result, expected)
 
 
+def test_describe_duplicate_columns():
+    # GH#50806
+    df = DataFrame([[0, 1, 2, 3]])
+    df.columns = [0, 1, 2, 0]
+    gb = df.groupby(df[1])
+    result = gb.describe(percentiles=[])
+
+    columns = ["count", "mean", "std", "min", "50%", "max"]
+    frames = [
+        DataFrame([[1.0, val, np.nan, val, val, val]], index=[1], columns=columns)
+        for val in (0.0, 2.0, 3.0)
+    ]
+    expected = pd.concat(frames, axis=1)
+    expected.columns = MultiIndex(
+        levels=[[0, 2], columns],
+        codes=[6 * [0] + 6 * [1] + 6 * [0], 3 * list(range(6))],
+    )
+    expected.index.names = [1]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = DataFrame(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index a1c1930c2e11b..ded764ad7a613 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2867,3 +2867,14 @@ def test_groupby_method_drop_na(method):
     else:
         expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
     tm.assert_frame_equal(result, expected)
+
+
+def test_selected_obj_duplicate_columns():
+    # GH#50806
+    df = DataFrame([[0, 1, 2, 3]])
+    df.columns = [0, 1, 2, 0]
+    gb = df.groupby(df[1])
+    with gb._group_selection_context():
+        result = gb._selected_obj
+    expected = df.take([0, 2, 3], axis=1)
+    tm.assert_frame_equal(result, expected)

From 0bdf009cecb6b52b406c0482cfa969a8548a2523 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 28 Dec 2022 13:59:14 -0500
Subject: [PATCH 07/18] BUG: groupby.describe on a frame with duplicate column
 names

---
 doc/source/whatsnew/v2.0.0.rst        |  1 +
 pandas/core/groupby/groupby.py        |  7 +++-
 pandas/tests/groupby/test_function.py | 47 +++++++++++++++++++++++++++
 pandas/tests/groupby/test_groupby.py  | 11 +++++++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 614832c5acd1b..715abb7de4eab 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1063,6 +1063,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
 - Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
 - Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
+- Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`)
 -
 
 Reshaping
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c15948ce877a8..52bf337a86c92 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -726,7 +726,9 @@ def _selected_obj(self):
 
         if self._selection is None or isinstance(self.obj, Series):
             if self._group_selection is not None:
-                return self.obj[self._group_selection]
+                return self.obj._take(
+                    self._group_selection, axis=1, convert_indices=False
+                )
             return self.obj
         else:
             return self.obj[self._selection]
@@ -1024,6 +1026,9 @@ def _set_group_selection(self) -> None:
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
             self._group_selection = ax.difference(Index(groupers), sort=False).tolist()
+            self._group_selection = [
+                idx for idx, label in enumerate(ax) if label not in groupers
+            ]
             self._reset_cache("_selected_obj")
 
     @final
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1e16e353cc1a4..c077fb1d257a5 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1254,6 +1254,27 @@ def test_describe_with_duplicate_output_column_names(as_index, keys):
     tm.assert_frame_equal(result, expected)
 
 
+def test_describe_duplicate_columns():
+    # GH#50806
+    df = DataFrame([[0, 1, 2, 3]])
+    df.columns = [0, 1, 2, 0]
+    gb = df.groupby(df[1])
+    result = gb.describe(percentiles=[])
+
+    columns = ["count", "mean", "std", "min", "50%", "max"]
+    frames = [
+        DataFrame([[1.0, val, np.nan, val, val, val]], index=[1], columns=columns)
+        for val in (0.0, 2.0, 3.0)
+    ]
+    expected = pd.concat(frames, axis=1)
+    expected.columns = MultiIndex(
+        levels=[[0, 2], columns],
+        codes=[6 * [0] + 6 * [1] + 6 * [0], 3 * list(range(6))],
+    )
+    expected.index.names = [1]
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = DataFrame(
@@ -1594,3 +1615,29 @@ def test_multiindex_group_all_columns_when_empty(groupby_func):
     result = method(*args).index
     expected = df.index
     tm.assert_index_equal(result, expected)
+
+
+def test_duplicate_columns(request, groupby_func, as_index):
+    # GH#50806
+    if groupby_func == "corrwith":
+        msg = "GH#50845 - corrwith fails when there are duplicate columns"
+        request.node.add_marker(pytest.mark.xfail(reason=msg))
+    df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
+    args = get_groupby_method_args(groupby_func, df)
+    gb = df.groupby("a", as_index=as_index)
+    result = getattr(gb, groupby_func)(*args)
+
+    if groupby_func in ("size", "ngroup", "cumcount"):
+        expected = getattr(
+            df.take([0, 1], axis=1).groupby("a", as_index=as_index), groupby_func
+        )(*args)
+        tm.assert_equal(result, expected)
+    else:
+        expected_df = df.copy()
+        expected_df.columns = ["a", "b", "c"]
+        expected_args = get_groupby_method_args(groupby_func, expected_df)
+        expected = getattr(expected_df.groupby("a", as_index=as_index), groupby_func)(
+            *expected_args
+        )
+        expected = expected.rename(columns={"c": "b"})
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index a1c1930c2e11b..ded764ad7a613 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2867,3 +2867,14 @@ def test_groupby_method_drop_na(method):
     else:
         expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
     tm.assert_frame_equal(result, expected)
+
+
+def test_selected_obj_duplicate_columns():
+    # GH#50806
+    df = DataFrame([[0, 1, 2, 3]])
+    df.columns = [0, 1, 2, 0]
+    gb = df.groupby(df[1])
+    with gb._group_selection_context():
+        result = gb._selected_obj
+    expected = df.take([0, 2, 3], axis=1)
+    tm.assert_frame_equal(result, expected)

From 185e4f8e6006a6cc9404fd608a991c48cfc1f4d1 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 18 Jan 2023 17:31:40 -0500
Subject: [PATCH 08/18] CLN: remove dead ax.difference assignment in _set_group_selection

---
 pandas/core/groupby/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 52bf337a86c92..37e75a984c92c 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1025,7 +1025,6 @@ def _set_group_selection(self) -> None:
         if len(groupers):
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
-            self._group_selection = ax.difference(Index(groupers), sort=False).tolist()
             self._group_selection = [
                 idx for idx, label in enumerate(ax) if label not in groupers
             ]

From d2b965ff426b4d467e70d815618aaf0c2e9c3ac7 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 19 Jan 2023 16:22:33 -0500
Subject: [PATCH 09/18] test fixup

---
 pandas/tests/groupby/test_groupby_dropna.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 5418a2a60dc80..3cee8baeb6e5b 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -544,9 +544,12 @@ def test_categorical_reducers(
 
     gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
     expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
-    expected["x"] = expected["x"].replace(4, None)
+    # Workaround since we can't use replace (GH#50872)
+    mask = expected["x"] == 4
+    expected["x"] = expected["x"].mask(mask, None).cat.remove_categories([4])
     if index_kind == "multi":
-        expected["x2"] = expected["x2"].replace(4, None)
+        mask = expected["x2"] == 4
+        expected["x2"] = expected["x2"].mask(mask, None).cat.remove_categories([4])
     if as_index:
         if index_kind == "multi":
             expected = expected.set_index(["x", "x2"])
@@ -578,6 +581,8 @@ def test_categorical_reducers(
     result = getattr(gb_keepna, reduction_func)(*args)
 
     # size will return a Series, others are DataFrame
+    print(result.index.dtype)
+    print(expected.index.dtype)
     tm.assert_equal(result, expected)
 
 

From 932e3c87b7c50906d52511c0fb01165031f56560 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 19 Jan 2023 16:26:21 -0500
Subject: [PATCH 10/18] Fix type-hint for _group_selection

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 37e75a984c92c..37041f515c0d8 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -610,7 +610,7 @@ def f(self):
 
 
 class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
-    _group_selection: IndexLabel | None = None
+    _group_selection: list[int] | None = None
     _hidden_attrs = PandasObject._hidden_attrs | {
         "as_index",
         "axis",

From eeea6fcc41691741103d8820c8d2c80c8712be39 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 19 Jan 2023 22:26:01 -0500
Subject: [PATCH 11/18] Merge branch 'groupby_select_obj_dup_cols' of
 https://github.com/rhshadrach/pandas into groupby_select_obj_dup_cols

# Conflicts:
#	pandas/core/groupby/groupby.py
---
 pandas/core/groupby/groupby.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b7d52dd207019..37041f515c0d8 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -941,19 +941,6 @@ def __init__(
         self.grouper = grouper
         self.exclusions = frozenset(exclusions) if exclusions else frozenset()
 
-        # with self._group_selection_context():
-        #     so = self._selected_obj
-        #     # if self.ndim == 2 and so.ndim == 1:
-        #     #     so = so.to_frame()
-        #     owe = self._obj_with_exclusions
-        #     import pandas._testing as tm
-        #     print('---')
-        #     print(owe.head())
-        #     print('---')
-        #     print(so.head())
-        #     print('---')
-        #     tm.assert_equal(owe, so)
-
     def __getattr__(self, attr: str):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)

From 83f12b77601f956e16efce8305947127d0f4c593 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 19 Jan 2023 23:06:20 -0500
Subject: [PATCH 12/18] PERF: compute _group_selection with NumPy for wide axes

---
 pandas/core/groupby/groupby.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 37041f515c0d8..4ae205ba2d85b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -610,7 +610,7 @@ def f(self):
 
 
 class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
-    _group_selection: list[int] | None = None
+    _group_selection: np.ndarray | None = None
     _hidden_attrs = PandasObject._hidden_attrs | {
         "as_index",
         "axis",
@@ -1025,9 +1025,15 @@ def _set_group_selection(self) -> None:
         if len(groupers):
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
-            self._group_selection = [
-                idx for idx, label in enumerate(ax) if label not in groupers
-            ]
+            if len(ax) < 2000:
+                # Determined experimentally, after 2000 this is slower than
+                # the NumPy version
+                self._group_selection = np.array(
+                    [idx for idx, label in enumerate(ax) if label not in groupers]
+                )
+            else:
+                indexer = ax.get_indexer_for(list(groupers))
+                self._group_selection = np.delete(np.arange(len(ax)), indexer)
             self._reset_cache("_selected_obj")
 
     @final

From c37a1ababc94d90aacf4cddc7bd05f4fc830dad8 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 20 Jan 2023 16:46:07 -0500
Subject: [PATCH 13/18] PERF: lower _group_selection NumPy cutoff from 2000 to 1000 labels

---
 pandas/core/groupby/groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4ae205ba2d85b..05aa0095247a0 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1025,9 +1025,8 @@ def _set_group_selection(self) -> None:
         if len(groupers):
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
-            if len(ax) < 2000:
-                # Determined experimentally, after 2000 this is slower than
-                # the NumPy version
+            if len(ax) < 1000:
+                # Determined experimentally, larger is slower than the NumPy version
                 self._group_selection = np.array(
                     [idx for idx, label in enumerate(ax) if label not in groupers]
                 )

From 4dafe5a3258bbe5a63f8e02d7b8e53946d166a6b Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 25 Jan 2023 16:22:44 -0500
Subject: [PATCH 14/18] cleanup, faster implementation

---
 pandas/core/groupby/groupby.py              | 15 ++++-----------
 pandas/tests/groupby/test_groupby_dropna.py |  9 ++-------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 0717fc1cc7765..c6855944af2b5 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1020,19 +1020,12 @@ def _set_group_selection(self) -> None:
         ):
             return
 
-        groupers = self.exclusions
-
-        if len(groupers):
+        exclusions = self.exclusions
+        if len(exclusions):
             # GH12839 clear selected obj cache when group selection changes
             ax = self.obj._info_axis
-            if len(ax) < 1000:
-                # Determined experimentally, larger is slower than the NumPy version
-                self._group_selection = np.array(
-                    [idx for idx, label in enumerate(ax) if label not in groupers]
-                )
-            else:
-                indexer = ax.get_indexer_for(list(groupers))
-                self._group_selection = np.delete(np.arange(len(ax)), indexer)
+            # ilocs of ax that are not in the exclusions
+            self._group_selection = np.arange(len(ax))[~ax.isin(exclusions)]
             self._reset_cache("_selected_obj")
 
     @final
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 3cee8baeb6e5b..5418a2a60dc80 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -544,12 +544,9 @@ def test_categorical_reducers(
 
     gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
     expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
-    # Workaround since we can't use replace (GH#50872)
-    mask = expected["x"] == 4
-    expected["x"] = expected["x"].mask(mask, None).cat.remove_categories([4])
+    expected["x"] = expected["x"].replace(4, None)
     if index_kind == "multi":
-        mask = expected["x2"] == 4
-        expected["x2"] = expected["x2"].mask(mask, None).cat.remove_categories([4])
+        expected["x2"] = expected["x2"].replace(4, None)
     if as_index:
         if index_kind == "multi":
             expected = expected.set_index(["x", "x2"])
@@ -581,8 +578,6 @@ def test_categorical_reducers(
     result = getattr(gb_keepna, reduction_func)(*args)
 
     # size will return a Series, others are DataFrame
-    print(result.index.dtype)
-    print(expected.index.dtype)
     tm.assert_equal(result, expected)
 
 

From d5df78cb9ebc61dc2892b90ca1b0193b5d82654b Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 30 Jan 2023 22:51:28 -0500
Subject: [PATCH 15/18] Make group_selection a Boolean flag

---
 pandas/core/groupby/groupby.py | 38 +++++++---------------------------
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4ae205ba2d85b..84211888124a5 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -610,7 +610,7 @@ def f(self):
 
 
 class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
-    _group_selection: np.ndarray | None = None
+    _group_selection: bool = False
     _hidden_attrs = PandasObject._hidden_attrs | {
         "as_index",
         "axis",
@@ -725,10 +725,8 @@ def _selected_obj(self):
         # Note: _selected_obj is always just `self.obj` for SeriesGroupBy
 
         if self._selection is None or isinstance(self.obj, Series):
-            if self._group_selection is not None:
-                return self.obj._take(
-                    self._group_selection, axis=1, convert_indices=False
-                )
+            if self._group_selection:
+                return self._obj_with_exclusions
             return self.obj
         else:
             return self.obj[self._selection]
@@ -1011,30 +1009,10 @@ def _set_group_selection(self) -> None:
 
         NOTE: this should be paired with a call to _reset_group_selection
         """
-        # This is a no-op for SeriesGroupBy
-        grp = self.grouper
-        if (
-            grp.groupings is None
-            or self.obj.ndim == 1
-            or self._group_selection is not None
-        ):
+        if self.grouper.groupings is None or self.obj.ndim == 1:
             return
-
-        groupers = self.exclusions
-
-        if len(groupers):
-            # GH12839 clear selected obj cache when group selection changes
-            ax = self.obj._info_axis
-            if len(ax) < 2000:
-                # Determined experimentally, after 2000 this is slower than
-                # the NumPy version
-                self._group_selection = np.array(
-                    [idx for idx, label in enumerate(ax) if label not in groupers]
-                )
-            else:
-                indexer = ax.get_indexer_for(list(groupers))
-                self._group_selection = np.delete(np.arange(len(ax)), indexer)
-            self._reset_cache("_selected_obj")
+        self._group_selection = True
+        self._reset_cache("_selected_obj")
 
     @final
     def _reset_group_selection(self) -> None:
@@ -1044,9 +1022,9 @@ def _reset_group_selection(self) -> None:
         Used for methods needing to return info on each group regardless of
         whether a group selection was previously set.
         """
-        if self._group_selection is not None:
+        if self._group_selection:
             # GH12839 clear cached selection too when changing group selection
-            self._group_selection = None
+            self._group_selection = False
             self._reset_cache("_selected_obj")
 
     @contextmanager

From 8d6df54006c60ead171be06745116c15076cf5f3 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 30 Jan 2023 22:55:17 -0500
Subject: [PATCH 16/18] Avoid resetting cache

---
 pandas/core/groupby/groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ae5175cc19e39..f1f2a16d47dd4 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1009,7 +1009,8 @@ def _set_group_selection(self) -> None:
 
         NOTE: this should be paired with a call to _reset_group_selection
         """
-        if self.grouper.groupings is None or self.obj.ndim == 1:
+        grp = self.grouper
+        if grp.groupings is None or self.obj.ndim == 1 or self._group_selection:
             return
         self._group_selection = True
         self._reset_cache("_selected_obj")

From 62540af2c466a28e013690692d370bc8ab1d3c73 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Tue, 31 Jan 2023 20:19:10 -0500
Subject: [PATCH 17/18] Improve test

---
 pandas/tests/groupby/test_function.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index c077fb1d257a5..d00dde7179df7 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1627,17 +1627,10 @@ def test_duplicate_columns(request, groupby_func, as_index):
     gb = df.groupby("a", as_index=as_index)
     result = getattr(gb, groupby_func)(*args)
 
-    if groupby_func in ("size", "ngroup", "cumcount"):
-        expected = getattr(
-            df.take([0, 1], axis=1).groupby("a", as_index=as_index), groupby_func
-        )(*args)
-        tm.assert_equal(result, expected)
-    else:
-        expected_df = df.copy()
-        expected_df.columns = ["a", "b", "c"]
-        expected_args = get_groupby_method_args(groupby_func, expected_df)
-        expected = getattr(expected_df.groupby("a", as_index=as_index), groupby_func)(
-            *expected_args
-        )
+    expected_df = df.set_axis(["a", "b", "c"], axis=1)
+    expected_args = get_groupby_method_args(groupby_func, expected_df)
+    expected_gb = expected_df.groupby("a", as_index=as_index)
+    expected = getattr(expected_gb, groupby_func)(*expected_args)
+    if groupby_func not in ("size", "ngroup", "cumcount"):
         expected = expected.rename(columns={"c": "b"})
-        tm.assert_frame_equal(result, expected)
+    tm.assert_equal(result, expected)

From 359d7fffb7ae4ca5eebecc0fa75014c00723ec5d Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 2 Feb 2023 22:57:11 -0500
Subject: [PATCH 18/18] Rework test

---
 pandas/tests/groupby/test_groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 6771e5566b2f9..d7b015fa7104a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2830,12 +2830,11 @@ def test_groupby_reduce_period():
     tm.assert_series_equal(res, expected)
 
 
-def test_selected_obj_duplicate_columns():
+def test_obj_with_exclusions_duplicate_columns():
     # GH#50806
     df = DataFrame([[0, 1, 2, 3]])
     df.columns = [0, 1, 2, 0]
     gb = df.groupby(df[1])
-    with gb._group_selection_context():
-        result = gb._selected_obj
+    result = gb._obj_with_exclusions
     expected = df.take([0, 2, 3], axis=1)
     tm.assert_frame_equal(result, expected)