From e84a15dfa19a5130c99825c16f6f2f350ce36b4a Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 15 Jul 2020 00:58:41 +0100
Subject: [PATCH 01/34] GroupBy.apply() calls self._reset_group_selection at
 the start. Errant tests updated

---
 pandas/core/groupby/groupby.py           | 2 ++
 pandas/tests/groupby/test_categorical.py | 4 ++--
 pandas/tests/groupby/test_grouping.py    | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d039b715b3c08..f570d3284e3f5 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -825,6 +825,8 @@ def __iter__(self):
         )
     )
     def apply(self, func, *args, **kwargs):
+        
+        self._reset_group_selection()
 
         func = self._is_builtin_func(func)
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 118d928ac02f4..96a5e83459a59 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -129,9 +129,9 @@ def test_basic():
     def f(x):
         return x.drop_duplicates("person_name").iloc[0]
 
+    g = x.groupby(["person_id"], observed=False, as_index=False)
     result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
-    expected.index = Index([1, 2], name="person_id")
     expected["person_name"] = expected["person_name"].astype("object")
     tm.assert_frame_equal(result, expected)
 
@@ -1287,7 +1287,7 @@ def test_get_nonexistent_category():
     # Accessing a Category that is not in the dataframe
     df = pd.DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)})
     with pytest.raises(KeyError, match="'vau'"):
-        df.groupby("var").apply(
+        df.groupby("var", as_index=False).apply(
             lambda rows: pd.DataFrame(
                 {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]}
             )
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index efcd22f9c0c82..61588bd6ce165 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -191,7 +191,7 @@ def test_grouper_creation_bug(self):
         result = g.sum()
         tm.assert_frame_equal(result, expected)
 
-        result = g.apply(lambda x: x.sum())
+        result = g[["B"]].apply(lambda x: x.sum())
         tm.assert_frame_equal(result, expected)
 
         g = df.groupby(pd.Grouper(key="A", axis=0))

From e122809eae5c8d55c593541c537427a943270680 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 15 Jul 2020 15:39:33 +0100
Subject: [PATCH 02/34] gb.apply() now resets group selection so it always
 returns grouping columns as columns. updated tests that relied on previous
 behaviour

---
 pandas/core/groupby/groupby.py                   | 2 +-
 pandas/tests/groupby/aggregate/test_other.py     | 4 ++--
 pandas/tests/groupby/test_function.py            | 2 +-
 pandas/tests/groupby/transform/test_transform.py | 8 +++-----
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 972ece1ab549f..a29502ce1363d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -828,7 +828,7 @@ def __iter__(self):
         )
     )
     def apply(self, func, *args, **kwargs):
-        
+
         self._reset_group_selection()
 
         func = self._is_builtin_func(func)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 264cf40dc6984..3356c2dd9c88b 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -486,13 +486,13 @@ def test_agg_timezone_round_trip():
     assert ts == grouped.first()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 0]
+    assert ts == grouped.apply(lambda x: x.iloc[0]).loc["a", "B"]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 0]
+    assert ts == grouped.apply(lambda x: x.iloc[-1]).loc["a", "B"]
 
 
 def test_sum_uint64_overflow():
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 6f19ec40c2520..35c9bd4c33fdc 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -298,7 +298,7 @@ def test_non_cython_api():
         index=expected_index,
         columns=expected_col,
     )
-    result = g.describe()
+    result = g.describe().drop(columns="A")
     tm.assert_frame_equal(result, expected)
 
     expected = pd.concat(
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index cdaf27e214d80..4caf50b0df1fa 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -728,11 +728,6 @@ def test_cython_transform_frame(op, args, targop):
             # dict(by=['int','string'])]:
 
             gb = df.groupby(**gb_target)
-            # allowlisted methods set the selection before applying
-            # bit a of hack to make sure the cythonized shift
-            # is equivalent to pre 0.17.1 behavior
-            if op == "shift":
-                gb._set_group_selection()
 
             if op != "shift" and "int" not in gb_target:
                 # numeric apply fastpath promotes dtype so have
@@ -743,6 +738,9 @@ def test_cython_transform_frame(op, args, targop):
             else:
                 expected = gb.apply(targop)
 
+            if op == "shift" and type(gb_target.get("by")) is str:
+                expected = expected.drop(columns=gb_target.get("by"))
+
             expected = expected.sort_index(axis=1)
             tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1))
             tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1))

From 27b2694203057c1e3691a667d4337bc363e1744c Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 15 Jul 2020 15:46:41 +0100
Subject: [PATCH 03/34] test uses .drop() instead of selection

---
 pandas/tests/groupby/test_grouping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 61588bd6ce165..9122b8de4c2fa 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -191,7 +191,7 @@ def test_grouper_creation_bug(self):
         result = g.sum()
         tm.assert_frame_equal(result, expected)
 
-        result = g[["B"]].apply(lambda x: x.sum())
+        result = g.apply(lambda x: x.sum()).drop(columns="A")
         tm.assert_frame_equal(result, expected)
 
         g = df.groupby(pd.Grouper(key="A", axis=0))

From 0cca6df39246f06734d820b13b78d36581aec5b4 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 15 Jul 2020 16:44:15 +0100
Subject: [PATCH 04/34] wrote new tests

---
 pandas/tests/groupby/test_apply.py | 40 ++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index aa10f44670361..a6aa9bdd7814d 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1010,3 +1010,43 @@ def test_apply_with_timezones_aware():
     result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy())
 
     tm.assert_frame_equal(result1, result2)
+
+
+@pytest.mark.parametrize(
+    "func", ["sum", "min", "max", "mean", "std", "prod", "cumprod", "cumsum"]
+)
+def test_apply_is_unchanged_when_other_methods_are_clled_first(func):
+    # GH 34656
+    # GH 34271
+    df = DataFrame(
+        {
+            "a": [99, 99, 99, 88, 88, 88],
+            "b": [1, 2, 3, 4, 5, 6],
+            "c": [10, 20, 30, 40, 50, 60],
+        }
+    )
+
+    expected = df.groupby("a").apply(getattr(np, func))
+
+    # Call .apply() without calling any method on the GroupBy beforehand
+    grp = df.groupby("a")
+    result = grp.apply(getattr(np, func))
+    tm.assert_frame_equal(result, expected)
+
+    # Call .apply() after calling .min()  on the GroupBy
+    grp = df.groupby("a")
+    grp.min()
+    result = grp.apply(getattr(np, func))
+    tm.assert_frame_equal(result, expected)
+
+    # Call .apply() after calling 'func' on the GroupBy
+    grp = df.groupby("a")
+    getattr(grp, func)()
+    result = grp.apply(getattr(np, func))
+    tm.assert_frame_equal(result, expected)
+
+    # Call .apply() after directly calling ._set_group_selection() on the GroupBy
+    grp = df.groupby("a")
+    grp._set_group_selection()
+    result = grp.apply(getattr(np, func))
+    tm.assert_frame_equal(result, expected)

From 2786eb5e3b2f0ee46659a4c56e4cd29be873e703 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Thu, 16 Jul 2020 23:51:42 +0100
Subject: [PATCH 05/34] rewrote test

---
 pandas/tests/groupby/test_apply.py | 37 +++++++++++-------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a6aa9bdd7814d..b9e800c6da84d 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1012,10 +1012,7 @@ def test_apply_with_timezones_aware():
     tm.assert_frame_equal(result1, result2)
 
 
-@pytest.mark.parametrize(
-    "func", ["sum", "min", "max", "mean", "std", "prod", "cumprod", "cumsum"]
-)
-def test_apply_is_unchanged_when_other_methods_are_clled_first(func):
+def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
     # GH 34656
     # GH 34271
     df = DataFrame(
@@ -1026,27 +1023,19 @@ def test_apply_is_unchanged_when_other_methods_are_clled_first(func):
         }
     )
 
-    expected = df.groupby("a").apply(getattr(np, func))
-
-    # Call .apply() without calling any method on the GroupBy beforehand
-    grp = df.groupby("a")
-    result = grp.apply(getattr(np, func))
-    tm.assert_frame_equal(result, expected)
-
-    # Call .apply() after calling .min()  on the GroupBy
-    grp = df.groupby("a")
-    grp.min()
-    result = grp.apply(getattr(np, func))
-    tm.assert_frame_equal(result, expected)
+    expected = pd.DataFrame(
+        {"a": [264, 297], "b": [15, 6], "c": [150, 60],},
+        index=pd.Index([88, 99], name="a"),
+    )
 
-    # Call .apply() after calling 'func' on the GroupBy
-    grp = df.groupby("a")
-    getattr(grp, func)()
-    result = grp.apply(getattr(np, func))
+    # Check output wehn no other methods are called before .apply()
+    grp = df.groupby(by="a")
+    result = grp.apply(sum)
     tm.assert_frame_equal(result, expected)
 
-    # Call .apply() after directly calling ._set_group_selection() on the GroupBy
-    grp = df.groupby("a")
-    grp._set_group_selection()
-    result = grp.apply(getattr(np, func))
+    # Check output when another methods is called before .apply()
+    grp = df.groupby(by="a")
+    args = {"nth": [0], "corrwith": [df]}.get(reduction_func, [])
+    _ = getattr(grp, reduction_func)(*args)
+    result = grp.apply(sum)
     tm.assert_frame_equal(result, expected)

From 33cdf65b60bcdf67d7bb1b4b70e3c42c9ae2d675 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Thu, 16 Jul 2020 23:54:24 +0100
Subject: [PATCH 06/34] whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 814dbe999d5c1..bb11085c26d1f 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1084,6 +1084,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
 - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
+- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance index column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^

From 6a65e2f4eb64e9f2811d4d2742f0724596c7b658 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:03:29 +0100
Subject: [PATCH 07/34] restore if-statement in test_transform

---
 pandas/tests/groupby/transform/test_transform.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 4caf50b0df1fa..47e5fe2742075 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -728,6 +728,11 @@ def test_cython_transform_frame(op, args, targop):
             # dict(by=['int','string'])]:
 
             gb = df.groupby(**gb_target)
+            # allowlisted methods set the selection before applying
+            # bit a of hack to make sure the cythonized shift
+            # is equivalent to pre 0.17.1 behavior
+            if op == "shift":
+                gb._set_group_selection()
 
             if op != "shift" and "int" not in gb_target:
                 # numeric apply fastpath promotes dtype so have

From 9948d2f898be43af4e985b4da070b31749147baa Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:06:57 +0100
Subject: [PATCH 08/34] amended test

---
 pandas/tests/groupby/test_categorical.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e0d654dc60e9c..e04f7877a352b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -118,7 +118,7 @@ def test_basic():
     )
     x["person_name"] = Categorical(x.person_name)
 
-    g = x.groupby(["person_id"], observed=False)
+    g = x.groupby(["person_id"], observed=False, as_index=False)
     result = g.transform(lambda x: x)
     tm.assert_frame_equal(result, x[["person_name"]])
 
@@ -129,7 +129,6 @@ def test_basic():
     def f(x):
         return x.drop_duplicates("person_name").iloc[0]
 
-    g = x.groupby(["person_id"], observed=False, as_index=False)
     result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected["person_name"] = expected["person_name"].astype("object")

From e4a132e88b809859736d9131bbcebf4b17df477c Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:09:40 +0100
Subject: [PATCH 09/34] restored test

---
 pandas/tests/groupby/test_categorical.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e04f7877a352b..52ef4fe935bdb 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -118,7 +118,7 @@ def test_basic():
     )
     x["person_name"] = Categorical(x.person_name)
 
-    g = x.groupby(["person_id"], observed=False, as_index=False)
+    g = x.groupby(["person_id"], observed=False)
     result = g.transform(lambda x: x)
     tm.assert_frame_equal(result, x[["person_name"]])
 
@@ -131,6 +131,7 @@ def f(x):
 
     result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
+    expected.index = Index([1, 2], name="person_id")
     expected["person_name"] = expected["person_name"].astype("object")
     tm.assert_frame_equal(result, expected)
 

From 4170ca6969fa9d766597495cb6de279e7c26ec76 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:11:08 +0100
Subject: [PATCH 10/34] restored test

---
 pandas/tests/groupby/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 52ef4fe935bdb..7e4513da37dc9 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1287,7 +1287,7 @@ def test_get_nonexistent_category():
     # Accessing a Category that is not in the dataframe
     df = pd.DataFrame({"var": ["a", "a", "b", "b"], "val": range(4)})
     with pytest.raises(KeyError, match="'vau'"):
-        df.groupby("var", as_index=False).apply(
+        df.groupby("var").apply(
             lambda rows: pd.DataFrame(
                 {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]}
             )

From 0ab1c8ae77f0a89adc38a334e819994ccd987667 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:14:29 +0100
Subject: [PATCH 11/34] amended test

---
 pandas/tests/groupby/aggregate/test_other.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 3356c2dd9c88b..e8cd6017a117c 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -486,13 +486,13 @@ def test_agg_timezone_round_trip():
     assert ts == grouped.first()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[0]).loc["a", "B"]
+    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[-1]).loc["a", "B"]
+    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
 
 
 def test_sum_uint64_overflow():

From f2a32f405c4a52c28827c516f34139b728541c8e Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:16:31 +0100
Subject: [PATCH 12/34] cleanup

---
 doc/source/whatsnew/v1.1.0.rst     | 2 +-
 pandas/tests/groupby/test_apply.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index bb11085c26d1f..6f727df2cb1c4 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1084,7 +1084,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
 - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
-- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance index column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
+- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index b9e800c6da84d..0e9cb845dcf63 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1033,7 +1033,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
     result = grp.apply(sum)
     tm.assert_frame_equal(result, expected)
 
-    # Check output when another methods is called before .apply()
+    # Check output when another method is called before .apply()
     grp = df.groupby(by="a")
     args = {"nth": [0], "corrwith": [df]}.get(reduction_func, [])
     _ = getattr(grp, reduction_func)(*args)

From f5b674b4580e5cfb3e778659a48c6b2560fd55af Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 00:29:14 +0100
Subject: [PATCH 13/34] trailing comma

---
 pandas/tests/groupby/test_apply.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 0e9cb845dcf63..b68b3d74517bc 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1024,7 +1024,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
     )
 
     expected = pd.DataFrame(
-        {"a": [264, 297], "b": [15, 6], "c": [150, 60],},
+        {"a": [264, 297], "b": [15, 6], "c": [150, 60]},
         index=pd.Index([88, 99], name="a"),
     )
 

From 7028756e5d93adf200822a955fbeb0e3090d57c7 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 09:14:15 +0100
Subject: [PATCH 14/34] fixed test_to_latex

---
 pandas/tests/io/formats/test_to_latex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 509e5bcb33304..c7b2d2de2585d 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -224,7 +224,7 @@ def test_to_latex_multiindex(self):
 
         assert result == expected
 
-        result = df.groupby("a").describe().to_latex()
+        result = df.groupby("a").describe().drop(columns='a').to_latex()
         expected = r"""\begin{tabular}{lrrrrrrrr}
 \toprule
 {} & \multicolumn{8}{l}{c} \\

From 45abe63d7810a49312d60355bbc81d805280b18d Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 10:14:20 +0100
Subject: [PATCH 15/34] added test to ensure .describe() keeps non-nuisance
 grouping columns

---
 pandas/core/groupby/groupby.py        |  9 +++---
 pandas/tests/groupby/test_function.py | 44 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index a29502ce1363d..c542c1ae6e334 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1625,11 +1625,10 @@ def ohlc(self) -> DataFrame:
 
     @doc(DataFrame.describe)
     def describe(self, **kwargs):
-        with _group_selection_context(self):
-            result = self.apply(lambda x: x.describe(**kwargs))
-            if self.axis == 1:
-                return result.T
-            return result.unstack()
+        result = self.apply(lambda x: x.describe(**kwargs))
+        if self.axis == 1:
+            return result.T
+        return result.unstack()
 
     def resample(self, rule, *args, **kwargs):
         """
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 35c9bd4c33fdc..3a994f9265c8a 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -974,6 +974,50 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("by_col_dtype", [int, float, str])
+def test_describe_results_includes_non_nuisance_columns(by_col_dtype):
+    # GH 34656
+    # GH 34271
+    df = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 3, 3], "b": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
+    df = df.astype({"a": by_col_dtype})
+
+    expected = (
+        DataFrame.from_records(
+            [
+                ("a", "count", 3.0, 3.0, 3.0),
+                ("a", "mean", 1.0, 2.0, 3.0),
+                ("a", "std", 0.0, 0.0, 0.0),
+                ("a", "min", 1.0, 2.0, 3.0),
+                ("a", "25%", 1.0, 2.0, 3.0),
+                ("a", "50%", 1.0, 2.0, 3.0),
+                ("a", "75%", 1.0, 2.0, 3.0),
+                ("a", "max", 1.0, 2.0, 3.0),
+                ("b", "count", 3.0, 3.0, 3.0),
+                ("b", "mean", 2.0, 5.0, 8.0),
+                ("b", "std", 1.0, 1.0, 1.0),
+                ("b", "min", 1.0, 4.0, 7.0),
+                ("b", "25%", 1.5, 4.5, 7.5),
+                ("b", "50%", 2.0, 5.0, 8.0),
+                ("b", "75%", 2.5, 5.5, 8.5),
+                ("b", "max", 3.0, 6.0, 9.0),
+            ],
+            columns=["col", "func", 1, 2, 3],
+        )
+        .set_index(["col", "func"])
+        .T
+    )
+    expected.columns.names = [None, None]
+    expected.index = pd.Index(expected.index.astype(by_col_dtype), name="a")
+
+    if by_col_dtype is str:
+        # If the grouping column is a nuisance column (i.e. can't apply the
+        # std() or quantile() to it) then it does not appear in the output
+        expected = expected.drop(columns="a")
+
+    result = df.groupby("a").describe()
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = pd.DataFrame(

From 063f0ea0320313ad3f553410a88aed9a6adc9ce6 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 11:03:01 +0100
Subject: [PATCH 16/34] minimized changes to existing tests

---
 pandas/tests/groupby/test_function.py    | 45 ++++++++++++++++++++----
 pandas/tests/groupby/test_grouping.py    |  8 +++--
 pandas/tests/io/formats/test_to_latex.py |  2 +-
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 3a994f9265c8a..61d8ae3b281b8 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -286,19 +286,52 @@ def test_non_cython_api():
 
     # describe
     expected_index = pd.Index([1, 3], name="A")
-    expected_col = pd.MultiIndex(
-        levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]],
-        codes=[[0] * 8, list(range(8))],
+    expected_col = pd.MultiIndex.from_product(
+        [["A", "B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]]
     )
     expected = pd.DataFrame(
         [
-            [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
-            [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+            [
+                2.0,
+                1.0,
+                0.0,
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                2.0,
+                np.nan,
+                2.0,
+                2.0,
+                2.0,
+                2.0,
+                2.0,
+            ],
+            [
+                1.0,
+                3.0,
+                np.nan,
+                3.0,
+                3.0,
+                3.0,
+                3.0,
+                3.0,
+                0.0,
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+                np.nan,
+            ],
         ],
         index=expected_index,
         columns=expected_col,
     )
-    result = g.describe().drop(columns="A")
+    result = g.describe()
     tm.assert_frame_equal(result, expected)
 
     expected = pd.concat(
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 9122b8de4c2fa..40b4ce46e550b 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -191,13 +191,15 @@ def test_grouper_creation_bug(self):
         result = g.sum()
         tm.assert_frame_equal(result, expected)
 
-        result = g.apply(lambda x: x.sum()).drop(columns="A")
-        tm.assert_frame_equal(result, expected)
-
         g = df.groupby(pd.Grouper(key="A", axis=0))
         result = g.sum()
         tm.assert_frame_equal(result, expected)
 
+        result = g.apply(lambda x: x.sum())
+        expected["A"] = [0, 2, 4]
+        expected = expected.loc[:, ["A", "B"]]
+        tm.assert_frame_equal(result, expected)
+
         # GH14334
         # pd.Grouper(key=...) may be passed in a list
         df = DataFrame(
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index c7b2d2de2585d..053a9b6a9fd38 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -224,7 +224,7 @@ def test_to_latex_multiindex(self):
 
         assert result == expected
 
-        result = df.groupby("a").describe().drop(columns='a').to_latex()
+        result = df.groupby("a").describe().drop(columns="a").to_latex()
         expected = r"""\begin{tabular}{lrrrrrrrr}
 \toprule
 {} & \multicolumn{8}{l}{c} \\

From 7f0d192aa172f6fba3d22f2eb036e458a757a222 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 11:04:57 +0100
Subject: [PATCH 17/34] add .describe() to whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 6f727df2cb1c4..6c5810a770ece 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1084,7 +1084,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
 - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
-- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
+- Bug in :meth:`DataFrameGroupBy.apply` :meth:`DataFrameGroupBy.describe` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^

From 974da63b1fc4c7bdd53bd45bb8227cea69bcb3d4 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Fri, 17 Jul 2020 20:58:47 +0100
Subject: [PATCH 18/34] parametrize test over as_index=T/F

---
 pandas/tests/groupby/test_function.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 61d8ae3b281b8..595cbe9a0e9de 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1008,7 +1008,8 @@ def test_frame_describe_unstacked_format():
 
 
 @pytest.mark.parametrize("by_col_dtype", [int, float, str])
-def test_describe_results_includes_non_nuisance_columns(by_col_dtype):
+@pytest.mark.parametrize("as_index", [True, False])
+def test_describe_results_includes_non_nuisance_columns(by_col_dtype, as_index):
     # GH 34656
     # GH 34271
     df = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 3, 3], "b": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
@@ -1042,12 +1043,15 @@ def test_describe_results_includes_non_nuisance_columns(by_col_dtype):
     expected.columns.names = [None, None]
     expected.index = pd.Index(expected.index.astype(by_col_dtype), name="a")
 
+    if not as_index:
+        expected = expected.reset_index(drop=True)
+
     if by_col_dtype is str:
         # If the grouping column is a nuisance column (i.e. can't apply the
         # std() or quantile() to it) then it does not appear in the output
         expected = expected.drop(columns="a")
 
-    result = df.groupby("a").describe()
+    result = df.groupby("a", as_index=as_index).describe()
     tm.assert_frame_equal(result, expected)
 
 

From b395e39e72c5c550f78bf3929829d9b39bb47cce Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sat, 18 Jul 2020 13:41:49 +0100
Subject: [PATCH 19/34] restored .describe to old behaviour

---
 doc/source/whatsnew/v1.1.0.rst           |   2 +-
 pandas/core/groupby/groupby.py           |   5 +
 pandas/tests/groupby/test_function.py    | 160 +----------------------
 pandas/tests/io/formats/test_to_latex.py |   2 +-
 4 files changed, 12 insertions(+), 157 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 6c5810a770ece..6f727df2cb1c4 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1084,7 +1084,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
 - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
-- Bug in :meth:`DataFrameGroupBy.apply` :meth:`DataFrameGroupBy.describe` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
+- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c542c1ae6e334..b44a563bdb648 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1628,6 +1628,11 @@ def describe(self, **kwargs):
         result = self.apply(lambda x: x.describe(**kwargs))
         if self.axis == 1:
             return result.T
+        if self.as_index:
+            with _group_selection_context(self):
+                if self._group_selection is not None:
+                    cols = result.columns.intersection(self._group_selection)
+                    result = result.reindex(columns=cols)
         return result.unstack()
 
     def resample(self, rule, *args, **kwargs):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 595cbe9a0e9de..d136556e5336a 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -286,47 +286,14 @@ def test_non_cython_api():
 
     # describe
     expected_index = pd.Index([1, 3], name="A")
-    expected_col = pd.MultiIndex.from_product(
-        [["A", "B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]]
+    expected_col = pd.MultiIndex(
+        levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]],
+        codes=[[0] * 8, list(range(8))],
     )
     expected = pd.DataFrame(
         [
-            [
-                2.0,
-                1.0,
-                0.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                2.0,
-                np.nan,
-                2.0,
-                2.0,
-                2.0,
-                2.0,
-                2.0,
-            ],
-            [
-                1.0,
-                3.0,
-                np.nan,
-                3.0,
-                3.0,
-                3.0,
-                3.0,
-                3.0,
-                0.0,
-                np.nan,
-                np.nan,
-                np.nan,
-                np.nan,
-                np.nan,
-                np.nan,
-                np.nan,
-            ],
+            [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
+            [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
         ],
         index=expected_index,
         columns=expected_col,
@@ -334,16 +301,6 @@ def test_non_cython_api():
     result = g.describe()
     tm.assert_frame_equal(result, expected)
 
-    expected = pd.concat(
-        [
-            df[df.A == 1].describe().unstack().to_frame().T,
-            df[df.A == 3].describe().unstack().to_frame().T,
-        ]
-    )
-    expected.index = pd.Index([0, 1])
-    result = gni.describe()
-    tm.assert_frame_equal(result, expected)
-
     # any
     expected = DataFrame(
         [[True, True], [False, True]], columns=["B", "C"], index=[1, 3]
@@ -1005,110 +962,3 @@ def test_frame_describe_unstacked_format():
         columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
     )
     tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("by_col_dtype", [int, float, str])
-@pytest.mark.parametrize("as_index", [True, False])
-def test_describe_results_includes_non_nuisance_columns(by_col_dtype, as_index):
-    # GH 34656
-    # GH 34271
-    df = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 3, 3], "b": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
-    df = df.astype({"a": by_col_dtype})
-
-    expected = (
-        DataFrame.from_records(
-            [
-                ("a", "count", 3.0, 3.0, 3.0),
-                ("a", "mean", 1.0, 2.0, 3.0),
-                ("a", "std", 0.0, 0.0, 0.0),
-                ("a", "min", 1.0, 2.0, 3.0),
-                ("a", "25%", 1.0, 2.0, 3.0),
-                ("a", "50%", 1.0, 2.0, 3.0),
-                ("a", "75%", 1.0, 2.0, 3.0),
-                ("a", "max", 1.0, 2.0, 3.0),
-                ("b", "count", 3.0, 3.0, 3.0),
-                ("b", "mean", 2.0, 5.0, 8.0),
-                ("b", "std", 1.0, 1.0, 1.0),
-                ("b", "min", 1.0, 4.0, 7.0),
-                ("b", "25%", 1.5, 4.5, 7.5),
-                ("b", "50%", 2.0, 5.0, 8.0),
-                ("b", "75%", 2.5, 5.5, 8.5),
-                ("b", "max", 3.0, 6.0, 9.0),
-            ],
-            columns=["col", "func", 1, 2, 3],
-        )
-        .set_index(["col", "func"])
-        .T
-    )
-    expected.columns.names = [None, None]
-    expected.index = pd.Index(expected.index.astype(by_col_dtype), name="a")
-
-    if not as_index:
-        expected = expected.reset_index(drop=True)
-
-    if by_col_dtype is str:
-        # If the grouping column is a nuisance column (i.e. can't apply the
-        # std() or quantile() to it) then it does not appear in the output
-        expected = expected.drop(columns="a")
-
-    result = df.groupby("a", as_index=as_index).describe()
-    tm.assert_frame_equal(result, expected)
-
-
-def test_groupby_mean_no_overflow():
-    # Regression test for (#22487)
-    df = pd.DataFrame(
-        {
-            "user": ["A", "A", "A", "A", "A"],
-            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
-        }
-    )
-    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
-
-
-@pytest.mark.parametrize(
-    "values",
-    [
-        {
-            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
-            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
-        },
-        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
-    ],
-)
-@pytest.mark.parametrize("function", ["mean", "median", "var"])
-def test_apply_to_nullable_integer_returns_float(values, function):
-    # https://github.com/pandas-dev/pandas/issues/32219
-    output = 0.5 if function == "var" else 1.5
-    arr = np.array([output] * 3, dtype=float)
-    idx = pd.Index([1, 2, 3], dtype=object, name="a")
-    expected = pd.DataFrame({"b": arr}, index=idx)
-
-    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
-
-    result = getattr(groups, function)()
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg(function)
-    tm.assert_frame_equal(result, expected)
-
-    result = groups.agg([function])
-    expected.columns = MultiIndex.from_tuples([("b", function)])
-    tm.assert_frame_equal(result, expected)
-
-
-def test_groupby_sum_below_mincount_nullable_integer():
-    # https://github.com/pandas-dev/pandas/issues/32861
-    df = pd.DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64")
-    grouped = df.groupby("a")
-    idx = pd.Index([0, 1, 2], dtype=object, name="a")
-
-    result = grouped["b"].sum(min_count=2)
-    expected = pd.Series([pd.NA] * 3, dtype="Int64", index=idx, name="b")
-    tm.assert_series_equal(result, expected)
-
-    result = grouped.sum(min_count=2)
-    expected = pd.DataFrame(
-        {"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx
-    )
-    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 053a9b6a9fd38..509e5bcb33304 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -224,7 +224,7 @@ def test_to_latex_multiindex(self):
 
         assert result == expected
 
-        result = df.groupby("a").describe().drop(columns="a").to_latex()
+        result = df.groupby("a").describe().to_latex()
         expected = r"""\begin{tabular}{lrrrrrrrr}
 \toprule
 {} & \multicolumn{8}{l}{c} \\

From 67e974438bbb8f94dd4169dec16fe36bc64d45e7 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sat, 18 Jul 2020 13:49:54 +0100
Subject: [PATCH 20/34] restoring test_function.py to master

---
 pandas/tests/groupby/test_function.py | 69 +++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 67cc90d23620d..e693962e57ac3 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -319,6 +319,16 @@ def test_non_cython_api():
     result = g.describe()
     tm.assert_frame_equal(result, expected)
 
+    expected = pd.concat(
+        [
+            df[df.A == 1].describe().unstack().to_frame().T,
+            df[df.A == 3].describe().unstack().to_frame().T,
+        ]
+    )
+    expected.index = pd.Index([0, 1])
+    result = gni.describe()
+    tm.assert_frame_equal(result, expected)
+
     # any
     expected = DataFrame(
         [[True, True], [False, True]], columns=["B", "C"], index=[1, 3]
@@ -980,3 +990,62 @@ def test_frame_describe_unstacked_format():
         columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_mean_no_overflow():
+    # Regression test for (#22487)
+    df = pd.DataFrame(
+        {
+            "user": ["A", "A", "A", "A", "A"],
+            "connections": [4970, 4749, 4719, 4704, 18446744073699999744],
+        }
+    )
+    assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        {
+            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
+            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
+        },
+        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
+    ],
+)
+@pytest.mark.parametrize("function", ["mean", "median", "var"])
+def test_apply_to_nullable_integer_returns_float(values, function):
+    # https://github.com/pandas-dev/pandas/issues/32219
+    output = 0.5 if function == "var" else 1.5
+    arr = np.array([output] * 3, dtype=float)
+    idx = pd.Index([1, 2, 3], dtype=object, name="a")
+    expected = pd.DataFrame({"b": arr}, index=idx)
+
+    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
+
+    result = getattr(groups, function)()
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg(function)
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg([function])
+    expected.columns = MultiIndex.from_tuples([("b", function)])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_sum_below_mincount_nullable_integer():
+    # https://github.com/pandas-dev/pandas/issues/32861
+    df = pd.DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64")
+    grouped = df.groupby("a")
+    idx = pd.Index([0, 1, 2], dtype=object, name="a")
+
+    result = grouped["b"].sum(min_count=2)
+    expected = pd.Series([pd.NA] * 3, dtype="Int64", index=idx, name="b")
+    tm.assert_series_equal(result, expected)
+
+    result = grouped.sum(min_count=2)
+    expected = pd.DataFrame(
+        {"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx
+    )
+    tm.assert_frame_equal(result, expected)

From 8f1b9c9fdd355474494f98367ec7f6facbf452cc Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sat, 18 Jul 2020 14:00:41 +0100
Subject: [PATCH 21/34] added comment

---
 pandas/core/groupby/groupby.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 93b779bc35d6a..bc9ef0b6e3456 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1628,6 +1628,8 @@ def describe(self, **kwargs):
         result = self.apply(lambda x: x.describe(**kwargs))
         if self.axis == 1:
             return result.T
+        # GH 34656 self.apply() will return non-nuisance grouping columns, but
+        # we remove them from describe if as_index=True
         if self.as_index:
             with _group_selection_context(self):
                 if self._group_selection is not None:

From f422b7d5a962a06f4e8d66a18aad49bdfffc8fe5 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sun, 26 Jul 2020 22:52:02 +0100
Subject: [PATCH 22/34] fixed describe to work with duplicate cols

---
 pandas/core/groupby/groupby.py        |  4 +-
 pandas/tests/groupby/test_function.py | 57 +++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index bc9ef0b6e3456..05d7aa1379d95 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1633,8 +1633,8 @@ def describe(self, **kwargs):
         if self.as_index:
             with _group_selection_context(self):
                 if self._group_selection is not None:
-                    cols = result.columns.intersection(self._group_selection)
-                    result = result.reindex(columns=cols)
+                    group_cols = result.columns.difference(self._group_selection)
+                    result = result.drop(columns=group_cols)
         return result.unstack()
 
     def resample(self, rule, *args, **kwargs):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index e693962e57ac3..1c5d08561ff90 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -992,6 +992,63 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("as_index", [True, False])
+def test_describe_with_duplicate_output_column_names(as_index):
+    # GH 35314
+    df = pd.DataFrame(
+        {
+            "a": [99, 99, 99, 88, 88, 88],
+            "b": [1, 2, 3, 4, 5, 6],
+            "c": [10, 20, 30, 40, 50, 60],
+        },
+        columns=["a", "b", "b"],
+    )
+
+    expected = (
+        pd.DataFrame.from_records(
+            [
+                ("a", "count", 3.0, 3.0),
+                ("a", "mean", 88.0, 99.0),
+                ("a", "std", 0.0, 0.0),
+                ("a", "min", 88.0, 99.0),
+                ("a", "25%", 88.0, 99.0),
+                ("a", "50%", 88.0, 99.0),
+                ("a", "75%", 88.0, 99.0),
+                ("a", "max", 88.0, 99.0),
+                ("b", "count", 3.0, 3.0),
+                ("b", "mean", 5.0, 2.0),
+                ("b", "std", 1.0, 1.0),
+                ("b", "min", 4.0, 1.0),
+                ("b", "25%", 4.5, 1.5),
+                ("b", "50%", 5.0, 2.0),
+                ("b", "75%", 5.5, 2.5),
+                ("b", "max", 6.0, 3.0),
+                ("b", "count", 3.0, 3.0),
+                ("b", "mean", 5.0, 2.0),
+                ("b", "std", 1.0, 1.0),
+                ("b", "min", 4.0, 1.0),
+                ("b", "25%", 4.5, 1.5),
+                ("b", "50%", 5.0, 2.0),
+                ("b", "75%", 5.5, 2.5),
+                ("b", "max", 6.0, 3.0),
+            ],
+        )
+        .set_index([0, 1])
+        .T
+    )
+    expected.columns.names = [None, None]
+    expected.index = pd.Index([88, 99], name="a")
+
+    if as_index:
+        expected = expected.drop(columns=["a"], level=0)
+    else:
+        expected = expected.reset_index(drop=True)
+
+    result = df.groupby("a", as_index=as_index).describe()
+
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = pd.DataFrame(

From 6bec0400c4fbae62b655bde70be41fdbaaa2c9fc Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sun, 26 Jul 2020 23:50:09 +0100
Subject: [PATCH 23/34] update comment

---
 pandas/tests/groupby/test_function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1c5d08561ff90..97e99337f5ffd 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -994,7 +994,7 @@ def test_frame_describe_unstacked_format():
 
 @pytest.mark.parametrize("as_index", [True, False])
 def test_describe_with_duplicate_output_column_names(as_index):
-    # GH 35314
+    # GH #35314
     df = pd.DataFrame(
         {
             "a": [99, 99, 99, 88, 88, 88],

From 8cdd4cd34728c89ca28a2c1044ca2c6132b2efb1 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 08:39:31 +0100
Subject: [PATCH 24/34] use _group_selection_context in _make_wrapper, _agg_general, describe and nth

---
 pandas/core/groupby/groupby.py                | 177 +++++++++---------
 .../tests/groupby/transform/test_transform.py |   3 -
 2 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 05d7aa1379d95..a4c8ff1ec564e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -734,56 +734,58 @@ def pipe(self, func, *args, **kwargs):
     def _make_wrapper(self, name):
         assert name in self._apply_allowlist
 
-        self._set_group_selection()
+        with _group_selection_context(self):
 
-        # need to setup the selection
-        # as are not passed directly but in the grouper
-        f = getattr(self._obj_with_exclusions, name)
-        if not isinstance(f, types.MethodType):
-            return self.apply(lambda self: getattr(self, name))
+            # need to setup the selection
+            # as are not passed directly but in the grouper
+            f = getattr(self._obj_with_exclusions, name)
+            if not isinstance(f, types.MethodType):
+                return self.apply(lambda self: getattr(self, name))
 
-        f = getattr(type(self._obj_with_exclusions), name)
-        sig = inspect.signature(f)
+            f = getattr(type(self._obj_with_exclusions), name)
+            sig = inspect.signature(f)
 
-        def wrapper(*args, **kwargs):
-            # a little trickery for aggregation functions that need an axis
-            # argument
-            if "axis" in sig.parameters:
-                if kwargs.get("axis", None) is None:
-                    kwargs["axis"] = self.axis
+            def wrapper(*args, **kwargs):
+                # a little trickery for aggregation functions that need an axis
+                # argument
+                if "axis" in sig.parameters:
+                    if kwargs.get("axis", None) is None:
+                        kwargs["axis"] = self.axis
 
-            def curried(x):
-                return f(x, *args, **kwargs)
+                def curried(x):
+                    return f(x, *args, **kwargs)
 
-            # preserve the name so we can detect it when calling plot methods,
-            # to avoid duplicates
-            curried.__name__ = name
+                # preserve the name so we can detect it when calling plot methods,
+                # to avoid duplicates
+                curried.__name__ = name
 
-            # special case otherwise extra plots are created when catching the
-            # exception below
-            if name in base.plotting_methods:
-                return self.apply(curried)
+                # special case otherwise extra plots are created when catching the
+                # exception below
+                if name in base.plotting_methods:
+                    return self.apply(curried)
 
-            try:
-                return self._python_apply_general(curried, self._obj_with_exclusions)
-            except TypeError as err:
-                if not re.search(
-                    "reduction operation '.*' not allowed for this dtype", str(err)
-                ):
-                    # We don't have a cython implementation
-                    # TODO: is the above comment accurate?
-                    raise
+                try:
+                    return self._python_apply_general(
+                        curried, self._obj_with_exclusions
+                    )
+                except TypeError as err:
+                    if not re.search(
+                        "reduction operation '.*' not allowed for this dtype", str(err)
+                    ):
+                        # We don't have a cython implementation
+                        # TODO: is the above comment accurate?
+                        raise
 
-            if self.obj.ndim == 1:
-                # this can be called recursively, so need to raise ValueError
-                raise ValueError
+                if self.obj.ndim == 1:
+                    # this can be called recursively, so need to raise ValueError
+                    raise ValueError
 
-            # GH#3688 try to operate item-by-item
-            result = self._aggregate_item_by_item(name, *args, **kwargs)
-            return result
+                # GH#3688 try to operate item-by-item
+                result = self._aggregate_item_by_item(name, *args, **kwargs)
+                return result
 
-        wrapper.__name__ = name
-        return wrapper
+            wrapper.__name__ = name
+            return wrapper
 
     def get_group(self, name, obj=None):
         """
@@ -829,8 +831,6 @@ def __iter__(self):
     )
     def apply(self, func, *args, **kwargs):
 
-        self._reset_group_selection()
-
         func = self._is_builtin_func(func)
 
         # this is needed so we don't try and wrap strings. If we could
@@ -992,28 +992,31 @@ def _agg_general(
         alias: str,
         npfunc: Callable,
     ):
-        self._set_group_selection()
-
-        # try a cython aggregation if we can
-        try:
-            return self._cython_agg_general(
-                how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count,
-            )
-        except DataError:
-            pass
-        except NotImplementedError as err:
-            if "function is not implemented for this dtype" in str(
-                err
-            ) or "category dtype not supported" in str(err):
-                # raised in _get_cython_function, in some cases can
-                #  be trimmed by implementing cython funcs for more dtypes
+        # self._set_group_selection()
+        with _group_selection_context(self):
+            # try a cython aggregation if we can
+            try:
+                return self._cython_agg_general(
+                    how=alias,
+                    alt=npfunc,
+                    numeric_only=numeric_only,
+                    min_count=min_count,
+                )
+            except DataError:
                 pass
-            else:
-                raise
+            except NotImplementedError as err:
+                if "function is not implemented for this dtype" in str(
+                    err
+                ) or "category dtype not supported" in str(err):
+                    # raised in _get_cython_function, in some cases can
+                    #  be trimmed by implementing cython funcs for more dtypes
+                    pass
+                else:
+                    raise
 
-        # apply a non-cython aggregation
-        result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
-        return result
+            # apply a non-cython aggregation
+            result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
+            return result
 
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
@@ -1625,16 +1628,11 @@ def ohlc(self) -> DataFrame:
 
     @doc(DataFrame.describe)
     def describe(self, **kwargs):
-        result = self.apply(lambda x: x.describe(**kwargs))
-        if self.axis == 1:
-            return result.T
-        # GH 34656 self.apply() will return non-nuisance grouping columns, but
-        # we remove them from describe if as_index=True
-        if self.as_index:
-            with _group_selection_context(self):
-                if self._group_selection is not None:
-                    group_cols = result.columns.difference(self._group_selection)
-                    result = result.drop(columns=group_cols)
+        with _group_selection_context(self):
+            result = self.apply(lambda x: x.describe(**kwargs))
+            if self.axis == 1:
+                return result.T
+            return result.unstack()
         return result.unstack()
 
     def resample(self, rule, *args, **kwargs):
@@ -1936,29 +1934,32 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
                 nth_values = list(set(n))
 
             nth_array = np.array(nth_values, dtype=np.intp)
-            self._set_group_selection()
+            # self._set_group_selection()
+            with _group_selection_context(self):
 
-            mask_left = np.in1d(self._cumcount_array(), nth_array)
-            mask_right = np.in1d(self._cumcount_array(ascending=False) + 1, -nth_array)
-            mask = mask_left | mask_right
+                mask_left = np.in1d(self._cumcount_array(), nth_array)
+                mask_right = np.in1d(
+                    self._cumcount_array(ascending=False) + 1, -nth_array
+                )
+                mask = mask_left | mask_right
 
-            ids, _, _ = self.grouper.group_info
+                ids, _, _ = self.grouper.group_info
 
-            # Drop NA values in grouping
-            mask = mask & (ids != -1)
+                # Drop NA values in grouping
+                mask = mask & (ids != -1)
 
-            out = self._selected_obj[mask]
-            if not self.as_index:
-                return out
+                out = self._selected_obj[mask]
+                if not self.as_index:
+                    return out
 
-            result_index = self.grouper.result_index
-            out.index = result_index[ids[mask]]
+                result_index = self.grouper.result_index
+                out.index = result_index[ids[mask]]
 
-            if not self.observed and isinstance(result_index, CategoricalIndex):
-                out = out.reindex(result_index)
+                if not self.observed and isinstance(result_index, CategoricalIndex):
+                    out = out.reindex(result_index)
 
-            out = self._reindex_output(out)
-            return out.sort_index() if self.sort else out
+                out = self._reindex_output(out)
+                return out.sort_index() if self.sort else out
 
         # dropna is truthy
         if isinstance(n, valid_containers):
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 47e5fe2742075..cdaf27e214d80 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -743,9 +743,6 @@ def test_cython_transform_frame(op, args, targop):
             else:
                 expected = gb.apply(targop)
 
-            if op == "shift" and type(gb_target.get("by")) is str:
-                expected = expected.drop(columns=gb_target.get("by"))
-
             expected = expected.sort_index(axis=1)
             tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1))
             tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1))

From abe8be36adc17e9db4fbf5ce09574e79b5a8b457 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 08:44:02 +0100
Subject: [PATCH 25/34] remove commented-out line and unreachable duplicate return

---
 pandas/core/groupby/groupby.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index a4c8ff1ec564e..ef802ffb37a00 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1633,7 +1633,6 @@ def describe(self, **kwargs):
             if self.axis == 1:
                 return result.T
             return result.unstack()
-        return result.unstack()
 
     def resample(self, rule, *args, **kwargs):
         """
@@ -1934,7 +1933,6 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
                 nth_values = list(set(n))
 
             nth_array = np.array(nth_values, dtype=np.intp)
-            # self._set_group_selection()
             with _group_selection_context(self):
 
                 mask_left = np.in1d(self._cumcount_array(), nth_array)

From 7112cf86d78eaa75ef2ee665735b839f717e3735 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 09:00:23 +0100
Subject: [PATCH 26/34] limited context manager in _make_wrapper

---
 pandas/core/groupby/groupby.py | 72 ++++++++++++++++------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ef802ffb37a00..ec09f132aa088 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -735,57 +735,54 @@ def _make_wrapper(self, name):
         assert name in self._apply_allowlist
 
         with _group_selection_context(self):
-
             # need to setup the selection
             # as are not passed directly but in the grouper
             f = getattr(self._obj_with_exclusions, name)
             if not isinstance(f, types.MethodType):
                 return self.apply(lambda self: getattr(self, name))
 
-            f = getattr(type(self._obj_with_exclusions), name)
-            sig = inspect.signature(f)
+        f = getattr(type(self._obj_with_exclusions), name)
+        sig = inspect.signature(f)
 
-            def wrapper(*args, **kwargs):
-                # a little trickery for aggregation functions that need an axis
-                # argument
-                if "axis" in sig.parameters:
-                    if kwargs.get("axis", None) is None:
-                        kwargs["axis"] = self.axis
+        def wrapper(*args, **kwargs):
+            # a little trickery for aggregation functions that need an axis
+            # argument
+            if "axis" in sig.parameters:
+                if kwargs.get("axis", None) is None:
+                    kwargs["axis"] = self.axis
 
-                def curried(x):
-                    return f(x, *args, **kwargs)
+            def curried(x):
+                return f(x, *args, **kwargs)
 
-                # preserve the name so we can detect it when calling plot methods,
-                # to avoid duplicates
-                curried.__name__ = name
+            # preserve the name so we can detect it when calling plot methods,
+            # to avoid duplicates
+            curried.__name__ = name
 
-                # special case otherwise extra plots are created when catching the
-                # exception below
-                if name in base.plotting_methods:
-                    return self.apply(curried)
+            # special case otherwise extra plots are created when catching the
+            # exception below
+            if name in base.plotting_methods:
+                return self.apply(curried)
 
-                try:
-                    return self._python_apply_general(
-                        curried, self._obj_with_exclusions
-                    )
-                except TypeError as err:
-                    if not re.search(
-                        "reduction operation '.*' not allowed for this dtype", str(err)
-                    ):
-                        # We don't have a cython implementation
-                        # TODO: is the above comment accurate?
-                        raise
+            try:
+                return self._python_apply_general(curried, self._obj_with_exclusions)
+            except TypeError as err:
+                if not re.search(
+                    "reduction operation '.*' not allowed for this dtype", str(err)
+                ):
+                    # We don't have a cython implementation
+                    # TODO: is the above comment accurate?
+                    raise
 
-                if self.obj.ndim == 1:
-                    # this can be called recursively, so need to raise ValueError
-                    raise ValueError
+            if self.obj.ndim == 1:
+                # this can be called recursively, so need to raise ValueError
+                raise ValueError
 
-                # GH#3688 try to operate item-by-item
-                result = self._aggregate_item_by_item(name, *args, **kwargs)
-                return result
+            # GH#3688 try to operate item-by-item
+            result = self._aggregate_item_by_item(name, *args, **kwargs)
+            return result
 
-            wrapper.__name__ = name
-            return wrapper
+        wrapper.__name__ = name
+        return wrapper
 
     def get_group(self, name, obj=None):
         """
@@ -992,7 +989,6 @@ def _agg_general(
         alias: str,
         npfunc: Callable,
     ):
-        # self._set_group_selection()
         with _group_selection_context(self):
             # try a cython aggregation if we can
             try:

From 0c8b1440ca5d2e21aa2ae47b9043899e39f83b12 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 10:12:36 +0100
Subject: [PATCH 27/34] removed unrelated test

---
 pandas/tests/groupby/test_function.py | 57 ---------------------------
 1 file changed, 57 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 97e99337f5ffd..e693962e57ac3 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -992,63 +992,6 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("as_index", [True, False])
-def test_describe_with_duplicate_output_column_names(as_index):
-    # GH #35314
-    df = pd.DataFrame(
-        {
-            "a": [99, 99, 99, 88, 88, 88],
-            "b": [1, 2, 3, 4, 5, 6],
-            "c": [10, 20, 30, 40, 50, 60],
-        },
-        columns=["a", "b", "b"],
-    )
-
-    expected = (
-        pd.DataFrame.from_records(
-            [
-                ("a", "count", 3.0, 3.0),
-                ("a", "mean", 88.0, 99.0),
-                ("a", "std", 0.0, 0.0),
-                ("a", "min", 88.0, 99.0),
-                ("a", "25%", 88.0, 99.0),
-                ("a", "50%", 88.0, 99.0),
-                ("a", "75%", 88.0, 99.0),
-                ("a", "max", 88.0, 99.0),
-                ("b", "count", 3.0, 3.0),
-                ("b", "mean", 5.0, 2.0),
-                ("b", "std", 1.0, 1.0),
-                ("b", "min", 4.0, 1.0),
-                ("b", "25%", 4.5, 1.5),
-                ("b", "50%", 5.0, 2.0),
-                ("b", "75%", 5.5, 2.5),
-                ("b", "max", 6.0, 3.0),
-                ("b", "count", 3.0, 3.0),
-                ("b", "mean", 5.0, 2.0),
-                ("b", "std", 1.0, 1.0),
-                ("b", "min", 4.0, 1.0),
-                ("b", "25%", 4.5, 1.5),
-                ("b", "50%", 5.0, 2.0),
-                ("b", "75%", 5.5, 2.5),
-                ("b", "max", 6.0, 3.0),
-            ],
-        )
-        .set_index([0, 1])
-        .T
-    )
-    expected.columns.names = [None, None]
-    expected.index = pd.Index([88, 99], name="a")
-
-    if as_index:
-        expected = expected.drop(columns=["a"], level=0)
-    else:
-        expected = expected.reset_index(drop=True)
-
-    result = df.groupby("a", as_index=as_index).describe()
-
-    tm.assert_frame_equal(result, expected)
-
-
 def test_groupby_mean_no_overflow():
     # Regression test for (#22487)
     df = pd.DataFrame(

From 755c8f0edd6fafbe972d57ade4334f48537746a8 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 27 Jul 2020 10:51:31 +0100
Subject: [PATCH 28/34] update comment

---
 pandas/tests/groupby/test_apply.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index ee6200d4e946b..d5da08f15b440 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1017,8 +1017,8 @@ def test_apply_with_timezones_aware():
 
 
 def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
-    # GH 34656
-    # GH 34271
+    # GH #34656
+    # GH #34271
     df = DataFrame(
         {
             "a": [99, 99, 99, 88, 88, 88],

From b61695be939364a2acb57e8a7bf1a641d479695a Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 3 Aug 2020 21:16:06 +0100
Subject: [PATCH 29/34] whatsnew on v1.1.1

---
 doc/source/whatsnew/v1.1.0.rst | 1 -
 doc/source/whatsnew/v1.1.1.rst | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 71521f82e991d..a49b29d691692 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1131,7 +1131,6 @@ Groupby/resample/rolling
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
 - Bug in :meth:`DataFrame.groupby` raising an ``AttributeError`` when selecting a column and aggregating with ``as_index=False`` (:issue:`35246`).
 - Bug in :meth:`DataFrameGroupBy.first` and :meth:`DataFrameGroupBy.last` that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
-- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst
index 443589308ad4c..46bc8234f27ec 100644
--- a/doc/source/whatsnew/v1.1.1.rst
+++ b/doc/source/whatsnew/v1.1.1.rst
@@ -40,6 +40,10 @@ Bug fixes
 
 -
 
+**Groupby/resample/rolling**
+
+- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
+
 **Indexing**
 
 -

From 673a35b8ecc80697d2444ea84301649b293f7e76 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 3 Aug 2020 21:17:39 +0100
Subject: [PATCH 30/34] comment typo

---
 pandas/tests/groupby/test_apply.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index d5da08f15b440..b639862a3e12e 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1032,7 +1032,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
         index=pd.Index([88, 99], name="a"),
     )
 
-    # Check output wehn no other methods are called before .apply()
+    # Check output when no other methods are called before .apply()
     grp = df.groupby(by="a")
     result = grp.apply(sum)
     tm.assert_frame_equal(result, expected)

From 42f53dd644fd89edc977a8bb6a5f3e7ceb34667c Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 3 Aug 2020 23:06:36 +0100
Subject: [PATCH 31/34] amend comment to restart tests

---
 pandas/tests/groupby/test_apply.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index b639862a3e12e..3a373838b169b 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1017,8 +1017,8 @@ def test_apply_with_timezones_aware():
 
 
 def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
-    # GH #34656
-    # GH #34271
+    # GH 34656
+    # GH 34271
     df = DataFrame(
         {
             "a": [99, 99, 99, 88, 88, 88],

From 18634a6064f1fce1c3310661e7bfeaeb851a6eaa Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 5 Aug 2020 15:25:13 +0100
Subject: [PATCH 32/34] whatsnew to 1.2.0

---
 doc/source/whatsnew/v1.1.1.rst | 4 ----
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst
index 46bc8234f27ec..443589308ad4c 100644
--- a/doc/source/whatsnew/v1.1.1.rst
+++ b/doc/source/whatsnew/v1.1.1.rst
@@ -40,10 +40,6 @@ Bug fixes
 
 -
 
-**Groupby/resample/rolling**
-
-- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
-
 **Indexing**
 
 -
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index b16ca0a80c5b4..ba89e88299e89 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -134,7 +134,7 @@ Groupby/resample/rolling
 
 -
 -
-
+- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
 
 Reshaping
 ^^^^^^^^^

From 1a0aa44501bdc6bea1d65aa4d1c0eb660df8c16d Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 5 Aug 2020 16:30:28 +0100
Subject: [PATCH 33/34] remove line that can't be triggered by test

---
 pandas/core/groupby/groupby.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ec09f132aa088..4598548932fcf 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1007,8 +1007,6 @@ def _agg_general(
                     # raised in _get_cython_function, in some cases can
                     #  be trimmed by implementing cython funcs for more dtypes
                     pass
-                else:
-                    raise
 
             # apply a non-cython aggregation
             result = self.aggregate(lambda x: npfunc(x, axis=self.axis))

From b09e41e5e64422d9e682e294004850a0ca184dbd Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Wed, 5 Aug 2020 18:44:18 +0100
Subject: [PATCH 34/34] restart tests

---
 pandas/tests/groupby/test_apply.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index ca8a8004a2820..edf0be919fc41 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1017,8 +1017,8 @@ def test_apply_with_timezones_aware():
 
 
 def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
-    # GH 34656
-    # GH 34271
+    # GH #34656
+    # GH #34271
     df = DataFrame(
         {
             "a": [99, 99, 99, 88, 88, 88],