From 88f0108d7c393bfac459cdbd62767aca1bcae7ff Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 01:50:43 -0400 Subject: [PATCH 1/7] Implement test for GH #21340 --- pandas/tests/groupby/test_grouping.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 063b0ce38387f..d6c92873277c1 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -823,6 +823,39 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) +def test_groupby_tuple_keys_handle_multiindex(): + # https://github.com/pandas-dev/pandas/issues/21340 + df = DataFrame( + { + "num1": [0, 8, 9, 4, 3, 3, 5, 9, 3, 6], + "num2": [3, 8, 6, 4, 9, 2, 1, 7, 0, 9], + "num3": [6, 5, 7, 8, 5, 1, 1, 10, 7, 8], + "category_tuple": [ + (0, 1), + (0, 1), + (0, 1), + (0, 4), + (2, 3), + (2, 3), + (2, 3), + (2, 3), + (5,), + (6,), + ], + "category_string": list("aaabbbbcde"), + } + ) + df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] + expected = df.sort_values(by=["category_tuple", "num1"]) + + msg = "DataFrameGroupBy.apply operated on the grouping columns" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = df.groupby("category_tuple").apply(lambda x: x.sort_values(by="num1")) + expected = expected[result.columns] + + tm.assert_frame_equal(result.reset_index(drop=True), expected) + + # get_group # -------------------------------- From bd4cb40c0a55c88d3661be7a9f504f8885e91b3c Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 02:05:08 -0400 Subject: [PATCH 2/7] minor fixup --- pandas/tests/groupby/test_grouping.py | 58 +++++++++++++-------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index d6c92873277c1..97a07c3105be3 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -823,37 +823,37 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) -def test_groupby_tuple_keys_handle_multiindex(): - # https://github.com/pandas-dev/pandas/issues/21340 - df = DataFrame( - { - "num1": [0, 8, 9, 4, 3, 3, 5, 9, 3, 6], - "num2": [3, 8, 6, 4, 9, 2, 1, 7, 0, 9], - "num3": [6, 5, 7, 8, 5, 1, 1, 10, 7, 8], - "category_tuple": [ - (0, 1), - (0, 1), - (0, 1), - (0, 4), - (2, 3), - (2, 3), - (2, 3), - (2, 3), - (5,), - (6,), - ], - "category_string": list("aaabbbbcde"), - } - ) - df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] - expected = df.sort_values(by=["category_tuple", "num1"]) + def test_groupby_tuple_keys_handle_multiindex(self): + # https://github.com/pandas-dev/pandas/issues/21340 + df = DataFrame( + { + "num1": [0, 8, 9, 4, 3, 3, 5, 9, 3, 6], + "num2": [3, 8, 6, 4, 9, 2, 1, 7, 0, 9], + "num3": [6, 5, 7, 8, 5, 1, 1, 10, 7, 8], + "category_tuple": [ + (0, 1), + (0, 1), + (0, 1), + (0, 4), + (2, 3), + (2, 3), + (2, 3), + (2, 3), + (5,), + (6,), + ], + "category_string": list("aaabbbbcde"), + } + ) + df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] + expected = df.sort_values(by=["category_tuple", "num1"]) - msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = df.groupby("category_tuple").apply(lambda x: x.sort_values(by="num1")) - expected = expected[result.columns] + msg = "DataFrameGroupBy.apply operated on the grouping columns" + with tm.assert_produces_warning(DeprecationWarning, match=msg): + result = df.groupby("category_tuple").apply(lambda x: x.sort_values(by="num1")) + expected = expected[result.columns] - tm.assert_frame_equal(result.reset_index(drop=True), expected) + tm.assert_frame_equal(result.reset_index(drop=True), expected) # get_group From 6538334dbe5f781639f36864eab7461bc9ff3a45 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 02:07:34 -0400 Subject: [PATCH 3/7] Lint contribution --- pandas/tests/groupby/test_grouping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 97a07c3105be3..0ebc70c324a7b 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -822,7 +822,6 @@ def test_groupby_multiindex_level_empty(self): ) tm.assert_frame_equal(result, expected) - def test_groupby_tuple_keys_handle_multiindex(self): # https://github.com/pandas-dev/pandas/issues/21340 df = DataFrame( @@ -850,7 +849,9 @@ def test_groupby_tuple_keys_handle_multiindex(self): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = df.groupby("category_tuple").apply(lambda x: x.sort_values(by="num1")) + result = df.groupby("category_tuple").apply( + lambda x: x.sort_values(by="num1") + ) expected = expected[result.columns] tm.assert_frame_equal(result.reset_index(drop=True), expected) From d5d69fbad929a0616ef784ea270295309d8dd8b2 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 03:23:22 -0400 Subject: [PATCH 4/7] Make spacing consistent --- pandas/tests/groupby/test_grouping.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 0ebc70c324a7b..0278cf257cb75 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -846,14 +846,13 @@ def test_groupby_tuple_keys_handle_multiindex(self): ) df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] expected = df.sort_values(by=["category_tuple", "num1"]) - + msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = df.groupby("category_tuple").apply( lambda x: x.sort_values(by="num1") ) expected = expected[result.columns] - tm.assert_frame_equal(result.reset_index(drop=True), expected) From a7da0375a0a3f7f121c1d9421a93787241bce7a7 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 03:24:21 -0400 Subject: [PATCH 5/7] Lint --- pandas/tests/groupby/test_grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 0278cf257cb75..a5799ef42c298 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -846,7 +846,7 @@ def test_groupby_tuple_keys_handle_multiindex(self): ) df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] expected = df.sort_values(by=["category_tuple", "num1"]) - + msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = df.groupby("category_tuple").apply( From c0c41f4016f26219e1135219bef5d93b11b18eb6 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 03:32:47 -0400 Subject: [PATCH 6/7] Remove duplicate column construction --- pandas/tests/groupby/test_grouping.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index a5799ef42c298..c72d8ebf628fe 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -844,7 +844,6 @@ def test_groupby_tuple_keys_handle_multiindex(self): "category_string": list("aaabbbbcde"), } ) - df = df[["category_tuple", "category_string", "num1", "num2", "num3"]] expected = df.sort_values(by=["category_tuple", "num1"]) msg = "DataFrameGroupBy.apply operated on the grouping columns" From 321b8ff3de4c1203cd80c68a53993f7df7679153 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Wed, 8 May 2024 13:09:14 -0400 Subject: [PATCH 7/7] Avoid DeprecationWarning by setting include_groups=False in apply --- pandas/tests/groupby/test_grouping.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index c72d8ebf628fe..39eadd32f300d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -845,12 +845,9 @@ def test_groupby_tuple_keys_handle_multiindex(self): } ) expected = df.sort_values(by=["category_tuple", "num1"]) - - msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - result = df.groupby("category_tuple").apply( - lambda x: x.sort_values(by="num1") - ) + result = df.groupby("category_tuple").apply( + lambda x: x.sort_values(by="num1"), include_groups=False + ) expected = expected[result.columns] tm.assert_frame_equal(result.reset_index(drop=True), expected)