From 4bb68ed3736dc95d2766ef40cd75845e2a7f3c1e Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Sun, 15 Jan 2023 14:12:37 +0800
Subject: [PATCH 1/4] TST: test-groupby-method-drop-na

---
 pandas/tests/groupby/test_groupby.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5384b228850f4..cec702bcb472d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2846,3 +2846,12 @@ def test_sum_of_booleans(n):
     result = df.groupby("groupby_col").sum()
     expected = DataFrame({"bool": [n]}, index=Index([1], name="groupby_col"))
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("method, n", [("head", 1), ("nth", 0)])
+def test_groupby_method_drop_na(method, n):
+    # GH 21755
+    df = DataFrame({"A": ["a", np.nan, "b", np.nan, "c"], "B": range(5)})
+    result = df.groupby("A").agg(method, n=n)
+    expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
+    tm.assert_frame_equal(result, expected)

From d29901958cdfab3214c7c1ce6e4f107502ecf479 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Sun, 15 Jan 2023 20:14:02 +0800
Subject: [PATCH 2/4] add first last tail

---
 pandas/tests/groupby/test_groupby.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index cec702bcb472d..d5ccae1f8d642 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2848,10 +2848,20 @@ def test_sum_of_booleans(n):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("method, n", [("head", 1), ("nth", 0)])
-def test_groupby_method_drop_na(method, n):
+@pytest.mark.parametrize("method", ["head", "tail", "nth", "first", "last"])
+def test_groupby_method_drop_na(method):
     # GH 21755
     df = DataFrame({"A": ["a", np.nan, "b", np.nan, "c"], "B": range(5)})
-    result = df.groupby("A").agg(method, n=n)
-    expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
+
+    if method == "nth":
+        result = df.groupby("A").agg(method, n=0)
+    else:
+        result = df.groupby("A").agg(method)
+
+    if method in ["first", "last"]:
+        expected = DataFrame({"B": [0, 2, 4]})
+        index = Series(["a", "b", "c"], name="A")
+        expected.set_index(index, inplace=True)
+    else:
+        expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
     tm.assert_frame_equal(result, expected)

From f0fe53674958ca82482cd603cd6bd4f796a4ab54 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Mon, 16 Jan 2023 11:15:39 +0800
Subject: [PATCH 3/4] construct DataFrame in one line

---
 pandas/tests/groupby/test_groupby.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d5ccae1f8d642..e17d6b7c4feb5 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2859,9 +2859,9 @@ def test_groupby_method_drop_na(method):
         result = df.groupby("A").agg(method)
 
     if method in ["first", "last"]:
-        expected = DataFrame({"B": [0, 2, 4]})
-        index = Series(["a", "b", "c"], name="A")
-        expected.set_index(index, inplace=True)
+        expected = DataFrame({"B": [0, 2, 4]}).set_index(
+            Series(["a", "b", "c"], name="A")
+        )
     else:
         expected = DataFrame({"A": ["a", "b", "c"], "B": [0, 2, 4]}, index=[0, 2, 4])
     tm.assert_frame_equal(result, expected)

From e4fce3a50abb41dce44f59886fdb7c86b346f9b4 Mon Sep 17 00:00:00 2001
From: luke <2736230899@qq.com>
Date: Tue, 17 Jan 2023 12:07:58 +0800
Subject: [PATCH 4/4] use actual func instead of agg

---
 pandas/tests/groupby/test_groupby.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e17d6b7c4feb5..b4a243671e287 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2854,9 +2854,9 @@ def test_groupby_method_drop_na(method):
     df = DataFrame({"A": ["a", np.nan, "b", np.nan, "c"], "B": range(5)})
 
     if method == "nth":
-        result = df.groupby("A").agg(method, n=0)
+        result = getattr(df.groupby("A"), method)(n=0)
     else:
-        result = df.groupby("A").agg(method)
+        result = getattr(df.groupby("A"), method)()
 
     if method in ["first", "last"]:
         expected = DataFrame({"B": [0, 2, 4]}).set_index(