From 74705bdc402fe556fd457e6147668feb8fe861ee Mon Sep 17 00:00:00 2001
From: Vivike Lapoutre
Date: Sat, 20 Jun 2020 17:35:59 +0200
Subject: [PATCH 1/3] add test

---
 pandas/tests/groupby/test_groupby.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0d040b8e6955a..631460f880d90 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -726,6 +726,24 @@ def test_as_index_series_column_slice_raises(df):
         grouped["C"].__getitem__("D")
 
 
+def test_as_index_produces_same_min():
+    # GH26321
+    dates = ["2019-05-09", "2019-05-09", "2019-05-09"]
+    date_series = pd.Series(dates)
+    date_series_parsed = pd.to_datetime(date_series, format="%Y-%m-%d").dt.date
+
+    df = pd.DataFrame(
+        {"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": date_series_parsed}
+    )
+
+    tm.assert_series_equal(
+        df.groupby("b")["c"].min(),
+        df.groupby("b", as_index=False)["c"].min()["c"],
+        check_index_type=False,
+        check_names=False,
+    )
+
+
 def test_groupby_as_index_cython(df):
     data = df
 

From 74e2ab12515d73beb19b3582318020f489173c93 Mon Sep 17 00:00:00 2001
From: Vivike Lapoutre
Date: Sat, 27 Jun 2020 21:50:38 +0200
Subject: [PATCH 2/3] PR comments

---
 pandas/tests/groupby/test_function.py | 21 +++++++++++++++++++++
 pandas/tests/groupby/test_groupby.py  | 18 ------------------
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 9303a084f1e71..012405b2de4cd 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -85,6 +85,27 @@ def test_max_min_non_numeric():
     assert "ss" in result
 
 
+def test_min_date_with_nans():
+    # GH26321
+    dates = pd.to_datetime(
+        pd.Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d"
+    ).dt.date
+    df = pd.DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
+
+    result = df.groupby("b")["c"].min()
+    expected = pd.to_datetime(
+        pd.Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
+    ).dt.date
+    expected.index.name = "b"
+    tm.assert_series_equal(result, expected)
+
+    result_with_as_index = df.groupby("b", as_index=False)["c"].min()["c"]
+    expected_with_as_index = pd.to_datetime(
+        pd.Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
+    ).dt.date
+    tm.assert_series_equal(result_with_as_index, expected_with_as_index)
+
+
 def test_intercept_builtin_sum():
     s = Series([1.0, 2.0, np.nan, 3.0])
     grouped = s.groupby([0, 1, 2, 2])
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 631460f880d90..0d040b8e6955a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -726,24 +726,6 @@ def test_as_index_series_column_slice_raises(df):
         grouped["C"].__getitem__("D")
 
 
-def test_as_index_produces_same_min():
-    # GH26321
-    dates = ["2019-05-09", "2019-05-09", "2019-05-09"]
-    date_series = pd.Series(dates)
-    date_series_parsed = pd.to_datetime(date_series, format="%Y-%m-%d").dt.date
-
-    df = pd.DataFrame(
-        {"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": date_series_parsed}
-    )
-
-    tm.assert_series_equal(
-        df.groupby("b")["c"].min(),
-        df.groupby("b", as_index=False)["c"].min()["c"],
-        check_index_type=False,
-        check_names=False,
-    )
-
-
 def test_groupby_as_index_cython(df):
     data = df
 

From 553c312e94bf90f6ccbc00815fdb57e34c8807f9 Mon Sep 17 00:00:00 2001
From: Vivike Lapoutre
Date: Tue, 30 Jun 2020 20:55:25 +0200
Subject: [PATCH 3/3] attempt to make the code cleaner

---
 pandas/tests/groupby/test_function.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 012405b2de4cd..1da4624c306c2 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -92,18 +92,15 @@ def test_min_date_with_nans():
     ).dt.date
     df = pd.DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
 
-    result = df.groupby("b")["c"].min()
+    result = df.groupby("b", as_index=False)["c"].min()["c"]
     expected = pd.to_datetime(
         pd.Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
     ).dt.date
-    expected.index.name = "b"
     tm.assert_series_equal(result, expected)
 
-    result_with_as_index = df.groupby("b", as_index=False)["c"].min()["c"]
-    expected_with_as_index = pd.to_datetime(
-        pd.Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
-    ).dt.date
-    tm.assert_series_equal(result_with_as_index, expected_with_as_index)
+    result = df.groupby("b")["c"].min()
+    expected.index.name = "b"
+    tm.assert_series_equal(result, expected)
 
 
 def test_intercept_builtin_sum():