From 88a1f3ae0dd8398221b23b9e7145db73d66136ce Mon Sep 17 00:00:00 2001 From: biddwan Date: Sun, 28 Jun 2020 19:24:49 +0600 Subject: [PATCH 1/5] added test case for aggregation with isnan --- .../tests/groupby/aggregate/test_aggregate.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 96db519578106..467366c4d0877 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -994,3 +994,20 @@ def test_groupby_get_by_index(): res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])}) expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A") pd.testing.assert_frame_equal(res, expected) + + +def test_aggregate_categorical_with_isnan(): + # GH 29837 + df = pd.DataFrame({'A': [1, 1, 1, 1], + 'B': [1, 2, 1, 2], + 'numerical_col': [.1, .2, np.nan, .3], + 'object_col': ['foo', 'bar', 'foo', 'fee'], + 'categorical_col': ['foo', 'bar', 'foo', 'fee'] + }) + + df = df.astype({'categorical_col': 'category'}) + + result = df.groupby(['A', 'B']).agg(lambda df: df.isna().sum()) + expected = pd.DataFrame(data={'numerical_col': [1.0, 0.0], 'object_col': [0, 0], 'categorical_col': [0, 0]}, + index=pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B'))) + pd.testing.assert_frame_equal(result, expected) From 304a0de1f149841ccf65d36376b0e4e164fd36fc Mon Sep 17 00:00:00 2001 From: biddwan Date: Sun, 28 Jun 2020 19:31:17 +0600 Subject: [PATCH 2/5] fix linting issues --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 467366c4d0877..5b99287d7fc7c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1008,6 +1008,8 @@ def
test_aggregate_categorical_with_isnan(): df = df.astype({'categorical_col': 'category'}) result = df.groupby(['A', 'B']).agg(lambda df: df.isna().sum()) - expected = pd.DataFrame(data={'numerical_col': [1.0, 0.0], 'object_col': [0, 0], 'categorical_col': [0, 0]}, - index=pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B'))) + index=pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B')) + expected = pd.DataFrame(data={'numerical_col': [1.0, 0.0], + 'object_col': [0, 0], 'categorical_col': [0, 0]}, + index=index) pd.testing.assert_frame_equal(result, expected) From 5f54200e6f61b8baeef6204a08e16f5e771ed2bd Mon Sep 17 00:00:00 2001 From: biddwan Date: Sun, 28 Jun 2020 19:36:59 +0600 Subject: [PATCH 3/5] fix linting issues --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5b99287d7fc7c..32873321c0d6e 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1008,7 +1008,7 @@ def test_aggregate_categorical_with_isnan(): df = df.astype({'categorical_col': 'category'}) result = df.groupby(['A', 'B']).agg(lambda df: df.isna().sum()) - index=pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B')) + index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B')) expected = pd.DataFrame(data={'numerical_col': [1.0, 0.0], 'object_col': [0, 0], 'categorical_col': [0, 0]}, index=index) From 46259b77469e7a956c10cd15ce2ee7229a9fc4b3 Mon Sep 17 00:00:00 2001 From: biddwan Date: Sun, 28 Jun 2020 20:29:44 +0600 Subject: [PATCH 4/5] fix linting --- .../tests/groupby/aggregate/test_aggregate.py | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 32873321c0d6e..6f3f4b23e23bd 100644 ---
a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -998,18 +998,26 @@ def test_groupby_get_by_index(): def test_aggregate_categorical_with_isnan(): # GH 29837 - df = pd.DataFrame({'A': [1, 1, 1, 1], - 'B': [1, 2, 1, 2], - 'numerical_col': [.1, .2, np.nan, .3], - 'object_col': ['foo', 'bar', 'foo', 'fee'], - 'categorical_col': ['foo', 'bar', 'foo', 'fee'] - }) - - df = df.astype({'categorical_col': 'category'}) - - result = df.groupby(['A', 'B']).agg(lambda df: df.isna().sum()) - index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=('A', 'B')) - expected = pd.DataFrame(data={'numerical_col': [1.0, 0.0], - 'object_col': [0, 0], 'categorical_col': [0, 0]}, - index=index) + df = pd.DataFrame( + { + "A": [1, 1, 1, 1], + "B": [1, 2, 1, 2], + "numerical_col": [0.1, 0.2, np.nan, 0.3], + "object_col": ["foo", "bar", "foo", "fee"], + "categorical_col": ["foo", "bar", "foo", "fee"], + } + ) + + df = df.astype({"categorical_col": "category"}) + + result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum()) + index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) + expected = pd.DataFrame( + data={ + "numerical_col": [1.0, 0.0], + "object_col": [0, 0], + "categorical_col": [0, 0], + }, + index=index, + ) pd.testing.assert_frame_equal(result, expected) From b2dcb9da52aedeb456fd7bea94aaaf88b3ab0655 Mon Sep 17 00:00:00 2001 From: biddwan09 Date: Sun, 28 Jun 2020 21:04:48 +0600 Subject: [PATCH 5/5] Update pandas/tests/groupby/aggregate/test_aggregate.py changed pd.testing to tm Co-authored-by: Simon Hawkins --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 6f3f4b23e23bd..7bc2ce10bba6d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1020,4 +1020,4 @@ def 
test_aggregate_categorical_with_isnan(): }, index=index, ) - pd.testing.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected)