From 2e77cef7dadd9607871f486371bb395751e293f3 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 10:08:49 -0500 Subject: [PATCH 01/11] TST: groupby apply with indexing and colunm aggregation returns the column (#7002) --- pandas/tests/groupby/test_apply.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index bc8067212d60e..ec454f1b3e93f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -961,3 +961,13 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) + + +def test_apply_function_with_indexing_return_column(): + # GH: 7002 + df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], + 'foo2': np.random.randn(6)}) + result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) + expected = df.groupby('foo1', as_index=False).mean() + tm.assert_frame_equal(result, expected) + assert 'foo1' in result.columns From 99fee1fd13eaba08d4ca162956711d6766420596 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 10:34:20 -0500 Subject: [PATCH 02/11] TST: groupby apply with indexing and column aggregation returns the column (#7002) --- pandas/tests/groupby/test_apply.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index ec454f1b3e93f..bc8067212d60e 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -961,13 +961,3 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) - - -def test_apply_function_with_indexing_return_column(): - # GH: 7002 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) - result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) - expected = df.groupby('foo1', as_index=False).mean() - tm.assert_frame_equal(result, expected) - assert 'foo1' in result.columns From e3a49fe6cf57cb3e708167875eb58cd8cab86ca2 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 10:36:15 -0500 Subject: [PATCH 03/11] TST: groupby apply with indexing and column aggregation returns the column (#7002) --- pandas/tests/groupby/test_apply.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index bc8067212d60e..ec454f1b3e93f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -961,3 +961,13 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) + + +def test_apply_function_with_indexing_return_column(): + # GH: 7002 + df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], + 'foo2': np.random.randn(6)}) + result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) + expected = df.groupby('foo1', as_index=False).mean() + tm.assert_frame_equal(result, expected) + assert 'foo1' in result.columns From eef7d59f3c76db6d972d9f8effdc5cc596223cfe Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 11:30:16 -0500 Subject: [PATCH 04/11] TST/CLN: reformat (#7002) --- pandas/tests/groupby/test_apply.py | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index ec454f1b3e93f..835f5d94fd3cc 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -5,8 +5,8 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, bdate_range import pandas._testing as tm +from pandas import DataFrame, Index, MultiIndex, Series, bdate_range def test_apply_issues(): @@ -65,8 +65,8 @@ def test_apply_trivial(): @pytest.mark.xfail( reason="GH#20066; function passed into apply " - "returns a DataFrame with the same index " - "as the one to create GroupBy object." + "returns a DataFrame with the same index " + "as the one to create GroupBy object." ) def test_apply_trivial_fail(): # GH 20066 @@ -124,14 +124,14 @@ def f(g): (DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}), [1, 2]), (DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}), [1, 2, 3]), ( - DataFrame( - { - "a": list("aaabbbcccc"), - "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], - "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], - } - ), - ["a", "b", "c"], + DataFrame( + { + "a": list("aaabbbcccc"), + "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], + "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], + } + ), + ["a", "b", "c"], ), (DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]), [1, 2]), ], @@ -155,6 +155,7 @@ def test_group_apply_once_per_group(df, group_names): # once per group names = [] + # cannot parameterize over the functions since they need external # `names` to detect side effects @@ -683,7 +684,6 @@ def func_with_date(batch): def test_gb_apply_list_of_unequal_len_arrays(): - # GH1738 df = DataFrame( { @@ -897,12 +897,12 @@ def test_apply_index_has_complex_internals(index): (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), ( - lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, - [{0: 0, 1: 1}, {0: 2, 1: 3}], + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], ), ( - lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], - [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], ), ], ) From cfdbcf0198c0a98b01694981a8bc3297acbda4ad Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 12:27:51 -0500 Subject: [PATCH 05/11] TST: indent (#7002) --- pandas/tests/groupby/test_apply.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 835f5d94fd3cc..cc4d2a49eed18 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -124,14 +124,14 @@ def f(g): (DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}), [1, 2]), (DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}), [1, 2, 3]), ( - DataFrame( - { - "a": list("aaabbbcccc"), - "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], - "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], - } - ), - ["a", "b", "c"], + DataFrame( + { + "a": list("aaabbbcccc"), + "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4], + "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8], + } + ), + ["a", "b", "c"], ), (DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]), [1, 2]), ], @@ -897,12 +897,12 @@ def test_apply_index_has_complex_internals(index): (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), ( - lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, - [{0: 0, 1: 1}, {0: 2, 1: 3}], + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], ), ( - lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], - [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], ), ], ) From e8f181cf3ca4566de99d1b190e573ac1f1b84856 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 12:51:59 -0500 Subject: [PATCH 06/11] TST: indent (#7002) --- pandas/tests/groupby/test_apply.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index cc4d2a49eed18..a4a0f863cf6b3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -5,8 +5,8 @@ import pytest import pandas as pd -import pandas._testing as tm from pandas import DataFrame, Index, MultiIndex, Series, bdate_range +import pandas._testing as tm def test_apply_issues(): @@ -65,8 +65,8 @@ def test_apply_trivial(): @pytest.mark.xfail( reason="GH#20066; function passed into apply " - "returns a DataFrame with the same index " - "as the one to create GroupBy object." + "returns a DataFrame with the same index " + "as the one to create GroupBy object." ) def test_apply_trivial_fail(): # GH 20066 @@ -965,9 +965,13 @@ def fn(x): def test_apply_function_with_indexing_return_column(): # GH: 7002 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) - result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) - expected = df.groupby('foo1', as_index=False).mean() + df = DataFrame( + { + "foo1": ["one", "two", "two", "three", "one", "two"], + "foo2": np.random.randn(6), + } + ) + result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) + expected = df.groupby("foo1", as_index=False).mean() tm.assert_frame_equal(result, expected) - assert 'foo1' in result.columns + assert "foo1" in result.columns From 0bd567c5b17d200c6259c54b2d8990d634bb9eee Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 15:53:13 -0500 Subject: [PATCH 07/11] TST: deterministic test case (#7002) --- pandas/tests/groupby/test_apply.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index a4a0f863cf6b3..da6e7c6851667 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -968,10 +968,9 @@ def test_apply_function_with_indexing_return_column(): df = DataFrame( { "foo1": ["one", "two", "two", "three", "one", "two"], - "foo2": np.random.randn(6), + "foo2": [1, 2, 3, 4, 5, 6], } ) result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) expected = df.groupby("foo1", as_index=False).mean() tm.assert_frame_equal(result, expected) - assert "foo1" in result.columns From 787d9ba4a183f66cb8445202c07b4afbb8f4444f Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Tue, 9 Jun 2020 00:31:09 -0500 Subject: [PATCH 08/11] TST: explicitly constructed expected DataFrame (#7002) --- pandas/tests/groupby/test_apply.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index da6e7c6851667..49514f0b90cc5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -968,9 +968,14 @@ def test_apply_function_with_indexing_return_column(): df = DataFrame( { "foo1": ["one", "two", "two", "three", "one", "two"], - "foo2": [1, 2, 3, 4, 5, 6], + "foo2": [1, 2, 4, 4, 5, 6], } ) result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) - expected = df.groupby("foo1", as_index=False).mean() + expected = DataFrame( + { + "foo1": ["one", "three", "two"], + "foo2": [3.0, 4.0, 4.0], + } + ) tm.assert_frame_equal(result, expected) From 8139d8ee9c0491515b19f4156affce2ced9772b2 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Tue, 9 Jun 2020 01:02:15 -0500 Subject: [PATCH 09/11] TST: explicitly constructed expected DataFrame (#7002) --- pandas/tests/groupby/test_apply.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 49514f0b90cc5..84875ae4de494 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -972,10 +972,5 @@ def test_apply_function_with_indexing_return_column(): } ) result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) - expected = DataFrame( - { - "foo1": ["one", "three", "two"], - "foo2": [3.0, 4.0, 4.0], - } - ) + expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0],}) tm.assert_frame_equal(result, expected) From c8d5fca1bd86296707f6917403306aa20262c116 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Tue, 9 Jun 2020 01:09:45 -0500 Subject: [PATCH 10/11] TST: explicitly constructed expected DataFrame (#7002) --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 84875ae4de494..7d67ebbe1c1ec 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -972,5 +972,5 @@ def test_apply_function_with_indexing_return_column(): } ) result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) - expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0],}) + expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) tm.assert_frame_equal(result, expected) From 5936715c233f6826cfcabb9214b4e6d13751a672 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Sun, 14 Jun 2020 09:25:05 -0500 Subject: [PATCH 11/11] TST: revert unrelated changes --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 7d67ebbe1c1ec..8468a21904bf8 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -155,7 +155,6 @@ def test_group_apply_once_per_group(df, group_names): # once per group names = [] - # cannot parameterize over the functions since they need external # `names` to detect side effects @@ -684,6 +683,7 @@ def func_with_date(batch): def test_gb_apply_list_of_unequal_len_arrays(): + # GH1738 df = DataFrame( {