From ea440647f1ab78db604600825448858324fefffc Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 30 Jan 2020 15:09:47 +0000 Subject: [PATCH 01/13] fix bug which prevented .apply to being called with function which returned list --- doc/source/whatsnew/v1.0.1.rst | 2 +- pandas/_libs/reduction.pyx | 6 ++++-- pandas/tests/groupby/test_apply.py | 7 +++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ff8433c7cafd9..e2a357a0fe231 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Bug fixes ~~~~~~~~~ - +- bug ``Groupby.apply`` was returning ``TypeError`` if called with TypeError if called with `lambda x: x.index.to_list()` (:issue:`31441`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 8571761f77265..9b89ddd816f14 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,4 +1,4 @@ -from copy import copy +from copy import copy, deepcopy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -499,7 +499,9 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if not is_scalar(piece): + if isinstance(piece, list): + piece = deepcopy(piece) + elif not is_scalar(piece): # Need to copy data to avoid appending references if hasattr(piece, "copy"): piece = piece.copy(deep="all") diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index c18ef73203914..f530025fb933a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -827,3 +827,10 @@ def test_apply_index_has_complex_internals(index): df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) result = df.groupby("group").apply(lambda x: x) tm.assert_frame_equal(result, df) + + +def test_apply_function_returns_list(): + df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) + result = df.groupby("groups").apply(lambda x: x.index.to_list()) + expected = pd.Series([[0, 1], [2, 3]], index=pd.Index(["A", "B"], name="groups")) + tm.assert_series_equal(result, expected) From 009ccd1dc9a10002ee1c860b8aad6545ab50a5d4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 30 Jan 2020 16:12:31 +0000 Subject: [PATCH 02/13] :pencil: reword whatsnew --- doc/source/whatsnew/v1.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index e2a357a0fe231..ff32f2496e7f0 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Bug fixes ~~~~~~~~~ -- bug ``Groupby.apply`` was returning ``TypeError`` if called with TypeError if called with `lambda x: x.index.to_list()` (:issue:`31441`) +- bug ``Groupby.apply`` was returning ``TypeError`` if called with function which returned list (:issue:`31441`) Categorical ^^^^^^^^^^^ From 1c341eeaf24fc4180fe92f04def75b2f2f5f6d81 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 31 Jan 2020 11:21:54 +0000 Subject: [PATCH 03/13] change fix according to review --- doc/source/whatsnew/v1.0.1.rst | 2 +- pandas/_libs/reduction.pyx | 6 ++---- pandas/tests/groupby/test_apply.py | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ff32f2496e7f0..7b08e3ee1f004 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Bug fixes ~~~~~~~~~ -- bug ``Groupby.apply`` was returning ``TypeError`` if called with function which returned list (:issue:`31441`) +- Bug in :meth:`GroupBy.apply` was raising ``TypeError`` if called with function which returned list (:issue:`31441`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 9b89ddd816f14..bd2087a20c3e9 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -499,12 +499,10 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if isinstance(piece, list): - piece = deepcopy(piece) - elif not is_scalar(piece): + if not is_scalar(piece): # Need to copy data to avoid appending references if hasattr(piece, "copy"): - piece = piece.copy(deep="all") + piece = deepcopy(piece) else: piece = copy(piece) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index f530025fb933a..82a5c3e07e80f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -830,6 +830,7 @@ def test_apply_index_has_complex_internals(index): def test_apply_function_returns_list(): + # GH 31441 df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) result = df.groupby("groups").apply(lambda x: x.index.to_list()) expected = pd.Series([[0, 1], [2, 3]], index=pd.Index(["A", "B"], name="groups")) From 4ad59d29ad9b027556cfb798ead4901a8e1c5dc4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 31 Jan 2020 14:01:29 +0000 Subject: [PATCH 04/13] Revert patch to original --- pandas/_libs/reduction.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index bd2087a20c3e9..9b89ddd816f14 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -499,10 +499,12 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if not is_scalar(piece): + if isinstance(piece, list): + piece = deepcopy(piece) + elif not is_scalar(piece): # Need to copy data to avoid appending references if hasattr(piece, "copy"): - piece = deepcopy(piece) + piece = piece.copy(deep="all") else: piece = copy(piece) From a198cc8f4733e9ac41198b2017eae629ee875c06 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 31 Jan 2020 21:38:59 +0000 Subject: [PATCH 05/13] special case series rather than list --- pandas/_libs/reduction.pyx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 9b89ddd816f14..dbd241baea7f8 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,4 +1,4 @@ -from copy import copy, deepcopy +from copy import copy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -499,11 +499,8 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if isinstance(piece, list): - piece = deepcopy(piece) - elif not is_scalar(piece): - # Need to copy data to avoid appending references - if hasattr(piece, "copy"): + if not is_scalar(piece): + if hasattr(piece, "_typ") and piece._typ == "series": piece = piece.copy(deep="all") else: piece = copy(piece) From 45235f1042b5715160afe54c379bac69a3f072b0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 31 Jan 2020 22:10:15 +0000 Subject: [PATCH 06/13] special case dataframe as well --- pandas/_libs/reduction.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index dbd241baea7f8..ac7bf39b8a170 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -16,7 +16,7 @@ from numpy cimport (ndarray, cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.lib import maybe_convert_objects, is_scalar +from pandas._libs.lib import maybe_convert_objects cdef _check_result_array(object obj, Py_ssize_t cnt): @@ -499,11 +499,13 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if not is_scalar(piece): - if hasattr(piece, "_typ") and piece._typ == "series": - piece = piece.copy(deep="all") - else: - piece = copy(piece) + if hasattr(piece, "_typ") and piece._typ in [ + "series", + "dataframe", + ]: + piece = piece.copy(deep="all") + else: + piece = copy(piece) results.append(piece) From b07ebbcbc0f9aaf40c7490b042f41b98c125f743 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 31 Jan 2020 22:12:36 +0000 Subject: [PATCH 07/13] keep is_scalar in --- pandas/_libs/reduction.pyx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index ac7bf39b8a170..923e5c9bbaa55 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -16,7 +16,7 @@ from numpy cimport (ndarray, cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.lib import maybe_convert_objects +from pandas._libs.lib import maybe_convert_objects, is_scalar cdef _check_result_array(object obj, Py_ssize_t cnt): @@ -499,13 +499,14 @@ def apply_frame_axis0(object frame, object f, object names, # `piece` might not have an index, could be e.g. an int pass - if hasattr(piece, "_typ") and piece._typ in [ - "series", - "dataframe", - ]: - piece = piece.copy(deep="all") - else: - piece = copy(piece) + if not is_scalar(piece): + if hasattr(piece, "_typ") and piece._typ in [ + "series", + "dataframe", + ]: + piece = piece.copy(deep="all") + else: + piece = copy(piece) results.append(piece) From cda4135449b99d3154d43cae46b904641f60fe94 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 08:46:05 +0000 Subject: [PATCH 08/13] use try-except --- doc/source/whatsnew/v1.0.1.rst | 2 +- pandas/_libs/reduction.pyx | 11 ++++------- pandas/tests/groupby/test_apply.py | 17 ++++++++++++++--- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 7b08e3ee1f004..ce0634c879b93 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Bug fixes ~~~~~~~~~ -- Bug in :meth:`GroupBy.apply` was raising ``TypeError`` if called with function which returned list (:issue:`31441`) +- Bug in :meth:`GroupBy.apply` was raising ``TypeError`` if called with function which returned a non-pandas non-scalar object (e.g. a list) (:issue:`31441`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 923e5c9bbaa55..317f617a26c9c 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,4 +1,4 @@ -from copy import copy +from copy import deepcopy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -500,13 +500,10 @@ def apply_frame_axis0(object frame, object f, object names, pass if not is_scalar(piece): - if hasattr(piece, "_typ") and piece._typ in [ - "series", - "dataframe", - ]: + try: piece = piece.copy(deep="all") - else: - piece = copy(piece) + except (TypeError, AttributeError): + piece = deepcopy(piece) results.append(piece) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 82a5c3e07e80f..fcc273cf3cfa0 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -829,9 +829,20 @@ def test_apply_index_has_complex_internals(index): tm.assert_frame_equal(result, df) -def test_apply_function_returns_list(): +@pytest.mark.parametrize( + "function, expected_values", + [ + (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), + (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + ( + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + ), + ], +) +def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): # GH 31441 df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) - result = df.groupby("groups").apply(lambda x: x.index.to_list()) - expected = pd.Series([[0, 1], [2, 3]], index=pd.Index(["A", "B"], name="groups")) + result = df.groupby("groups").apply(function) + expected = pd.Series(expected_values, index=pd.Index(["A", "B"], name="groups")) tm.assert_series_equal(result, expected) From ba735f8d5983d5f314b27859844033eb92b74b0e Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 08:56:29 +0000 Subject: [PATCH 09/13] Add dict test case --- pandas/tests/groupby/test_apply.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index fcc273cf3cfa0..26ee35d399602 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -834,6 +834,10 @@ def test_apply_index_has_complex_internals(index): [ (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + ( + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], + ), ( lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], From eaf824e540603f4230b4400af3ae7c0402a9ff52 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 08:57:58 +0000 Subject: [PATCH 10/13] add tuple --- pandas/tests/groupby/test_apply.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 26ee35d399602..9c2b045079622 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -834,6 +834,7 @@ def test_apply_index_has_complex_internals(index): [ (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), ( lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, [{0: 0, 1: 1}, {0: 2, 1: 3}], From ced6f3eb8cd4a0cabd4c5f7c50be6fd2e01bcb53 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 08:59:01 +0000 Subject: [PATCH 11/13] reinsert comment --- pandas/_libs/reduction.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 317f617a26c9c..c2ecb30777b89 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -500,6 +500,7 @@ def apply_frame_axis0(object frame, object f, object names, pass if not is_scalar(piece): + # Need to copy data to avoid appending references try: piece = piece.copy(deep="all") except (TypeError, AttributeError): From f92e1ffbff2946dff61ebaa89e06afb94fcaed66 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 08:59:19 +0000 Subject: [PATCH 12/13] reinsert comment --- pandas/_libs/reduction.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index c2ecb30777b89..44f4e56589284 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -500,7 +500,7 @@ def apply_frame_axis0(object frame, object f, object names, pass if not is_scalar(piece): - # Need to copy data to avoid appending references + # Need to copy data to avoid appending references try: piece = piece.copy(deep="all") except (TypeError, AttributeError): From 7b3efa2569f91e48adfdf4f78d2a089f2d44e4c0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 1 Feb 2020 10:01:06 +0000 Subject: [PATCH 13/13] use copy instead of deepcopy --- pandas/_libs/reduction.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 44f4e56589284..89164c527002a 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,4 +1,4 @@ -from copy import deepcopy +from copy import copy from distutils.version import LooseVersion from cython import Py_ssize_t @@ -504,7 +504,7 @@ def apply_frame_axis0(object frame, object f, object names, try: piece = piece.copy(deep="all") except (TypeError, AttributeError): - piece = deepcopy(piece) + piece = copy(piece) results.append(piece)