From 889e9391cc0178b2ec1ac0c1f77ba55c08cdc3c6 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 14:31:06 +0100 Subject: [PATCH 1/3] BUG: error when unstacking in DataFrameGroupby.apply --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/groupby/groupby.py | 8 +++++++- pandas/tests/groupby/test_apply.py | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 743bb78c70c36..f9d790dfb5a89 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,6 +295,7 @@ Groupby/resample/rolling or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) +- Bug in :meth:`DataFrameGroupBy.apply` when the supplied callable unstacked the grouped :class:`DataFrame` and returned a :class:`Series`, an attempt was made to set a list as the name of the result ``Series``, causing an error to be raised (:issue:`52444`) - Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b832eb90aa422..7f2eccbf8b2d1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1192,7 +1192,13 @@ def _concat_objects( else: result = concat(values, axis=self.axis) - name = self.obj.name if self.obj.ndim == 1 else self._selection + if self.obj.ndim == 1: + name = self.obj.name + elif is_hashable(self._selection): + name = self._selection + else: + name = None + if isinstance(result, Series) and name is not None: result.name = name diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0699b7c1369f2..6d6492dd12a33 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1203,6 +1203,26 @@ def test_groupby_apply_shape_cache_safety(): tm.assert_frame_equal(result, expected) +def test_groupby_apply_to_series_name(): + # GH52444 + df = pd.DataFrame.from_dict( + { + "a": ["a", "b", "a", "b"], + "b1": ["aa", "ac", "ac", "ad"], + "b2": ["aa", "aa", "aa", "ac"], + } + ) + grp = df.groupby("a")[["b1", "b2"]] + result = grp.apply(lambda x: x.unstack().value_counts()) + + expected_idx = pd.MultiIndex.from_arrays( + arrays=[["a", "a", "b", "b", "b"], ["aa", "ac", "ac", "ad", "aa"]], + names=["a", None], + ) + expected = pd.Series([3, 1, 2, 1, 1], index=expected_idx, name="count") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dropna", [True, False]) def test_apply_na(dropna): # GH#28984 From 020fcc3cdc183027b143a0df2afb99cd12a94812 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 14:44:57 +0100 Subject: [PATCH 2/3] fix pre-commit --- pandas/tests/groupby/test_apply.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 6d6492dd12a33..b56ce326d1962 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1205,7 +1205,7 @@ def test_groupby_apply_shape_cache_safety(): def test_groupby_apply_to_series_name(): # GH52444 - df = pd.DataFrame.from_dict( + df = DataFrame.from_dict( { "a": ["a", "b", "a", "b"], "b1": ["aa", "ac", "ac", "ad"], @@ -1215,11 +1215,11 @@ def test_groupby_apply_to_series_name(): grp = df.groupby("a")[["b1", "b2"]] result = grp.apply(lambda x: x.unstack().value_counts()) - expected_idx = pd.MultiIndex.from_arrays( + expected_idx = MultiIndex.from_arrays( arrays=[["a", "a", "b", "b", "b"], ["aa", "ac", "ac", "ad", "aa"]], names=["a", None], ) - expected = pd.Series([3, 1, 2, 1, 1], index=expected_idx, name="count") + expected = Series([3, 1, 2, 1, 1], index=expected_idx, name="count") tm.assert_series_equal(result, expected) From 0edff6e14921876aaf0e873da54defd4b2f9dbe3 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 6 Apr 2023 08:37:21 +0100 Subject: [PATCH 3/3] adjust whatsnew for comments --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index f9d790dfb5a89..845fa5dfde428 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,7 +295,7 @@ Groupby/resample/rolling or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) -- Bug in :meth:`DataFrameGroupBy.apply` when the supplied callable unstacked the grouped :class:`DataFrame` and returned a :class:`Series`, an attempt was made to set a list as the name of the result ``Series``, causing an error to be raised (:issue:`52444`) +- Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`) - Reshaping