From d55e4f7c0492daccfde3173d272241ab5adf87de Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 4 Dec 2022 21:53:09 -0500 Subject: [PATCH 1/2] DEPR: Enforce groupby.transform aligning with input index --- doc/source/user_guide/groupby.rst | 7 +++--- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/groupby/generic.py | 24 ++++--------------- pandas/core/groupby/groupby.py | 7 +++--- .../tests/groupby/transform/test_transform.py | 24 +++++++------------ 5 files changed, 19 insertions(+), 44 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index d8a36b1711b6e..fb8462f7f58e4 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -776,12 +776,11 @@ as the one being grouped. The transform function must: * (Optionally) operates on the entire group chunk. If this is supported, a fast path is used starting from the *second* chunk. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. 
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b70dcb0ae99fa..4f532157b5d82 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -596,6 +596,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) +- :meth:`DataFrameGroupBy.transform` with a callable that returns a DataFrame now aligns the result's index with the input's index (:issue:`47244`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d3e37a40614b3..40d7abae09b3d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,7 +24,6 @@ Union, cast, ) -import warnings import numpy as np @@ -51,7 +50,6 @@ Substitution, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -1392,33 +1390,15 @@ def _transform_general(self, func, *args, **kwargs): applied.append(res) # Compute and process with the remaining groups - emit_alignment_warning = False for name, group in gen: if group.size == 0: continue object.__setattr__(group, "name", name) res = path(group) - if ( - not emit_alignment_warning - and res.ndim == 2 - and not res.index.equals(group.index) - ): - emit_alignment_warning = True res = _wrap_transform_general_frame(self.obj, group, res) applied.append(res) - if emit_alignment_warning: - # GH#45648 - warnings.warn( - "In a future version of pandas, returning a DataFrame in " - 
"groupby.transform will align with the input's index. Apply " - "`.to_numpy()` to the result in the transform function to keep " - "the current behavior and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - concat_index = obj.columns if self.axis == 0 else obj.index other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 concatenated = concat(applied, axis=self.axis, verify_integrity=False) @@ -2336,5 +2316,9 @@ def _wrap_transform_general_frame( ) assert isinstance(res_frame, DataFrame) return res_frame + elif isinstance(res, DataFrame): + if not res.index.is_(group.index): + res = res.align(group)[0] + return res else: return res diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 659ca228bdcb0..d0e86f81397fa 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -472,12 +472,11 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``, see the examples below. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. 
Examples diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8bdbc86d8659c..d0c8b53f13399 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1466,8 +1466,8 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) @pytest.mark.parametrize( "func, series, expected_values", [ - (Series.sort_values, False, [4, 5, 3, 1, 2]), - (lambda x: x.head(1), False, ValueError), + (Series.sort_values, False, [5, 4, 3, 2, 1]), + (lambda x: x.head(1), False, [5.0, np.nan, 3, 2, np.nan]), # SeriesGroupBy already has correct behavior (Series.sort_values, True, [5, 4, 3, 2, 1]), (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]), @@ -1475,7 +1475,7 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) ) @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) @pytest.mark.parametrize("keys_in_index", [True, False]) -def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): +def test_transform_aligns(func, series, expected_values, keys, keys_in_index): # GH#45648 - transform should align with the input's index df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}) if "a2" in keys: @@ -1487,19 +1487,11 @@ def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_inde if series: gb = gb["b"] - warn = None if series else FutureWarning - msg = "returning a DataFrame in groupby.transform will align" - if expected_values is ValueError: - with tm.assert_produces_warning(warn, match=msg): - with pytest.raises(ValueError, match="Length mismatch"): - gb.transform(func) - else: - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(func) - expected = DataFrame({"b": expected_values}, index=df.index) - if series: - expected = expected["b"] - tm.assert_equal(result, expected) + result = gb.transform(func) + expected = 
DataFrame({"b": expected_values}, index=df.index) + if series: + expected = expected["b"] + tm.assert_equal(result, expected) @pytest.mark.parametrize("keys", ["A", ["A", "B"]]) From 15b95ef30365d1a714fcf341fd5644db84612af9 Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 4 Dec 2022 23:27:32 -0500 Subject: [PATCH 2/2] Rework logic --- pandas/core/groupby/generic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 40d7abae09b3d..819220d13566b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2316,9 +2316,7 @@ def _wrap_transform_general_frame( ) assert isinstance(res_frame, DataFrame) return res_frame - elif isinstance(res, DataFrame): - if not res.index.is_(group.index): - res = res.align(group)[0] - return res + elif isinstance(res, DataFrame) and not res.index.is_(group.index): + return res._align_frame(group)[0] else: return res