diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 320912ec38890..7520b14127c28 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -885,6 +885,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) - Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`) - Bug in :meth:`DataFrameGroupBy.min` and :meth:`DataFrameGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising ``ValueError`` (:issue:41111`) +- Bug in :meth:`DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7a8b41fbdf141..9bc9895f3798f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2662,14 +2662,23 @@ def rank( if na_option not in {"keep", "top", "bottom"}: msg = "na_option must be one of 'keep', 'top', or 'bottom'" raise ValueError(msg) + + kwargs = { + "ties_method": method, + "ascending": ascending, + "na_option": na_option, + "pct": pct, + } + if axis != 0: + # DataFrame uses different keyword name + kwargs["method"] = kwargs.pop("ties_method") + return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs)) + return self._cython_transform( "rank", numeric_only=False, - ties_method=method, - ascending=ascending, - na_option=na_option, - pct=pct, axis=axis, + **kwargs, ) @final diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ffd6209cb83fb..ae46d1b024cc2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -44,6 +44,7 @@ ensure_float64, ensure_int64, ensure_platform_int, + is_1d_only_ea_obj, is_bool_dtype, is_categorical_dtype, is_complex_dtype, @@ -600,9 +601,11 @@ def cython_operation( if values.ndim > 2: raise NotImplementedError("number of dimensions is currently limited to 2") elif values.ndim == 2: + assert axis == 1, axis + elif not is_1d_only_ea_obj(values): # Note: it is *not* the case that axis is always 0 for 1-dim values, # as we can have 1D ExtensionArrays that we need to treat as 2D - assert axis == 1, axis + assert axis == 0 dtype = values.dtype is_numeric = is_numeric_dtype(dtype) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index e07c5f404a02a..20edf03c5b96c 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -600,3 +600,18 @@ def test_rank_multiindex(): ) tm.assert_frame_equal(result, expected) + + +def test_groupby_axis0_rank_axis1(): + # GH#41320 + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + res = gb.rank(axis=1) + + # This should match what we get when "manually" operating group-by-group + expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0) + tm.assert_frame_equal(res, expected)