diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b12e5be7722d0..d2e4a5b7e75bf 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -286,8 +286,6 @@ def _get_hashtable_algo(values: np.ndarray): def _get_values_for_rank(values: ArrayLike) -> np.ndarray: - if is_categorical_dtype(values): - values = cast("Categorical", values)._values_for_rank() values = _ensure_data(values) if values.dtype.kind in ["i", "u", "f"]: @@ -993,13 +991,13 @@ def rank( na_option: str = "keep", ascending: bool = True, pct: bool = False, -) -> np.ndarray: +) -> npt.NDArray[np.float64]: """ Rank the values along a given axis. Parameters ---------- - values : array-like + values : np.ndarray or ExtensionArray Array whose values will be ranked. The number of dimensions in this array must not exceed 2. axis : int, default 0 diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fc915f5f84d8b..b884ee1e0a395 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -73,6 +73,7 @@ from pandas.core.algorithms import ( factorize_array, isin, + rank, unique, ) from pandas.core.array_algos.quantile import quantile_with_mask @@ -1496,6 +1497,32 @@ def _fill_mask_inplace( self[mask] = new_values[mask] return + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + + # TODO: we only have tests that get here with dt64 and td64 + # TODO: all tests that get here use the defaults for all the kwds + return rank( + self, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + @classmethod def _empty(cls, shape: Shape, dtype: ExtensionDtype): """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0ce7e0fbfb80a..9d59386cda9c3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1842,6 +1842,30 @@ def sort_values( codes = self._codes[sorted_idx] return self._from_backing_data(codes) + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + vff = self._values_for_rank() + return algorithms.rank( + vff, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + def _values_for_rank(self): """ For correctly ranking ordered categorical data. See GH#15420 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc15c846b1907..7190251c0dfd0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8494,19 +8494,32 @@ def rank( raise ValueError(msg) def ranker(data): - ranks = algos.rank( - data.values, - axis=axis, - method=method, - ascending=ascending, - na_option=na_option, - pct=pct, - ) - # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected - # "Union[ArrayManager, BlockManager]" - ranks_obj = self._constructor( - ranks, **data._construct_axes_dict() # type: ignore[arg-type] - ) + if data.ndim == 2: + # i.e. DataFrame, we cast to ndarray + values = data.values + else: + # i.e. Series, can dispatch to EA + values = data._values + + if isinstance(values, ExtensionArray): + ranks = values._rank( + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + ranks = algos.rank( + values, + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + + ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) return ranks_obj.__finalize__(self, method="rank") # if numeric_only is None, and we can't get anything, we try with