From 9ea4f43a6d2a38533543b338703433a15d3aaf08 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Dec 2021 15:58:38 -0800 Subject: [PATCH 1/3] REF: dispatch Series.rank to EA --- pandas/core/algorithms.py | 10 ++++------ pandas/core/arrays/base.py | 27 +++++++++++++++++++++++++ pandas/core/arrays/categorical.py | 24 ++++++++++++++++++++++ pandas/core/generic.py | 33 +++++++++++++++++++++++-------- 4 files changed, 80 insertions(+), 14 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b12e5be7722d0..1afda20ba865b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -285,9 +285,7 @@ def _get_hashtable_algo(values: np.ndarray): return htable, values -def _get_values_for_rank(values: ArrayLike) -> np.ndarray: - if is_categorical_dtype(values): - values = cast("Categorical", values)._values_for_rank() +def _get_values_for_rank(values: ArrayLike, special: bool = True) -> np.ndarray: values = _ensure_data(values) if values.dtype.kind in ["i", "u", "f"]: @@ -298,7 +296,7 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray: def get_data_algo(values: ArrayLike): - values = _get_values_for_rank(values) + values = _get_values_for_rank(values, False) ndtype = _check_object_for_strings(values) htable = _hashtables.get(ndtype, _hashtables["object"]) @@ -993,13 +991,13 @@ def rank( na_option: str = "keep", ascending: bool = True, pct: bool = False, -) -> np.ndarray: +) -> npt.NDArray[np.float64]: """ Rank the values along a given axis. Parameters ---------- - values : array-like + values : np.ndarray or ExtensionArray Array whose values will be ranked. The number of dimensions in this array must not exceed 2. axis : int, default 0 diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fc915f5f84d8b..b884ee1e0a395 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -73,6 +73,7 @@ from pandas.core.algorithms import ( factorize_array, isin, + rank, unique, ) from pandas.core.array_algos.quantile import quantile_with_mask @@ -1496,6 +1497,32 @@ def _fill_mask_inplace( self[mask] = new_values[mask] return + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + + # TODO: we only have tests that get here with dt64 and td64 + # TODO: all tests that get here use the defaults for all the kwds + return rank( + self, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + @classmethod def _empty(cls, shape: Shape, dtype: ExtensionDtype): """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0ce7e0fbfb80a..9d59386cda9c3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1842,6 +1842,30 @@ def sort_values( codes = self._codes[sorted_idx] return self._from_backing_data(codes) + def _rank( + self, + *, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + """ + See Series.rank.__doc__. + """ + if axis != 0: + raise NotImplementedError + vff = self._values_for_rank() + return algorithms.rank( + vff, + axis=axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + def _values_for_rank(self): """ For correctly ranking ordered categorical data. See GH#15420 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc15c846b1907..163eec8bb8868 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8494,14 +8494,31 @@ def rank( raise ValueError(msg) def ranker(data): - ranks = algos.rank( - data.values, - axis=axis, - method=method, - ascending=ascending, - na_option=na_option, - pct=pct, - ) + if data.ndim == 2: + # i.e. DataFrame, we cast to ndarray + values = data.values + else: + # i.e. Series, can dispatch to EA + values = data._values + + if isinstance(values, ExtensionArray): + ranks = values._rank( + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + ranks = algos.rank( + values, + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected # "Union[ArrayManager, BlockManager]" ranks_obj = self._constructor( From 22cca38c669c4ccfa506a155bb5ff882fa25b516 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Dec 2021 15:59:49 -0800 Subject: [PATCH 2/3] remove special arg --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1afda20ba865b..d2e4a5b7e75bf 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -285,7 +285,7 @@ def _get_hashtable_algo(values: np.ndarray): return htable, values -def _get_values_for_rank(values: ArrayLike, special: bool = True) -> np.ndarray: +def _get_values_for_rank(values: ArrayLike) -> np.ndarray: values = _ensure_data(values) if values.dtype.kind in ["i", "u", "f"]: @@ -296,7 +296,7 @@ def _get_values_for_rank(values: ArrayLike, special: bool = True) -> np.ndarray: def get_data_algo(values: ArrayLike): - values = _get_values_for_rank(values, False) + values = _get_values_for_rank(values) ndtype = _check_object_for_strings(values) htable = _hashtables.get(ndtype, _hashtables["object"]) From bd3a3a1f2547d2ee5ff7b25dbeb34307cdfbb82d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Dec 2021 18:10:14 -0800 Subject: [PATCH 3/3] mypy fixup --- pandas/core/generic.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 163eec8bb8868..7190251c0dfd0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8519,11 +8519,7 @@ def ranker(data): pct=pct, ) - # error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected - # "Union[ArrayManager, BlockManager]" - ranks_obj = self._constructor( - ranks, **data._construct_axes_dict() # type: ignore[arg-type] - ) + ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) return ranks_obj.__finalize__(self, method="rank") # if numeric_only is None, and we can't get anything, we try with