From f5d50c3c05aecad0f4b2fb3e643efd15d6fdcc04 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 10 Apr 2021 16:37:35 -0700 Subject: [PATCH] TYP: Index.reindex --- pandas/core/indexes/base.py | 19 ++++++++++++------- pandas/core/indexes/category.py | 27 +++++++++++++++++++-------- pandas/core/indexes/multi.py | 6 ++++-- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 705a279638097..c79518702169a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3762,7 +3762,9 @@ def _validate_can_reindex(self, indexer: np.ndarray) -> None: if not self._index_as_unique and len(indexer): raise ValueError("cannot reindex from a duplicate axis") - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, np.ndarray | None]: """ Create index with target's values. @@ -3774,7 +3776,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ------- new_index : pd.Index Resulting index. - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] or None Indices of output values in original index. """ # GH6552: preserve names when reindexing to non-named target @@ -3815,7 +3817,9 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): return target, indexer - def _reindex_non_unique(self, target): + def _reindex_non_unique( + self, target: Index + ) -> tuple[Index, np.ndarray, np.ndarray | None]: """ Create a new index with target's values (move/add/delete values as necessary) use with non-unique Index and a possibly non-unique target. @@ -3828,8 +3832,9 @@ def _reindex_non_unique(self, target): ------- new_index : pd.Index Resulting index. - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] Indices of output values in original index. + new_indexer : np.ndarray[np.intp] or None """ target = ensure_index(target) @@ -3858,13 +3863,13 @@ def _reindex_non_unique(self, target): # GH#38906 if not len(self): - new_indexer = np.arange(0) + new_indexer = np.arange(0, dtype=np.intp) # a unique indexer elif target.is_unique: # see GH5553, make sure we use the right indexer - new_indexer = np.arange(len(indexer)) + new_indexer = np.arange(len(indexer), dtype=np.intp) new_indexer[cur_indexer] = np.arange(len(cur_labels)) new_indexer[missing_indexer] = -1 @@ -3876,7 +3881,7 @@ def _reindex_non_unique(self, target): indexer[~check] = -1 # reset the new indexer to account for the new size - new_indexer = np.arange(len(self.take(indexer))) + new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) new_indexer[~check] = -1 if isinstance(self, ABCMultiIndex): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b5089621313b8..724caebd69c23 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -395,7 +395,9 @@ def unique(self, level=None): # of result, not self. return type(self)._simple_new(result, name=self.name) - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, np.ndarray | None]: """ Create index with target's values (move/add/delete values as necessary) @@ -403,7 +405,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ------- new_index : pd.Index Resulting index - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] or None Indices of output values in original index """ @@ -440,7 +442,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): if not isinstance(cats, CategoricalIndex) or (cats == -1).any(): # coerce to a regular index here! result = Index(np.array(self), name=self.name) - new_target, indexer, _ = result._reindex_non_unique(np.array(target)) + new_target, indexer, _ = result._reindex_non_unique(target) else: codes = new_target.codes.copy() @@ -462,25 +464,34 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): return new_target, indexer - def _reindex_non_unique(self, target): + # error: Return type "Tuple[Index, Optional[ndarray], Optional[ndarray]]" + # of "_reindex_non_unique" incompatible with return type + # "Tuple[Index, ndarray, Optional[ndarray]]" in supertype "Index" + def _reindex_non_unique( # type: ignore[override] + self, target: Index + ) -> tuple[Index, np.ndarray | None, np.ndarray | None]: """ reindex from a non-unique; which CategoricalIndex's are almost always """ + # TODO: rule out `indexer is None` here to make the signature + # match the parent class's signature. This should be equivalent + # to ruling out `self.equals(target)` new_target, indexer = self.reindex(target) new_indexer = None check = indexer == -1 - if check.any(): - new_indexer = np.arange(len(self.take(indexer))) + # error: Item "bool" of "Union[Any, bool]" has no attribute "any" + if check.any(): # type: ignore[union-attr] + new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) new_indexer[check] = -1 cats = self.categories.get_indexer(target) if not (cats == -1).any(): # .reindex returns normal Index. Revert to CategoricalIndex if # all targets are included in my categories - new_target = Categorical(new_target, dtype=self.dtype) - new_target = type(self)._simple_new(new_target, name=self.name) + cat = Categorical(new_target, dtype=self.dtype) + new_target = type(self)._simple_new(cat, name=self.name) return new_target, indexer, new_indexer diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3305610a4022e..5b4f3e1bb9e09 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2503,7 +2503,9 @@ def sortlevel( return new_index, indexer - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[MultiIndex, np.ndarray | None]: """ Create index with target's values (move/add/delete values as necessary) @@ -2511,7 +2513,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): ------- new_index : pd.MultiIndex Resulting index - indexer : np.ndarray or None + indexer : np.ndarray[np.intp] or None Indices of output values in original index. """