From ea67d8b9837c4c974ff8f0b08ecacd02881b52b1 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 3 Nov 2022 20:45:23 -0400 Subject: [PATCH 1/2] remove CategoricalIndex.reindex in favor of base class --- pandas/core/indexes/category.py | 74 ---------------------- pandas/tests/frame/methods/test_reindex.py | 9 ++- 2 files changed, 4 insertions(+), 79 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f06d118538c1a..1ee624a52c063 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -11,7 +11,6 @@ from pandas._typing import ( Dtype, DtypeObj, - npt, ) from pandas.util._decorators import ( cache_readonly, @@ -365,79 +364,6 @@ def __contains__(self, key: Any) -> bool: return contains(self, key, container=self._engine) - # TODO(2.0): remove reindex once non-unique deprecation is enforced - def reindex( - self, target, method=None, level=None, limit=None, tolerance=None - ) -> tuple[Index, npt.NDArray[np.intp] | None]: - """ - Create index with target's values (move/add/delete values as necessary) - - Returns - ------- - new_index : pd.Index - Resulting index - indexer : np.ndarray[np.intp] or None - Indices of output values in original index - - """ - if method is not None: - raise NotImplementedError( - "argument method is not implemented for CategoricalIndex.reindex" - ) - if level is not None: - raise NotImplementedError( - "argument level is not implemented for CategoricalIndex.reindex" - ) - if limit is not None: - raise NotImplementedError( - "argument limit is not implemented for CategoricalIndex.reindex" - ) - - target = ibase.ensure_index(target) - - if self.equals(target): - indexer = None - missing = np.array([], dtype=np.intp) - else: - indexer, missing = self.get_indexer_non_unique(target) - if not self.is_unique: - # GH#42568 - raise ValueError("cannot reindex on an axis with duplicate labels") - - new_target: Index - if len(self) and indexer is not None: - new_target = self.take(indexer) - else: - new_target = target - - # filling in missing if needed - if len(missing): - cats = self.categories.get_indexer(target) - - if not isinstance(target, CategoricalIndex) or (cats == -1).any(): - new_target, indexer, _ = super()._reindex_non_unique(target) - else: - # error: "Index" has no attribute "codes" - codes = new_target.codes.copy() # type: ignore[attr-defined] - codes[indexer == -1] = cats[missing] - cat = self._data._from_backing_data(codes) - new_target = type(self)._simple_new(cat, name=self.name) - - # we always want to return an Index type here - # to be consistent with .reindex for other index types (e.g. they don't - # coerce based on the actual values, only on the dtype) - # unless we had an initial Categorical to begin with - # in which case we are going to conform to the passed Categorical - if is_categorical_dtype(target): - cat = Categorical(new_target, dtype=target.dtype) - new_target = type(self)._simple_new(cat, name=self.name) - else: - # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target - new_target_array = np.asarray(new_target) - new_target = Index._with_infer(new_target_array, name=self.name) - - return new_target, indexer - # -------------------------------------------------------------------- # Indexing Methods diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index a132519970721..9dc7eb08eeb6b 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1097,12 +1097,11 @@ def test_reindex_with_categoricalindex(self): df2.reindex(["a", "b"]) # args NotImplemented ATM - msg = r"argument {} is not implemented for CategoricalIndex\.reindex" - with pytest.raises(NotImplementedError, match=msg.format("method")): + msg = "method pad not yet implemented for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): df.reindex(["a"], method="ffill") - with pytest.raises(NotImplementedError, match=msg.format("level")): - df.reindex(["a"], level=1) - with pytest.raises(NotImplementedError, match=msg.format("limit")): + msg = "limit argument only valid if doing pad, backfill or nearest reindexing" + with pytest.raises(ValueError, match=msg): df.reindex(["a"], limit=2) def test_reindex_signature(self): From bee6f9c12438e905f3aabb4520824d5423367227 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Fri, 4 Nov 2022 18:45:50 -0400 Subject: [PATCH 2/2] retain checks --- pandas/core/indexes/category.py | 29 ++++++++++++++++++++++ pandas/tests/frame/methods/test_reindex.py | 9 ++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 1ee624a52c063..287f94fb6b723 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -11,6 +11,7 @@ from pandas._typing import ( Dtype, DtypeObj, + npt, ) from pandas.util._decorators import ( cache_readonly, @@ -364,6 +365,34 @@ def __contains__(self, key: Any) -> bool: return contains(self, key, container=self._engine) + def reindex( + self, target, method=None, level=None, limit=None, tolerance=None + ) -> tuple[Index, npt.NDArray[np.intp] | None]: + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray[np.intp] or None + Indices of output values in original index + + """ + if method is not None: + raise NotImplementedError( + "argument method is not implemented for CategoricalIndex.reindex" + ) + if level is not None: + raise NotImplementedError( + "argument level is not implemented for CategoricalIndex.reindex" + ) + if limit is not None: + raise NotImplementedError( + "argument limit is not implemented for CategoricalIndex.reindex" + ) + return super().reindex(target) + # -------------------------------------------------------------------- # Indexing Methods diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 9dc7eb08eeb6b..a132519970721 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1097,11 +1097,12 @@ def test_reindex_with_categoricalindex(self): df2.reindex(["a", "b"]) # args NotImplemented ATM - msg = "method pad not yet implemented for CategoricalIndex" - with pytest.raises(NotImplementedError, match=msg): + msg = r"argument {} is not implemented for CategoricalIndex\.reindex" + with pytest.raises(NotImplementedError, match=msg.format("method")): df.reindex(["a"], method="ffill") - msg = "limit argument only valid if doing pad, backfill or nearest reindexing" - with pytest.raises(ValueError, match=msg): + with pytest.raises(NotImplementedError, match=msg.format("level")): + df.reindex(["a"], level=1) + with pytest.raises(NotImplementedError, match=msg.format("limit")): df.reindex(["a"], limit=2) def test_reindex_signature(self):