diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst index 7cda067fb24ad..36c2ec53d58b4 100644 --- a/doc/source/user_guide/duplicates.rst +++ b/doc/source/user_guide/duplicates.rst @@ -28,6 +28,7 @@ duplicates present. The output can't be determined, and so pandas raises. .. ipython:: python :okexcept: + :okwarning: s1 = pd.Series([0, 1, 2], index=["a", "b", "b"]) s1.reindex(["a", "b", "c"]) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fa9c424351b00..73e31d9e52fa7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -162,6 +162,7 @@ Deprecations - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`) - Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`) - Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`) +- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87c50e94deb34..0d27114161916 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3915,6 +3915,15 @@ def reindex( ) indexer, _ = self.get_indexer_non_unique(target) + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and " + "will raise in a future version", + FutureWarning, + stacklevel=2, + ) + target = self._wrap_reindex_result(target, indexer, preserve_names) return target, indexer diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2faf2cab75117..bce49f2e923e9 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -426,6 +426,14 @@ def reindex( missing = np.array([], dtype=np.intp) else: indexer, missing = self.get_indexer_non_unique(target) + if not self.is_unique: + # GH#42568 + warnings.warn( + "reindexing with a non-unique Index is deprecated and will " + "raise in a future version", + FutureWarning, + stacklevel=2, + ) if len(self) and indexer is not None: new_target = self.take(indexer) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 71e8f84b4ad01..1b350b11b47e9 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -301,7 +301,8 @@ def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_col df = df_dup_cols msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df[df.A > 6] + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df[df.A > 6] def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): # boolean indexing diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 1e23f7d61efde..821aed535297b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -70,7 +70,8 @@ def test_setitem_error_msmgs(self): ) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df["newcol"] = ser + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df["newcol"] = ser # GH 4107, more descriptive error message df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 6b73c6a662da7..c6b19547904ec 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -674,7 +674,8 @@ def test_reindex_dups(self): # reindex fails msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df.reindex(index=list(range(len(df)))) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(index=list(range(len(df)))) def test_reindex_with_duplicate_columns(self): @@ -684,9 +685,11 @@ def test_reindex_with_duplicate_columns(self): ) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar"]) with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar", "foo"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar", "foo"]) def test_reindex_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 @@ -958,7 +961,8 @@ def test_reindex_with_categoricalindex(self): # passed duplicate indexers are not allowed msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df2.reindex(["a", "b"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df2.reindex(["a", "b"]) # args NotImplemented ATM msg = r"argument {} is not implemented for CategoricalIndex\.reindex" diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 03053b66ceaaa..2e8ad0fbd60cc 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -13,26 +13,32 @@ class TestReindex: def test_reindex_dtype(self): - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(["a", "c"]) + # GH#11586 + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(Categorical(["a", "c"])) + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(["a", "c"]) + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) exp = Index(["a", "a", "c"], dtype="object") tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(Categorical(["a", "c"])) + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 340b546125d8d..8136169aa26f6 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -110,7 +110,8 @@ def test_reindex_non_unique(): msg = "cannot handle a non-unique multi-index!" with pytest.raises(ValueError, match=msg): - a.reindex(new_idx) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + a.reindex(new_idx) @pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 318289a51f781..be99eb0bf0a69 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -694,7 +694,8 @@ def test_asfreq_non_unique(): msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - ts.asfreq("B") + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + ts.asfreq("B") def test_resample_axis1():