From 846aea9a0e452f0e2e992185e84de1aed5eebc85 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Jul 2021 09:18:49 -0700 Subject: [PATCH 1/3] DEPR: Index.reindex with duplicate index --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/indexes/base.py | 8 +++++++ pandas/core/indexes/category.py | 7 ++++++ pandas/tests/frame/indexing/test_getitem.py | 3 ++- pandas/tests/frame/indexing/test_setitem.py | 3 ++- pandas/tests/frame/methods/test_reindex.py | 12 ++++++---- .../tests/indexes/categorical/test_reindex.py | 22 ++++++++++++------- pandas/tests/indexes/multi/test_reindex.py | 3 ++- pandas/tests/resample/test_datetime_index.py | 3 ++- 9 files changed, 47 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 68f1c78688b1d..fc87a42660ad6 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -155,6 +155,8 @@ Deprecations - Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`) - Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`) - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`) +- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`??`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5866644860831..ddfd64b669d0d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3883,6 +3883,14 @@ def reindex( if self.equals(target): indexer = None else: + if not self.is_unique: + warnings.warn( + "reindexing with a non-unique Index is deprecated and " + "will raise in a future version", + FutureWarning, + stacklevel=2, + ) + if self._index_as_unique: indexer = self.get_indexer( target, method=method, limit=limit, tolerance=tolerance diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7339c82cbcc77..ac5d5a69671dc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -401,6 +401,13 @@ def reindex( missing = np.array([], dtype=np.intp) else: indexer, missing = self.get_indexer_non_unique(target) + if not self.is_unique: + warnings.warn( + "reindexing with a non-unique Index is deprecated and will " + "raise in a future version", + FutureWarning, + stacklevel=2, + ) if len(self) and indexer is not None: new_target = self.take(indexer) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 71e8f84b4ad01..1b350b11b47e9 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -301,7 +301,8 @@ def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_col df = df_dup_cols msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df[df.A > 6] + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df[df.A > 6] def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): # boolean indexing diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 25682330fe19a..a8a6f0caecd28 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -70,7 +70,8 @@ def test_setitem_error_msmgs(self): ) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df["newcol"] = ser + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df["newcol"] = ser # GH 4107, more descriptive error message df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index d0765084adfa9..accab7aaf02bf 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -660,7 +660,8 @@ def test_reindex_dups(self): # reindex fails msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df.reindex(index=list(range(len(df)))) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(index=list(range(len(df)))) def test_reindex_with_duplicate_columns(self): @@ -670,9 +671,11 @@ def test_reindex_with_duplicate_columns(self): ) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar"]) with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar", "foo"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df.reindex(columns=["bar", "foo"]) def test_reindex_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 @@ -944,7 +947,8 @@ def test_reindex_with_categoricalindex(self): # passed duplicate indexers are not allowed msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - df2.reindex(["a", "b"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + df2.reindex(["a", "b"]) # args NotImplemented ATM msg = r"argument {} is not implemented for CategoricalIndex\.reindex" diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 03053b66ceaaa..2e8ad0fbd60cc 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -13,26 +13,32 @@ class TestReindex: def test_reindex_dtype(self): - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(["a", "c"]) + # GH#11586 + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(Categorical(["a", "c"])) + ci = CategoricalIndex(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(["a", "c"]) + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(["a", "c"]) exp = Index(["a", "a", "c"], dtype="object") tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(Categorical(["a", "c"])) + ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + res, indexer = ci.reindex(Categorical(["a", "c"])) exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 340b546125d8d..8136169aa26f6 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -110,7 +110,8 @@ def test_reindex_non_unique(): msg = "cannot handle a non-unique multi-index!" with pytest.raises(ValueError, match=msg): - a.reindex(new_idx) + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + a.reindex(new_idx) @pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 318289a51f781..be99eb0bf0a69 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -694,7 +694,8 @@ def test_asfreq_non_unique(): msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - ts.asfreq("B") + with tm.assert_produces_warning(FutureWarning, match="non-unique"): + ts.asfreq("B") def test_resample_axis1(): From fb0ebe00e155e5aa6e389bee12824ee84cfc16f1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Jul 2021 09:21:16 -0700 Subject: [PATCH 2/3] GH refs --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/indexes/base.py | 1 + pandas/core/indexes/category.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fc87a42660ad6..4cd6a332d6d51 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -155,7 +155,7 @@ Deprecations - Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`) - Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`) - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`) -- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`??`) +- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ddfd64b669d0d..f74ccebf5646a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3884,6 +3884,7 @@ def reindex( indexer = None else: if not self.is_unique: + # GH#42568 warnings.warn( "reindexing with a non-unique Index is deprecated and " "will raise in a future version", diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac5d5a69671dc..816945acca0f7 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -402,6 +402,7 @@ def reindex( else: indexer, missing = self.get_indexer_non_unique(target) if not self.is_unique: + # GH#42568 warnings.warn( "reindexing with a non-unique Index is deprecated and will " "raise in a future version", From a9d3d619d898408cd9dcbd0f1ffb30ed25fb80fb Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Aug 2021 08:38:28 -0700 Subject: [PATCH 3/3] catch warning in docs --- doc/source/user_guide/duplicates.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst index 7cda067fb24ad..36c2ec53d58b4 100644 --- a/doc/source/user_guide/duplicates.rst +++ b/doc/source/user_guide/duplicates.rst @@ -28,6 +28,7 @@ duplicates present. The output can't be determined, and so pandas raises. .. ipython:: python :okexcept: + :okwarning: s1 = pd.Series([0, 1, 2], index=["a", "b", "b"]) s1.reindex(["a", "b", "c"])