From 3689faf52646947b1c77bee7bed6277ed19f9afa Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Sat, 14 Nov 2020 17:42:02 +0800 Subject: [PATCH 01/10] Update multi.py --- pandas/core/indexes/multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 95f14bb643744..9c24ddfb25cd8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2105,7 +2105,7 @@ def drop(self, codes, level=None, errors="raise"): Parameters ---------- codes : array-like - Must be a list of tuples + Must be a list of tuples when level is not specified level : int or level name, default None errors : str, default 'raise' @@ -2161,9 +2161,10 @@ def _drop_from_level(self, codes, level, errors="raise"): nan_codes = isna(codes) values[(np.equal(nan_codes, False)) & (values == -1)] = -2 + not_found = np.array(codes)[np.array(values) == -1].tolist() + if len(not_found) != 0 and errors != "ignore": + raise KeyError(f"labels {not_found} not found in level") mask = ~algos.isin(self.codes[i], values) - if mask.all() and errors != "ignore": - raise KeyError(f"labels {codes} not found in level") return self[mask] From 0a4e73af1f90d1721d7d01335c5b35bf0b1564f1 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Sat, 14 Nov 2020 17:45:03 +0800 Subject: [PATCH 02/10] Update test_drop.py --- pandas/tests/indexes/multi/test_drop.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 06019ed0a8b14..8a2ed93f40d5e 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -147,3 +147,14 @@ def test_drop_with_nan_in_index(nulls_fixture): msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" with pytest.raises(KeyError, match=msg): mi.drop(pd.Timestamp("2001"), level="date") + + +def test_single_level_drop(): + # GH 37820 + + mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) + msg = ("labels \[4\] not found in level") + with pytest.raises(KeyError, match=msg): + mi.drop(4, level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([1, 4], level=0) From bea3d3c77b5bada6f3d3fcc2e9b50ccb0e227e82 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Sat, 14 Nov 2020 17:48:29 +0800 Subject: [PATCH 03/10] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 454098f4ace04..58d47f720de4e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -610,6 +610,7 @@ Indexing - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) +- Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) Missing ^^^^^^^ From ddee8cdd280209da4d268ee4287c7f773201255f Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Sat, 14 Nov 2020 17:52:25 +0800 Subject: [PATCH 04/10] Update test_drop.py --- pandas/tests/indexes/multi/test_drop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 8a2ed93f40d5e..d121d4f662c57 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -153,7 +153,7 @@ def test_single_level_drop(): # GH 37820 mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) - msg = ("labels \[4\] not found in level") + msg = r"labels \[4\] not found in level" with pytest.raises(KeyError, match=msg): mi.drop(4, level=0) with pytest.raises(KeyError, match=msg): From d51ac6c5374c0186ab6023ebf1b07e8206fdc078 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 19 Nov 2020 11:33:13 +0800 Subject: [PATCH 05/10] rebase fix --- pandas/core/indexes/multi.py | 4 +++- pandas/tests/indexes/multi/test_drop.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9c24ddfb25cd8..1468c3aa91376 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2160,8 +2160,10 @@ def _drop_from_level(self, codes, level, errors="raise"): # are not nan and equal -1, this means they are missing in the index nan_codes = isna(codes) values[(np.equal(nan_codes, False)) & (values == -1)] = -2 + if index.shape[0] == self.shape[0]: + values[np.equal(nan_codes, True)] = -2 - not_found = np.array(codes)[np.array(values) == -1].tolist() + not_found = codes[values == -2].tolist() if len(not_found) != 0 and errors != "ignore": raise KeyError(f"labels {not_found} not found in level") mask = ~algos.isin(self.codes[i], values) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index d121d4f662c57..b3c017f75dd65 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -158,3 +158,10 @@ def test_single_level_drop(): mi.drop(4, level=0) with pytest.raises(KeyError, match=msg): mi.drop([1, 4], level=0) + msg = r"labels \[nan\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan], level=0) + mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) + msg = r"labels \[2\.0\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, 2], level=0) From 3930b6b0c7c3735eb3c5838fb19bf96860f0148c Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 19 Nov 2020 11:46:04 +0800 Subject: [PATCH 06/10] delete tolist --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1468c3aa91376..67ca6189dc29c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2163,7 +2163,7 @@ def _drop_from_level(self, codes, level, errors="raise"): if index.shape[0] == self.shape[0]: values[np.equal(nan_codes, True)] = -2 - not_found = codes[values == -2].tolist() + not_found = codes[values == -2] if len(not_found) != 0 and errors != "ignore": raise KeyError(f"labels {not_found} not found in level") mask = ~algos.isin(self.codes[i], values) From c7449857bed2fc867e1f8b20e748c2112f574c13 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 19 Nov 2020 12:46:22 +0800 Subject: [PATCH 07/10] fix test --- pandas/tests/indexes/multi/test_drop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index b3c017f75dd65..d40e79fb7c1f8 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -162,6 +162,6 @@ def test_single_level_drop(): with pytest.raises(KeyError, match=msg): mi.drop([np.nan], level=0) mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) - msg = r"labels \[2\.0\] not found in level" + msg = r"labels \['a'\] not found in level" with pytest.raises(KeyError, match=msg): - mi.drop([np.nan, 1, 2], level=0) + mi.drop([np.nan, 1, "a"], level=0) From 2a1d08c15c2696474dd32cfeb60d555996dad96c Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 26 Nov 2020 08:35:03 +0800 Subject: [PATCH 08/10] Update test_drop.py --- pandas/tests/indexes/multi/test_drop.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index d40e79fb7c1f8..42f0905bc8f41 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -161,6 +161,7 @@ def test_single_level_drop(): msg = r"labels \[nan\] not found in level" with pytest.raises(KeyError, match=msg): mi.drop([np.nan], level=0) + mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) msg = r"labels \['a'\] not found in level" with pytest.raises(KeyError, match=msg): From bbe864ab37d5d05248ebefef5bf36bec16c50586 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 26 Nov 2020 16:47:42 +0800 Subject: [PATCH 09/10] add an edge case --- pandas/tests/indexes/multi/test_drop.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 42f0905bc8f41..60c677d2e407f 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -161,6 +161,8 @@ def test_single_level_drop(): msg = r"labels \[nan\] not found in level" with pytest.raises(KeyError, match=msg): mi.drop([np.nan], level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, 2, 3], level=0) mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) msg = r"labels \['a'\] not found in level" From 067f5477555f5e927cfdd8f14a569f331182ca18 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 00:02:05 +0800 Subject: [PATCH 10/10] rename func --- pandas/tests/indexes/multi/test_drop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 60c677d2e407f..c39954b22b0f2 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -149,7 +149,7 @@ def test_drop_with_nan_in_index(nulls_fixture): mi.drop(pd.Timestamp("2001"), level="date") -def test_single_level_drop(): +def test_single_level_drop_partially_missing_elements(): # GH 37820 mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])