diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d357e4a633347..e29e740ddf8fe 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -871,6 +871,7 @@ Indexing - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) - Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) +- Bug in :meth:`DataFrame.loc` returning :class:`MultiIndex` in wrong order if indexer has duplicates (:issue:`40978`) - Bug in :meth:`DataFrame.__setitem__` raising ``TypeError`` when using a str subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`) Missing diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1a3719233a1da..532e39487ad7b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3446,6 +3446,7 @@ def _reorder_indexer( new_order = np.arange(n)[indexer] elif is_list_like(k): # Generate a map with all level codes as sorted initially + k = algos.unique(k) key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( self.levels[i] ) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 0c6f2faf77f00..cf1ab3eadc7a4 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -764,6 +764,28 @@ def test_loc_getitem_index_differently_ordered_slice_none(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]]) +def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): + # GH#40978 + df = DataFrame( + [1] * 8, + index=MultiIndex.from_tuples( + [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)] + ), + columns=["a"], + ) + result = df.loc[(slice(None), indexer), :] + expected = DataFrame( + [1] * 8, + index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], + columns=["a"], + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[df.index.isin(indexer, level=1), :] + tm.assert_frame_equal(result, df) + + def test_loc_getitem_drops_levels_for_one_row_dataframe(): # GH#10521 mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])