diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index a7c9a7eb88221..75e107bfb5892 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -109,6 +109,7 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`) - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`) - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`) +- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`) - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`) - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`). - Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9506e775ceb03..dd63ea94d5211 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3466,22 +3466,35 @@ def _reorder_indexer( ------- indexer : a sorted position indexer of self ordered as seq """ - # If the index is lexsorted and the list_like label in seq are sorted - # then we do not need to sort - if self._is_lexsorted(): - need_sort = False - for i, k in enumerate(seq): - if is_list_like(k): - if not need_sort: - k_codes = self.levels[i].get_indexer(k) - k_codes = k_codes[k_codes >= 0] # Filter absent keys - # True if the given codes are not ordered - need_sort = (k_codes[:-1] > k_codes[1:]).any() - elif isinstance(k, slice) and k.step is not None and k.step < 0: + + # check if sorting is necessary + need_sort = False + for i, k in enumerate(seq): + if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k): + pass + elif is_list_like(k): + if len(k) <= 1: # type: ignore[arg-type] + pass + elif self._is_lexsorted(): + # If the index is lexsorted and the list_like label + # in seq are sorted then we do not need to sort + k_codes = self.levels[i].get_indexer(k) + k_codes = k_codes[k_codes >= 0] # Filter absent keys + # True if the given codes are not ordered + need_sort = (k_codes[:-1] > k_codes[1:]).any() + else: need_sort = True - # Bail out if both index and seq are sorted - if not need_sort: - return indexer + elif isinstance(k, slice): + if self._is_lexsorted(): + need_sort = k.step is not None and k.step < 0 + else: + need_sort = True + else: + need_sort = True + if need_sort: + break + if not need_sort: + return indexer n = len(self) keys: tuple[np.ndarray, ...] = () diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 9626352ac7e36..591b7abf60cc0 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -898,3 +898,29 @@ def test_pyint_engine(): missing = tuple([0, 1] * 5 * N) result = index.get_indexer([missing] + [keys[i] for i in idces]) tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "keys,expected", + [ + ((slice(None), [5, 4]), [1, 0]), + ((slice(None), [4, 5]), [0, 1]), + (([True, False, True], [4, 6]), [0, 2]), + (([True, False, True], [6, 4]), [0, 2]), + ((2, [4, 5]), [0, 1]), + ((2, [5, 4]), [1, 0]), + (([2], [4, 5]), [0, 1]), + (([2], [5, 4]), [1, 0]), + ], +) +def test_get_locs_reordering(keys, expected): + # GH48384 + idx = MultiIndex.from_arrays( + [ + [2, 2, 1], + [4, 5, 6], + ] + ) + result = idx.get_locs(keys) + expected = np.array(expected, dtype=np.intp) + tm.assert_numpy_array_equal(result, expected)