diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cac0a8cc5791f..398b49884efe4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3040,7 +3040,9 @@ def partial_selection(key, indexer=None): indexer = self._get_level_indexer(key, level=level) return indexer, maybe_mi_droplevels(indexer, [level]) - def _get_level_indexer(self, key, level: int = 0, indexer=None): + def _get_level_indexer( + self, key, level: int = 0, indexer: Int64Index | None = None + ): # `level` kwarg is _always_ positional, never name # return an indexer, boolean array or a slice showing where the key is # in the totality of values @@ -3193,10 +3195,12 @@ def get_locs(self, seq): "MultiIndex slicing requires the index to be lexsorted: slicing " f"on levels {true_slices}, lexsort depth {self._lexsort_depth}" ) - # indexer - # this is the list of all values that we want to select + n = len(self) - indexer = None + # indexer is the list of all positions that we want to take; we + # start with it being everything and narrow it down as we look at each + # entry in `seq` + indexer = Index(np.arange(n)) def _convert_to_indexer(r) -> Int64Index: # return an indexer @@ -3214,14 +3218,10 @@ def _convert_to_indexer(r) -> Int64Index: r = r.nonzero()[0] return Int64Index(r) - def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: - if indexer is None: - indexer = Index(np.arange(n)) - if idxr is None: - return indexer + def _update_indexer(idxr: Index, indexer: Index) -> Index: indexer_intersection = indexer.intersection(idxr) if indexer_intersection.empty and not idxr.empty and not indexer.empty: - raise KeyError(key) + raise KeyError(seq) return indexer_intersection for i, k in enumerate(seq): @@ -3229,63 +3229,73 @@ def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: if com.is_bool_indexer(k): # a boolean indexer, must be the same length! k = np.asarray(k) - indexer = _update_indexer( - _convert_to_indexer(k), indexer=indexer, key=seq - ) + lvl_indexer = _convert_to_indexer(k) + indexer = _update_indexer(lvl_indexer, indexer=indexer) elif is_list_like(k): # a collection of labels to include from this level (these # are or'd) + indexers: Int64Index | None = None for x in k: try: - idxrs = _convert_to_indexer( - self._get_level_indexer(x, level=i, indexer=indexer) - ) - indexers = (idxrs if indexers is None else indexers).union( - idxrs, sort=False + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" + # has incompatible type "Index"; expected "Optional[Int64Index]" + item_lvl_indexer = self._get_level_indexer( + x, level=i, indexer=indexer # type: ignore[arg-type] ) except KeyError: - - # ignore not founds + # ignore not founds; see discussion in GH#39424 continue + else: + idxrs = _convert_to_indexer(item_lvl_indexer) + + if indexers is None: + indexers = idxrs + else: + indexers = indexers.union(idxrs, sort=False) if indexers is not None: - indexer = _update_indexer(indexers, indexer=indexer, key=seq) + indexer = _update_indexer(indexers, indexer=indexer) else: # no matches we are done - return np.array([], dtype=np.int64) + # test_loc_getitem_duplicates_multiindex_empty_indexer + return np.array([], dtype=np.intp) elif com.is_null_slice(k): # empty slice - indexer = _update_indexer(None, indexer=indexer, key=seq) + pass elif isinstance(k, slice): # a slice, include BOTH of the labels + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" has + # incompatible type "Index"; expected "Optional[Int64Index]" + lvl_indexer = self._get_level_indexer( + k, + level=i, + indexer=indexer, # type: ignore[arg-type] + ) indexer = _update_indexer( - _convert_to_indexer( - self._get_level_indexer(k, level=i, indexer=indexer) - ), + _convert_to_indexer(lvl_indexer), indexer=indexer, - key=seq, ) else: # a single label + lvl_indexer = self._get_loc_level(k, level=i)[0] indexer = _update_indexer( - _convert_to_indexer(self._get_loc_level(k, level=i)[0]), + _convert_to_indexer(lvl_indexer), indexer=indexer, - key=seq, ) # empty indexer if indexer is None: - return np.array([], dtype=np.int64) + return np.array([], dtype=np.intp) assert isinstance(indexer, Int64Index), type(indexer) indexer = self._reorder_indexer(seq, indexer) - return indexer._values + return indexer._values.astype(np.intp, copy=False) # -------------------------------------------------------------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f8578d87e4cad..6e97ce95297d9 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1236,9 +1236,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): return {"key": key} if is_nested_tuple(key, labels): - if isinstance(self.obj, ABCSeries) and any( - isinstance(k, tuple) for k in key - ): + if self.ndim == 1 and any(isinstance(k, tuple) for k in key): # GH#35349 Raise if tuple in tuple for series raise ValueError("Too many indices") return labels.get_locs(key)