Skip to content

CLN: simplify MultiIndex.get_locs #42245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Jul 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 41 additions & 31 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3040,7 +3040,9 @@ def partial_selection(key, indexer=None):
indexer = self._get_level_indexer(key, level=level)
return indexer, maybe_mi_droplevels(indexer, [level])

def _get_level_indexer(self, key, level: int = 0, indexer=None):
def _get_level_indexer(
self, key, level: int = 0, indexer: Int64Index | None = None
):
# `level` kwarg is _always_ positional, never name
# return an indexer, boolean array or a slice showing where the key is
# in the totality of values
Expand Down Expand Up @@ -3193,10 +3195,12 @@ def get_locs(self, seq):
"MultiIndex slicing requires the index to be lexsorted: slicing "
f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
)
# indexer
# this is the list of all values that we want to select

n = len(self)
indexer = None
# indexer is the list of all positions that we want to take; we
# start with it being everything and narrow it down as we look at each
# entry in `seq`
indexer = Index(np.arange(n))

def _convert_to_indexer(r) -> Int64Index:
# return an indexer
Expand All @@ -3214,78 +3218,84 @@ def _convert_to_indexer(r) -> Int64Index:
r = r.nonzero()[0]
return Int64Index(r)

def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index:
if indexer is None:
indexer = Index(np.arange(n))
if idxr is None:
return indexer
def _update_indexer(idxr: Index, indexer: Index) -> Index:
indexer_intersection = indexer.intersection(idxr)
if indexer_intersection.empty and not idxr.empty and not indexer.empty:
raise KeyError(key)
raise KeyError(seq)
return indexer_intersection

for i, k in enumerate(seq):

if com.is_bool_indexer(k):
# a boolean indexer, must be the same length!
k = np.asarray(k)
indexer = _update_indexer(
_convert_to_indexer(k), indexer=indexer, key=seq
)
lvl_indexer = _convert_to_indexer(k)
indexer = _update_indexer(lvl_indexer, indexer=indexer)

elif is_list_like(k):
# a collection of labels to include from this level (these
# are or'd)

indexers: Int64Index | None = None
for x in k:
try:
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
indexers = (idxrs if indexers is None else indexers).union(
idxrs, sort=False
# Argument "indexer" to "_get_level_indexer" of "MultiIndex"
# has incompatible type "Index"; expected "Optional[Int64Index]"
item_lvl_indexer = self._get_level_indexer(
x, level=i, indexer=indexer # type: ignore[arg-type]
)
except KeyError:

# ignore not founds
# ignore not founds; see discussion in GH#39424
continue
else:
idxrs = _convert_to_indexer(item_lvl_indexer)

if indexers is None:
indexers = idxrs
else:
indexers = indexers.union(idxrs, sort=False)

if indexers is not None:
indexer = _update_indexer(indexers, indexer=indexer, key=seq)
indexer = _update_indexer(indexers, indexer=indexer)
else:
# no matches we are done
return np.array([], dtype=np.int64)
# test_loc_getitem_duplicates_multiindex_empty_indexer
return np.array([], dtype=np.intp)

elif com.is_null_slice(k):
# empty slice
indexer = _update_indexer(None, indexer=indexer, key=seq)
pass

elif isinstance(k, slice):

# a slice, include BOTH of the labels
# Argument "indexer" to "_get_level_indexer" of "MultiIndex" has
# incompatible type "Index"; expected "Optional[Int64Index]"
lvl_indexer = self._get_level_indexer(
k,
level=i,
indexer=indexer, # type: ignore[arg-type]
)
indexer = _update_indexer(
_convert_to_indexer(
self._get_level_indexer(k, level=i, indexer=indexer)
),
_convert_to_indexer(lvl_indexer),
indexer=indexer,
key=seq,
)
else:
# a single label
lvl_indexer = self._get_loc_level(k, level=i)[0]
indexer = _update_indexer(
_convert_to_indexer(self._get_loc_level(k, level=i)[0]),
_convert_to_indexer(lvl_indexer),
indexer=indexer,
key=seq,
)

# empty indexer
if indexer is None:
return np.array([], dtype=np.int64)
return np.array([], dtype=np.intp)

assert isinstance(indexer, Int64Index), type(indexer)
indexer = self._reorder_indexer(seq, indexer)

return indexer._values
return indexer._values.astype(np.intp, copy=False)

# --------------------------------------------------------------------

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,9 +1236,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
return {"key": key}

if is_nested_tuple(key, labels):
if isinstance(self.obj, ABCSeries) and any(
isinstance(k, tuple) for k in key
):
if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
# GH#35349 Raise if tuple in tuple for series
raise ValueError("Too many indices")
return labels.get_locs(key)
Expand Down