From 36676aef33804a3591c7cfedf20de941e36e1f62 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Jun 2021 09:28:33 -0700 Subject: [PATCH 1/2] REF: implement _should_partial_index --- pandas/core/indexes/base.py | 11 ++++++++++- pandas/core/indexes/interval.py | 14 +++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6219aa07478d7..daff36a3a837b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3395,7 +3395,7 @@ def get_indexer( if len(target) == 0: return np.array([], dtype=np.intp) - if not self._should_compare(target) and not is_interval_dtype(self.dtype): + if not self._should_compare(target) and not self._should_partial_index(target): # IntervalIndex get special treatment bc numeric scalars can be # matched to Interval scalars return self._get_indexer_non_comparable(target, method=method, unique=True) @@ -3459,6 +3459,15 @@ def _get_indexer( return ensure_platform_int(indexer) + @final + def _should_partial_index(self, target: Index) -> bool: + """ + Should we attempt partial-matching indexing? + """ + if is_interval_dtype(self.dtype): + return self.left._should_compare(target) + return False + @final def _check_indexing_method( self, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e71d6d690bcda..f3b6370e5b42e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -650,9 +650,6 @@ def _get_indexer( if self.equals(target): return np.arange(len(self), dtype="intp") - if not self._should_compare(target): - return self._get_indexer_non_comparable(target, method, unique=True) - # non-overlapping -> at most one match per interval in target # want exact matches -> need both left/right to match, so defer to # left/right get_indexer, compare elementwise, equality -> match @@ -660,12 +657,14 @@ def _get_indexer( right_indexer = self.right.get_indexer(target.right) indexer = np.where(left_indexer == right_indexer, left_indexer, -1) - elif not is_object_dtype(target): + elif not is_object_dtype(target.dtype): # homogeneous scalar index: use IntervalTree + # we should always have self._should_partial_index(target) here target = self._maybe_convert_i8(target) indexer = self._engine.get_indexer(target.values) else: # heterogeneous scalar index: defer elementwise to get_loc + # we should always have self._should_partial_index(target) here return self._get_indexer_pointwise(target)[0] return ensure_platform_int(indexer) @@ -675,11 +674,12 @@ def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray] # both returned ndarrays are np.intp target = ensure_index(target) - if isinstance(target, IntervalIndex) and not self._should_compare(target): - # different closed or incompatible subtype -> no matches + if not self._should_compare(target) and not self._should_partial_index(target): + # e.g. IntervalIndex with different closed or incompatible subtype + # -> no matches return self._get_indexer_non_comparable(target, None, unique=False) - elif is_object_dtype(target.dtype) or isinstance(target, IntervalIndex): + elif is_object_dtype(target.dtype) or not self._should_partial_index(target): # target might contain intervals: defer elementwise to get_loc return self._get_indexer_pointwise(target) From 72c59a06fa9147842a173caba92d3622f8652461 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Jun 2021 10:09:44 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index daff36a3a837b..ea1fcd9f7848b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3465,7 +3465,8 @@ def _should_partial_index(self, target: Index) -> bool: Should we attempt partial-matching indexing? """ if is_interval_dtype(self.dtype): - return self.left._should_compare(target) + # "Index" has no attribute "left" + return self.left._should_compare(target) # type: ignore[attr-defined] return False @final