From 516590b03f3f84df689338131a7caf5af651fdc2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 17 Nov 2020 13:21:55 -0800 Subject: [PATCH 1/2] REF: de-duplicate pointwise get_indexer for IntervalIndex --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/interval.py | 52 ++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9ea0ff323a33d..5c90269106d40 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3122,7 +3122,7 @@ def get_indexer( target, method=method, limit=limit, tolerance=tolerance ) - if not self.is_unique: + if not self._index_as_unique: raise InvalidIndexError( "Reindexing only valid with uniquely valued Index objects" ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 267d1330faceb..633f4e3127bf6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -704,7 +704,7 @@ def get_indexer( self._check_indexing_method(method) - if self.is_overlapping: + if not self._index_as_unique: raise InvalidIndexError( "cannot handle overlapping indices; " "use IntervalIndex.get_indexer_non_unique" @@ -741,17 +741,7 @@ def get_indexer( indexer = self._engine.get_indexer(target_as_index.values) else: # heterogeneous scalar index: defer elementwise to get_loc - # (non-overlapping so get_loc guarantees scalar of KeyError) - indexer = [] - for key in target_as_index: - try: - loc = self.get_loc(key) - except KeyError: - loc = -1 - except InvalidIndexError as err: - # i.e. non-scalar key - raise TypeError(key) from err - indexer.append(loc) + return self._get_indexer_pointwise(target_as_index)[0] return ensure_platform_int(indexer) @@ -777,18 +767,8 @@ def get_indexer_non_unique( target_as_index, IntervalIndex ): # target_as_index might contain intervals: defer elementwise to get_loc - indexer, missing = [], [] - for i, key in enumerate(target_as_index): - try: - locs = self.get_loc(key) - if isinstance(locs, slice): - locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") - locs = np.array(locs, ndmin=1) - except KeyError: - missing.append(i) - locs = np.array([-1]) - indexer.append(locs) - indexer = np.concatenate(indexer) + return self._get_indexer_pointwise(target_as_index) + else: target_as_index = self._maybe_convert_i8(target_as_index) indexer, missing = self._engine.get_indexer_non_unique( @@ -797,6 +777,30 @@ def get_indexer_non_unique( return ensure_platform_int(indexer), ensure_platform_int(missing) + def _get_indexer_pointwise(self, target: Index): + """ + pointwise implementation for get_indexer and get_indexer_non_unique. + """ + indexer, missing = [], [] + for i, key in enumerate(target): + try: + locs = self.get_loc(key) + if isinstance(locs, slice): + # Only needed for get_indexer_non_unique + locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") + locs = np.array(locs, ndmin=1) + except KeyError: + missing.append(i) + locs = np.array([-1]) + except InvalidIndexError as err: + # i.e. non-scalar key + raise TypeError(key) from err + + indexer.append(locs) + + indexer = np.concatenate(indexer) + return ensure_platform_int(indexer), ensure_platform_int(missing) + @property def _index_as_unique(self): return not self.is_overlapping From 14dd579e5242ccda63b1f062ebbce76b490eaf0f Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 17 Nov 2020 18:51:30 -0800 Subject: [PATCH 2/2] annotate --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/interval.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index db85619663d9d..1b3e4864843f3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3124,7 +3124,7 @@ def get_indexer( target, method=method, limit=limit, tolerance=tolerance ) - if not self._index_as_unique: + if not self.is_unique: raise InvalidIndexError( "Reindexing only valid with uniquely valued Index objects" ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2d9f190937659..b0f8be986fe5d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -698,7 +698,7 @@ def get_indexer( self._check_indexing_method(method) - if not self._index_as_unique: + if self.is_overlapping: raise InvalidIndexError( "cannot handle overlapping indices; " "use IntervalIndex.get_indexer_non_unique" @@ -766,7 +766,7 @@ def get_indexer_non_unique( return ensure_platform_int(indexer), ensure_platform_int(missing) - def _get_indexer_pointwise(self, target: Index): + def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: """ pointwise implementation for get_indexer and get_indexer_non_unique. """