From d3b361cd0b1cdc7dfbc6c1f71e7b148fa1f2098e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 3 Feb 2020 16:08:14 -0800 Subject: [PATCH 1/5] REF: call _maybe_cast_integer first --- pandas/core/indexes/base.py | 4 +-- pandas/core/indexes/category.py | 38 ++------------------- pandas/tests/indexes/multi/test_indexing.py | 2 +- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/indexing/test_floats.py | 12 +++---- 5 files changed, 12 insertions(+), 46 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a7551391f2a8..7fdf137416774 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2889,9 +2889,9 @@ def get_loc(self, key, method=None, tolerance=None): "backfill or nearest lookups" ) try: - return self._engine.get_loc(key) - except KeyError: return self._engine.get_loc(self._maybe_cast_indexer(key)) + except KeyError: + raise KeyError(key) if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2cdf47ad61cec..e87275a5a20f3 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -450,44 +450,10 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.astype("object") - def get_loc(self, key, method=None): - """ - Get integer location, slice or boolean mask for requested label. - - Parameters - ---------- - key : label - method : {None} - * default: exact matches only. - - Returns - ------- - loc : int if unique index, slice if monotonic index, else mask - - Raises - ------ - KeyError : if the key is not in the index - - Examples - -------- - >>> unique_index = pd.CategoricalIndex(list('abc')) - >>> unique_index.get_loc('b') - 1 - - >>> monotonic_index = pd.CategoricalIndex(list('abbc')) - >>> monotonic_index.get_loc('b') - slice(1, 3, None) - - >>> non_monotonic_index = pd.CategoricalIndex(list('abcb')) - >>> non_monotonic_index.get_loc('b') - array([False, True, False, True], dtype=bool) - """ + def _maybe_cast_indexer(self, key): code = self.categories.get_loc(key) code = self.codes.dtype.type(code) - try: - return self._engine.get_loc(code) - except KeyError: - raise KeyError(key) + return code def get_value(self, series: "Series", key: Any): """ diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index b08280a712642..21a4773fa3683 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -392,7 +392,7 @@ def test_get_loc_missing_nan(): # GH 8569 idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) assert isinstance(idx.get_loc(1), slice) - with pytest.raises(KeyError, match=r"^3\.0$"): + with pytest.raises(KeyError, match=r"^3$"): idx.get_loc(3) with pytest.raises(KeyError, match=r"^nan$"): idx.get_loc(np.nan) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 992a91ad8a528..1b504ce99604d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -385,7 +385,7 @@ def test_get_loc_missing_nan(self): # GH 8569 idx = Float64Index([1, 2]) assert idx.get_loc(1) == 0 - with pytest.raises(KeyError, match=r"^3\.0$"): + with pytest.raises(KeyError, match=r"^3$"): idx.get_loc(3) with pytest.raises(KeyError, match="^nan$"): idx.get_loc(np.nan) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 5530896a90941..89647e87445fc 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -107,7 +107,7 @@ def test_scalar_non_numeric(self): "mixed", }: error = KeyError - msg = r"^3$" + msg = r"^3\.0$" else: error = TypeError msg = ( @@ -187,7 +187,7 @@ def test_scalar_with_mixed(self): with pytest.raises(TypeError, match=msg): idxr(s2)[1.0] - with pytest.raises(KeyError, match=r"^1$"): + with pytest.raises(KeyError, match=r"^1\.0$"): s2.loc[1.0] result = s2.loc["b"] @@ -213,7 +213,7 @@ def test_scalar_with_mixed(self): msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): s3.iloc[1.0] - with pytest.raises(KeyError, match=r"^1$"): + with pytest.raises(KeyError, match=r"^1\.0$"): s3.loc[1.0] result = s3.loc[1.5] @@ -666,11 +666,11 @@ def test_floating_misc(self): # value not found (and no fallbacking at all) # scalar integers - with pytest.raises(KeyError, match=r"^4\.0$"): + with pytest.raises(KeyError, match=r"^4$"): s.loc[4] - with pytest.raises(KeyError, match=r"^4\.0$"): + with pytest.raises(KeyError, match=r"^4$"): s.loc[4] - with pytest.raises(KeyError, match=r"^4\.0$"): + with pytest.raises(KeyError, match=r"^4$"): s[4] # fancy floats/integers create the correct entry (as nan) From 3ba28331848c120da7c07ae66f24deb7211171d2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 3 Feb 2020 20:50:14 -0800 Subject: [PATCH 2/5] revert non-central --- pandas/core/indexes/base.py | 6 ++-- pandas/core/indexes/category.py | 38 +++++++++++++++++++-- pandas/core/indexes/extension.py | 23 ------------- pandas/tests/indexes/multi/test_indexing.py | 2 +- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/indexing/test_floats.py | 12 +++---- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7fdf137416774..72ccaca8a12d1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2889,9 +2889,9 @@ def get_loc(self, key, method=None, tolerance=None): "backfill or nearest lookups" ) try: - return self._engine.get_loc(self._maybe_cast_indexer(key)) + return self._engine.get_loc(key) except KeyError: - raise KeyError(key) + return self._engine.get_loc(self._maybe_cast_indexer(key)) if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) @@ -4604,7 +4604,7 @@ def get_value(self, series: "Series", key): # If that fails, raise a KeyError if an integer # index, otherwise, see if key is an integer, and # try that - loc = self._engine.get_loc(key) + loc = self.get_loc(key) except KeyError: if len(self) > 0 and (self.holds_integer() or self.is_boolean()): raise diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e87275a5a20f3..2cdf47ad61cec 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -450,10 +450,44 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.astype("object") - def _maybe_cast_indexer(self, key): + def get_loc(self, key, method=None): + """ + Get integer location, slice or boolean mask for requested label. + + Parameters + ---------- + key : label + method : {None} + * default: exact matches only. + + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Raises + ------ + KeyError : if the key is not in the index + + Examples + -------- + >>> unique_index = pd.CategoricalIndex(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.CategoricalIndex(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.CategoricalIndex(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True], dtype=bool) + """ code = self.categories.get_loc(key) code = self.codes.dtype.type(code) - return code + try: + return self._engine.get_loc(code) + except KeyError: + raise KeyError(key) def get_value(self, series: "Series", key: Any): """ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index c32889a9360bc..80f2575541767 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -297,26 +297,3 @@ def astype(self, dtype, copy=True): # pass copy=False because any copying will be done in the # _data.astype call above return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False) - - # -------------------------------------------------------------------- - # Indexing Methods - - @Appender(Index.get_value.__doc__) - def get_value(self, series: "Series", key): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - try: - loc = self.get_loc(key) - except KeyError: - # e.g. DatetimeIndex doesn't hold integers - if is_integer(key) and not self.holds_integer(): - # Fall back to positional - loc = key - else: - raise - - return self._get_values_for_loc(series, loc) - - # -------------------------------------------------------------------- diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 21a4773fa3683..b08280a712642 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -392,7 +392,7 @@ def test_get_loc_missing_nan(): # GH 8569 idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) assert isinstance(idx.get_loc(1), slice) - with pytest.raises(KeyError, match=r"^3$"): + with pytest.raises(KeyError, match=r"^3\.0$"): idx.get_loc(3) with pytest.raises(KeyError, match=r"^nan$"): idx.get_loc(np.nan) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1b504ce99604d..992a91ad8a528 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -385,7 +385,7 @@ def test_get_loc_missing_nan(self): # GH 8569 idx = Float64Index([1, 2]) assert idx.get_loc(1) == 0 - with pytest.raises(KeyError, match=r"^3$"): + with pytest.raises(KeyError, match=r"^3\.0$"): idx.get_loc(3) with pytest.raises(KeyError, match="^nan$"): idx.get_loc(np.nan) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 89647e87445fc..5530896a90941 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -107,7 +107,7 @@ def test_scalar_non_numeric(self): "mixed", }: error = KeyError - msg = r"^3\.0$" + msg = r"^3$" else: error = TypeError msg = ( @@ -187,7 +187,7 @@ def test_scalar_with_mixed(self): with pytest.raises(TypeError, match=msg): idxr(s2)[1.0] - with pytest.raises(KeyError, match=r"^1\.0$"): + with pytest.raises(KeyError, match=r"^1$"): s2.loc[1.0] result = s2.loc["b"] @@ -213,7 +213,7 @@ def test_scalar_with_mixed(self): msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): s3.iloc[1.0] - with pytest.raises(KeyError, match=r"^1\.0$"): + with pytest.raises(KeyError, match=r"^1$"): s3.loc[1.0] result = s3.loc[1.5] @@ -666,11 +666,11 @@ def test_floating_misc(self): # value not found (and no fallbacking at all) # scalar integers - with pytest.raises(KeyError, match=r"^4$"): + with pytest.raises(KeyError, match=r"^4\.0$"): s.loc[4] - with pytest.raises(KeyError, match=r"^4$"): + with pytest.raises(KeyError, match=r"^4\.0$"): s.loc[4] - with pytest.raises(KeyError, match=r"^4$"): + with pytest.raises(KeyError, match=r"^4\.0$"): s[4] # fancy floats/integers create the correct entry (as nan) From ad1c0f769ecfdbeb81403e382a0a985c461ceaee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 5 Feb 2020 09:39:38 -0800 Subject: [PATCH 3/5] REF: Index.get_value call self.get_loc instead of self._engine.get_loc --- pandas/core/indexes/base.py | 10 +++++++++- pandas/core/indexes/interval.py | 7 ++++--- pandas/core/indexes/numeric.py | 26 ++++++++++---------------- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 45dad7c50092e..3c735fc0309b6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4592,7 +4592,7 @@ def get_value(self, series: "Series", key): # try that loc = self.get_loc(key) except KeyError: - if len(self) > 0 and (self.holds_integer() or self.is_boolean()): + if not self._should_fallback_to_positional(): raise elif is_integer(key): # If the Index cannot hold integer, then this is unambiguously @@ -4603,6 +4603,14 @@ def get_value(self, series: "Series", key): return self._get_values_for_loc(series, loc) + def _should_fallback_to_positional(self) -> bool: + """ + If an integer key is not found, should we fall back to positional indexing? + """ + if len(self) > 0 and (self.holds_integer() or self.is_boolean()): + return False + return True + def _get_values_for_loc(self, series: "Series", loc): """ Do a positional lookup on the given Series, returning either a scalar diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 0252a13665b84..9ec72df140c85 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -523,9 +523,10 @@ def is_overlapping(self) -> bool: # GH 23309 return self._engine.is_overlapping - def holds_integer(self): - return self.dtype.subtype.kind not in ["m", "M"] - # TODO: There must already exist something for this? + def _should_fallback_to_positional(self): + # integer lookups in Series.__getitem__ are unambiguously + # positional in this case + return self.dtype.subtype.kind in ["m", "M"] @Appender(Index._convert_scalar_indexer.__doc__) def _convert_scalar_indexer(self, key, kind=None): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ebfe50327b479..5c332d16e4765 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any +from typing import Any import numpy as np @@ -32,12 +32,9 @@ from pandas.core import algorithms import pandas.core.common as com -from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name +from pandas.core.indexes.base import Index, maybe_extract_name from pandas.core.ops import get_op_result_name -if TYPE_CHECKING: - from pandas import Series - _num_index_shared_docs = dict() @@ -383,6 +380,12 @@ def astype(self, dtype, copy=True): return Int64Index(arr) return super().astype(dtype, copy=copy) + # ---------------------------------------------------------------- + # Indexing Methods + + def _should_fallback_to_positional(self): + return False + @Appender(Index._convert_scalar_indexer.__doc__) def _convert_scalar_indexer(self, key, kind=None): assert kind in ["loc", "getitem", "iloc", None] @@ -401,6 +404,8 @@ def _convert_slice_indexer(self, key: slice, kind=None): # translate to locations return self.slice_indexer(key.start, key.stop, key.step, kind=kind) + # ---------------------------------------------------------------- + def _format_native_types( self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs ): @@ -416,17 +421,6 @@ def _format_native_types( ) return formatter.get_result_as_array() - @Appender(Index.get_value.__doc__) - def get_value(self, series: "Series", key): - """ - We always want to get an index value, never a value. - """ - if not is_scalar(key): - raise InvalidIndexError - - loc = self.get_loc(key) - return self._get_values_for_loc(series, loc) - def equals(self, other) -> bool: """ Determines if two Index objects contain the same elements. From 16b064cf5300bb83276d8eeeccbde0a3fa6281e0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 5 Feb 2020 10:05:50 -0800 Subject: [PATCH 4/5] fixup remove unused imports --- pandas/core/indexes/extension.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 80f2575541767..66b551f654bf1 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,7 +1,7 @@ """ Shared methods for Index subclasses backed by ExtensionArray. """ -from typing import TYPE_CHECKING, List +from typing import List import numpy as np @@ -11,7 +11,6 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_dtype_equal, - is_integer, is_object_dtype, ) from pandas.core.dtypes.generic import ABCSeries @@ -21,9 +20,6 @@ from pandas.core.indexes.base import Index from pandas.core.ops import get_op_result_name -if TYPE_CHECKING: - from pandas import Series - def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False): """ From 6b0218411983a5989b2a386c1440280ffe6fa16f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 5 Feb 2020 18:02:59 -0800 Subject: [PATCH 5/5] inherit docstring --- pandas/core/indexes/numeric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5c332d16e4765..2f4c48cc2e5a5 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -383,6 +383,7 @@ def astype(self, dtype, copy=True): # ---------------------------------------------------------------- # Indexing Methods + @Appender(Index._should_fallback_to_positional.__doc__) def _should_fallback_to_positional(self): return False