From b20c93a8ae49e1e60df9ee0e7e0a64ac5889e20e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 27 Jan 2020 21:21:01 -0800 Subject: [PATCH 1/5] REF: move DTI/TDI/PI get_value to ExtensionIndex, associated cleanups --- pandas/core/indexes/base.py | 18 ++++++++---------- pandas/core/indexes/category.py | 12 +++++++----- pandas/core/indexes/datetimelike.py | 3 +++ pandas/core/indexes/datetimes.py | 15 ++------------- pandas/core/indexes/extension.py | 24 ++++++++++++++++++++++-- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 1 + pandas/core/indexes/numeric.py | 1 + pandas/core/indexes/period.py | 20 +------------------- pandas/core/indexes/timedeltas.py | 11 ----------- 10 files changed, 46 insertions(+), 61 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 10d9552e6f5a7..9e55a83475f44 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import Any, Dict, FrozenSet, Hashable, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Hashable, Optional, Union import warnings import numpy as np @@ -81,6 +81,10 @@ pprint_thing, ) +if TYPE_CHECKING: + from pandas import Series + + __all__ = ["Index"] _unsortable_types = frozenset(("mixed", "mixed-integer")) @@ -4622,21 +4626,15 @@ def argsort(self, *args, **kwargs) -> np.ndarray: result = np.array(self) return result.argsort(*args, **kwargs) - _index_shared_docs[ - "get_value" - ] = """ + def get_value(self, series: "Series", key): + """ Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing. Returns ------- - scalar - A value in the Series with the index of the key value in self. + scalar or Series """ - - @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs) - def get_value(self, series, key): - if not is_scalar(key): # if key is not a scalar, directly raise an error (the code below # would convert to numpy arrays and raise later any way) - GH29926 diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 1a53596fb5967..4d14295792c1e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,4 +1,4 @@ -from typing import Any, List +from typing import TYPE_CHECKING, Any, List import warnings import numpy as np @@ -7,7 +7,6 @@ from pandas._libs import index as libindex from pandas._libs.hashtable import duplicated_int64 -from pandas._typing import AnyArrayLike from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -31,6 +30,9 @@ import pandas.core.missing as missing from pandas.core.ops import get_op_result_name +if TYPE_CHECKING: + from pandas import Series + _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass="CategoricalIndex")) @@ -493,14 +495,14 @@ def get_loc(self, key, method=None): except KeyError: raise KeyError(key) - def get_value(self, series: AnyArrayLike, key: Any): + def get_value(self, series: "Series", key: Any): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing Parameters ---------- - series : Series, ExtensionArray, Index, or ndarray + series : Series 1-dimensional array to take values from key: : scalar The value of this index at the position of the desired value, @@ -520,7 +522,7 @@ def get_value(self, series: AnyArrayLike, key: Any): pass # we might be a positional inexer - return super().get_value(series, key) + return Index.get_value(self, series, key) @Appender(_index_shared_docs["where"]) def where(self, cond, other=None): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b87dd0f02252f..8c99a37c6fedd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -376,6 +376,7 @@ def _format_attrs(self): return attrs # -------------------------------------------------------------------- + # Indexing Methods def _convert_scalar_indexer(self, key, kind=None): """ @@ -402,6 +403,8 @@ def _convert_scalar_indexer(self, key, kind=None): return super()._convert_scalar_indexer(key, kind=kind) + # -------------------------------------------------------------------- + __add__ = make_wrapped_arith_op("__add__") __radd__ = make_wrapped_arith_op("__radd__") __sub__ = make_wrapped_arith_op("__sub__") diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3afd1ff35806d..8823274881517 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1,4 +1,4 @@ -from datetime import datetime, time, timedelta, tzinfo +from datetime import time, timedelta, tzinfo import operator from typing import Optional import warnings @@ -636,17 +636,6 @@ def _maybe_promote(self, other): other = DatetimeIndex(other) return self, other - def get_value(self, series, key): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - if is_integer(key): - loc = key - else: - loc = self.get_loc(key) - return self._get_values_for_loc(series, loc) - def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label @@ -666,7 +655,7 @@ def get_loc(self, key, method=None, tolerance=None): # the try/except clauses below tolerance = self._convert_tolerance(tolerance, np.asarray(key)) - if isinstance(key, (datetime, np.datetime64)): + if isinstance(key, self._data._recognized_scalars): # needed to localize naive datetimes key = self._maybe_cast_for_get_loc(key) return Index.get_loc(self, key, method, tolerance) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 6a10b3650293c..77197678dd72d 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,20 +1,23 @@ """ Shared methods for Index subclasses backed by ExtensionArray. """ -from typing import List +from typing import TYPE_CHECKING, List import numpy as np from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly -from pandas.core.dtypes.common import ensure_platform_int, is_dtype_equal +from pandas.core.dtypes.common import ensure_platform_int, is_dtype_equal, is_integer from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray from pandas.core.indexes.base import Index, deprecate_ndim_indexing from pandas.core.ops import get_op_result_name +if TYPE_CHECKING: + from pandas import Series + def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False): """ @@ -279,3 +282,20 @@ def astype(self, dtype, copy=True): # pass copy=False because any copying will be done in the # _data.astype call above return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False) + + # -------------------------------------------------------------------- + # Indexing Methods + + @Appender(Index.get_value.__doc__) + def get_value(self, series: "Series", key): + """ + Fast lookup of value from 1-dimensional ndarray. Only use this if you + know what you're doing + """ + if is_integer(key): + loc = key + else: + loc = self.get_loc(key) + return self._get_values_for_loc(series, loc) + + # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 26b64836172fd..b07ea2798e64d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -883,7 +883,7 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray: return self.get_indexer_non_unique(target)[0] return self.get_indexer(target, **kwargs) - @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs) + @Appender(Index.get_value.__doc__) def get_value(self, series: "Series", key): loc = self.get_loc(key) return series.iloc[loc] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5a9825d58b204..f05addc96a2db 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1461,6 +1461,7 @@ def dropna(self, how="any"): new_codes = [level_codes[~indexer] for level_codes in self.codes] return self.copy(codes=new_codes, deep=True) + @Appender(Index.get_value.__doc__) def get_value(self, series, key): # Label-based s = com.values_from_object(series) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index aece294edc3e3..14534a79a9803 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -421,6 +421,7 @@ def _format_native_types( ) return formatter.get_result_as_array() + @Appender(Index.get_value.__doc__) def get_value(self, series: "Series", key): """ We always want to get an index value, never a value. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1e18c16d02784..ddfb72c3f248c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import TYPE_CHECKING, Any +from typing import Any import weakref import numpy as np @@ -18,7 +18,6 @@ is_float, is_integer, is_integer_dtype, - is_list_like, is_object_dtype, is_scalar, pandas_dtype, @@ -54,9 +53,6 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass="PeriodIndex or list of Periods")) -if TYPE_CHECKING: - from pandas import Series - # --- Period index sketch @@ -483,17 +479,6 @@ def inferred_type(self) -> str: # indexing return "period" - def get_value(self, series: "Series", key): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - if is_integer(key): - loc = key - else: - loc = self.get_loc(key) - return self._get_values_for_loc(series, loc) - @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = ensure_index(target) @@ -588,9 +573,6 @@ def get_loc(self, key, method=None, tolerance=None): key = Period(key, freq=self.freq) except ValueError: # we cannot construct the Period - # as we have an invalid type - if is_list_like(key): - raise TypeError(f"'{key}' is an invalid key") raise KeyError(key) ordinal = key.ordinal if key is not NaT else key.value diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1257e410b4125..fd1ab122130c9 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -233,17 +233,6 @@ def _maybe_promote(self, other): other = TimedeltaIndex(other) return self, other - def get_value(self, series, key): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - if is_integer(key): - loc = key - else: - loc = self.get_loc(key) - return self._get_values_for_loc(series, loc) - def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label From 24a611f5abdcae5706f3ed4a978825991894dbbf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 27 Jan 2020 21:21:54 -0800 Subject: [PATCH 2/5] annotation --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9e55a83475f44..93614f452985f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4659,7 +4659,7 @@ def get_value(self, series: "Series", key): return self._get_values_for_loc(series, loc) - def _get_values_for_loc(self, series, loc): + def _get_values_for_loc(self, series: "Series", loc): """ Do a positional lookup on the given Series, returning either a scalar or a Series. From c3ede06ecb41af1fc8f6e79a4adabd75b59b3ad1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Jan 2020 12:17:14 -0800 Subject: [PATCH 3/5] Try to handle IntervalArray case within ExtensionIndex --- pandas/core/indexes/extension.py | 12 +++++++++--- pandas/core/indexes/interval.py | 5 ----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 77197678dd72d..9f299c1c1fbe7 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -292,10 +292,16 @@ def get_value(self, series: "Series", key): Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ - if is_integer(key): - loc = key - else: + try: loc = self.get_loc(key) + except KeyError: + # e.g. DatetimeIndex doesn't hold integers + if is_integer(key): + # Fall back to positional + loc = key + else: + raise + return self._get_values_for_loc(series, loc) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b07ea2798e64d..3b3074dc8169c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -883,11 +883,6 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray: return self.get_indexer_non_unique(target)[0] return self.get_indexer(target, **kwargs) - @Appender(Index.get_value.__doc__) - def get_value(self, series: "Series", key): - loc = self.get_loc(key) - return series.iloc[loc] - def _convert_slice_indexer(self, key: slice, kind=None): if not (key.step is None or key.step == 1): raise ValueError("cannot support not-default step in a slice") From a572171435766afd7caf48a0831f8c87fed593e3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Jan 2020 13:50:36 -0800 Subject: [PATCH 4/5] handle IntervalArray via holds_integer --- pandas/core/indexes/extension.py | 2 +- pandas/core/indexes/interval.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 9f299c1c1fbe7..7aadc8f9fa42e 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -296,7 +296,7 @@ def get_value(self, series: "Series", key): loc = self.get_loc(key) except KeyError: # e.g. DatetimeIndex doesn't hold integers - if is_integer(key): + if is_integer(key) and not self.holds_integer(): # Fall back to positional loc = key else: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3b3074dc8169c..e41a208524d2f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1,7 +1,7 @@ """ define the IntervalIndex """ from operator import le, lt import textwrap -from typing import TYPE_CHECKING, Any, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import numpy as np @@ -57,10 +57,6 @@ from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import DateOffset -if TYPE_CHECKING: - from pandas import Series - - _VALID_CLOSED = {"left", "right", "both", "neither"} _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -526,6 +522,10 @@ def is_overlapping(self) -> bool: # GH 23309 return self._engine.is_overlapping + def holds_integer(self): + return self.dtype.subtype.kind not in ["m", "M"] + # TODO: There must already exist something for this? + @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): if kind == "iloc": From af308b22f4e44cfa41c3d581885ba581530b2d28 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Jan 2020 20:55:08 -0800 Subject: [PATCH 5/5] restore missing import --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2d8afb3604c04..f682b38dc7c42 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1,4 +1,4 @@ -from datetime import time, timedelta, tzinfo +from datetime import datetime, time, timedelta, tzinfo import operator from typing import Optional import warnings