From 2b21be8aefc64844ee119109e9ff378340e7857a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 14:37:11 -0800 Subject: [PATCH 1/5] CLN: de-duplicate indexes methods --- pandas/core/indexes/category.py | 11 ----------- pandas/core/indexes/datetimelike.py | 8 ++++++-- pandas/core/indexes/datetimes.py | 12 ++++-------- pandas/core/indexes/extension.py | 5 +++++ pandas/core/indexes/interval.py | 5 ----- pandas/core/indexes/period.py | 9 --------- pandas/core/indexes/timedeltas.py | 7 ------- 7 files changed, 15 insertions(+), 42 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 9ed977ad1e52e..805ece6591a64 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -6,7 +6,6 @@ from pandas._config import get_option from pandas._libs import index as libindex -from pandas._libs.hashtable import duplicated_int64 from pandas._libs.lib import no_default from pandas._typing import ArrayLike, Label from pandas.util._decorators import Appender, cache_readonly, doc @@ -358,11 +357,6 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - @doc(Index.__contains__) def __contains__(self, key: Any) -> bool: # if key is a NaN, check if any NaN is in self. @@ -399,11 +393,6 @@ def unique(self, level=None): # of result, not self. return type(self)._simple_new(result, name=self.name) - @doc(Index.duplicated) - def duplicated(self, keep="first"): - codes = self.codes.astype("i8") - return duplicated_int64(codes, keep) - def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.astype("object") diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f80254b91231a..cdfd659c0f81d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -10,7 +10,6 @@ from pandas._libs.tslibs import BaseOffset, Resolution, Tick from pandas._typing import Callable, Label from pandas.compat.numpy import function as nv -from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( @@ -399,7 +398,7 @@ def _format_with_header( @property def _formatter_func(self): - raise AbstractMethodError(self) + return self._data._formatter() def _format_attrs(self): """ @@ -692,6 +691,11 @@ def _with_freq(self, freq): arr = self._data._with_freq(freq) return type(self)._simple_new(arr, name=self.name) + @property + def _has_complex_internals(self) -> bool: + # used to avoid libreduction code paths, which raise or require conversion + return False + # -------------------------------------------------------------------- # Set Operation Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e262d33e1aaf0..7093d0977620b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -20,11 +20,8 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, - is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_float, - is_integer, is_scalar, ) from pandas.core.dtypes.missing import is_valid_nat_for_dtype @@ -354,8 +351,6 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ Can we compare values of the given dtype to our own? """ - if not is_datetime64_any_dtype(dtype): - return False if self.tz is not None: # If we have tz, we can compare to tzaware return is_datetime64tz_dtype(dtype) @@ -720,9 +715,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): """ assert kind in ["loc", "getitem", None] - if is_float(label) or isinstance(label, time) or is_integer(label): - self._invalid_indexer("slice", label) - if isinstance(label, str): freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) parsed, reso = parsing.parse_time_string(label, freq) @@ -739,6 +731,9 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): return lower if side == "left" else upper elif isinstance(label, (self._data._recognized_scalars, date)): self._deprecate_mismatched_indexing(label) + else: + self._invalid_indexer("slice", label) + return self._maybe_cast_for_get_loc(label) def _get_string_slice(self, key: str): @@ -863,6 +858,7 @@ def indexer_at_time(self, time, asof=False): raise ValueError("Index must be timezone aware.") time_micros = self.tz_convert(time.tzinfo)._get_time_micros() else: + # TODO: should check for self.tz being None? time_micros = self._get_time_micros() micros = _time_to_micros(time) return (micros == time_micros).nonzero()[0] diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index c117c32f26d25..5fde3d244d6ff 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -211,6 +211,11 @@ class ExtensionIndex(Index): __le__ = _make_wrapped_comparison_op("__le__") __ge__ = _make_wrapped_comparison_op("__ge__") + @property + def _has_complex_internals(self) -> bool: + # used to avoid libreduction code paths, which raise or require conversion + return True + # --------------------------------------------------------------------- # NDarray-Like Methods diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b0f8be986fe5d..cb5de046599b9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -373,11 +373,6 @@ def values(self) -> IntervalArray: """ return self._data - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - def __array_wrap__(self, result, context=None): # we don't want the superclass implementation return result diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0b0f985697da9..38abc18b5f1cb 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -251,11 +251,6 @@ def __new__( def values(self) -> np.ndarray: return np.asarray(self) - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq @@ -307,10 +302,6 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return self.astype(object)._values - @property - def _formatter_func(self): - return self._data._formatter(boxed=False) - # ------------------------------------------------------------------------ # Indexing diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 8ce04b107d23b..6ae10ad2f5da2 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -157,13 +157,6 @@ def __new__( ) return cls._simple_new(tdarr, name=name) - # ------------------------------------------------------------------- - # Rendering Methods - - @property - def _formatter_func(self): - return self._data._formatter() - # ------------------------------------------------------------------- @doc(Index.astype) From 97afb58fa6f2ed08667310e14951aea23f99cd38 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 15:06:08 -0800 Subject: [PATCH 2/5] shallow_copy->simple_new --- pandas/core/indexes/category.py | 4 ++-- pandas/core/indexes/datetimes.py | 1 - pandas/core/indexes/multi.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 805ece6591a64..6c77a45882818 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -471,7 +471,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): new_target = np.asarray(new_target) if is_categorical_dtype(target): new_target = Categorical(new_target, dtype=target.dtype) - new_target = target._shallow_copy(new_target, name=self.name) + new_target = type(self)._simple_new(new_target, name=self.name) else: new_target = Index(new_target, name=self.name) @@ -495,7 +495,7 @@ def _reindex_non_unique(self, target): # .reindex returns normal Index. Revert to CategoricalIndex if # all targets are included in my categories new_target = Categorical(new_target, dtype=self.dtype) - new_target = self._shallow_copy(new_target) + new_target = type(self)._simple_new(new_target, name=self.name) return new_target, indexer, new_indexer diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7093d0977620b..4bafda9c0a611 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -858,7 +858,6 @@ def indexer_at_time(self, time, asof=False): raise ValueError("Index must be timezone aware.") time_micros = self.tz_convert(time.tzinfo)._get_time_micros() else: - # TODO: should check for self.tz being None? time_micros = self._get_time_micros() micros = _time_to_micros(time) return (micros == time_micros).nonzero()[0] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 95f14bb643744..9354be5ed94d9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3631,7 +3631,7 @@ def astype(self, dtype, copy=True): return self._shallow_copy() return self - def _validate_insert_value(self, item): + def _validate_fill_value(self, item): if not isinstance(item, tuple): # Pad the key with empty strings if lower levels of the key # aren't specified: @@ -3654,7 +3654,7 @@ def insert(self, loc: int, item): ------- new_index : Index """ - item = self._validate_insert_value(item) + item = self._validate_fill_value(item) new_levels = [] new_codes = [] From ac8808e4f7c8d093c3fdeb8ad89cc202df8b6dbe Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 17:53:25 -0800 Subject: [PATCH 3/5] REF: share _shallow_copy --- pandas/core/indexes/category.py | 18 ++++++++++++------ pandas/core/indexes/datetimelike.py | 10 ---------- pandas/core/indexes/extension.py | 19 +++++++++++++++++-- pandas/core/indexes/interval.py | 17 ++--------------- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6c77a45882818..199859d648a21 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_categorical_dtype, - is_list_like, is_scalar, ) from pandas.core.dtypes.dtypes import CategoricalDtype @@ -225,9 +224,14 @@ def _simple_new(cls, values: Categorical, name: Label = None): # -------------------------------------------------------------------- + # error: Argument 1 of "_shallow_copy" is incompatible with supertype + # "ExtensionIndex"; supertype defines the argument type as + # "Optional[ExtensionArray]" [override] @doc(Index._shallow_copy) - def _shallow_copy( - self, values: Optional[Categorical] = None, name: Label = no_default + def _shallow_copy( # type:ignore[override] + self, + values: Optional[Categorical] = None, + name: Label = no_default, ): name = self.name if name is no_default else name @@ -239,13 +243,17 @@ def _shallow_copy( return super()._shallow_copy(values=values, name=name) - def _is_dtype_compat(self, other) -> Categorical: + def _is_dtype_compat(self, other: Index) -> Categorical: """ *this is an internal non-public method* provide a comparison between the dtype of self and other (coercing if needed) + Parameters + ---------- + other : Index + Returns ------- Categorical @@ -262,8 +270,6 @@ def _is_dtype_compat(self, other) -> Categorical: ) else: values = other - if not is_list_like(values): - values = [values] cat = Categorical(other, dtype=self.dtype) other = CategoricalIndex(cat) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cdfd659c0f81d..f0b37b810b28a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -123,16 +123,6 @@ def _simple_new( def _is_all_dates(self) -> bool: return True - def _shallow_copy(self, values=None, name: Label = lib.no_default): - name = self.name if name is lib.no_default else name - - if values is not None: - return self._simple_new(values, name=name) - - result = self._simple_new(self._data, name=name) - result._cache = self._cache - return result - # ------------------------------------------------------------------------ # Abstract data attributes diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 5fde3d244d6ff..00bebe3c5c934 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,10 +1,12 @@ """ Shared methods for Index subclasses backed by ExtensionArray. """ -from typing import List, TypeVar +from typing import List, Optional, TypeVar import numpy as np +from pandas._libs import lib +from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc @@ -211,6 +213,19 @@ class ExtensionIndex(Index): __le__ = _make_wrapped_comparison_op("__le__") __ge__ = _make_wrapped_comparison_op("__ge__") + @doc(Index._shallow_copy) + def _shallow_copy( + self, values: Optional[ExtensionArray] = None, name: Label = lib.no_default + ): + name = self.name if name is lib.no_default else name + + if values is not None: + return self._simple_new(values, name=name) + + result = self._simple_new(self._data, name=name) + result._cache = self._cache + return result + @property def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion @@ -256,7 +271,7 @@ def _get_engine_target(self) -> np.ndarray: def repeat(self, repeats, axis=None): nv.validate_repeat(tuple(), dict(axis=axis)) result = self._data.repeat(repeats, axis=axis) - return self._shallow_copy(result) + return type(self)._simple_new(result, name=self.name) def insert(self, loc: int, item): # ExtensionIndex subclasses must override Index.insert diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index cb5de046599b9..ca78a6d319ea6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -320,19 +320,6 @@ def from_tuples( # -------------------------------------------------------------------- - @Appender(Index._shallow_copy.__doc__) - def _shallow_copy( - self, values: Optional[IntervalArray] = None, name: Label = lib.no_default - ): - name = self.name if name is lib.no_default else name - - if values is not None: - return self._simple_new(values, name=name) - - result = self._simple_new(self._data, name=name) - result._cache = self._cache - return result - @cache_readonly def _engine(self): left = self._maybe_convert_i8(self.left) @@ -872,7 +859,7 @@ def delete(self, loc): new_left = self.left.delete(loc) new_right = self.right.delete(loc) result = IntervalArray.from_arrays(new_left, new_right, closed=self.closed) - return self._shallow_copy(result) + return type(self)._simple_new(result, name=self.name) def insert(self, loc, item): """ @@ -894,7 +881,7 @@ def insert(self, loc, item): new_left = self.left.insert(loc, left_insert) new_right = self.right.insert(loc, right_insert) result = IntervalArray.from_arrays(new_left, new_right, closed=self.closed) - return self._shallow_copy(result) + return type(self)._simple_new(result, name=self.name) # -------------------------------------------------------------------- # Rendering Methods From 6629f5d5c7b7f8972e725382d79484dcc2fca61c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 19:17:34 -0800 Subject: [PATCH 4/5] mypy fixup --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 199859d648a21..413c8f6b45275 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -243,7 +243,7 @@ def _shallow_copy( # type:ignore[override] return super()._shallow_copy(values=values, name=name) - def _is_dtype_compat(self, other: Index) -> Categorical: + def _is_dtype_compat(self, other) -> Categorical: """ *this is an internal non-public method* From 3388f2b8694aaa43174b6caed7346c73a536a970 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 20:41:41 -0800 Subject: [PATCH 5/5] dummy commit to force CI