REF: share more methods in ExtensionIndex #37970

Merged 8 commits on Nov 20, 2020
31 changes: 13 additions & 18 deletions pandas/core/indexes/category.py
@@ -6,15 +6,13 @@
from pandas._config import get_option

from pandas._libs import index as libindex
from pandas._libs.hashtable import duplicated_int64
from pandas._libs.lib import no_default
from pandas._typing import ArrayLike, Label
from pandas.util._decorators import Appender, cache_readonly, doc

from pandas.core.dtypes.common import (
ensure_platform_int,
is_categorical_dtype,
is_list_like,
is_scalar,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -226,9 +224,14 @@ def _simple_new(cls, values: Categorical, name: Label = None):

# --------------------------------------------------------------------

# error: Argument 1 of "_shallow_copy" is incompatible with supertype
# "ExtensionIndex"; supertype defines the argument type as
# "Optional[ExtensionArray]" [override]
@doc(Index._shallow_copy)
def _shallow_copy(
self, values: Optional[Categorical] = None, name: Label = no_default
def _shallow_copy( # type:ignore[override]
self,
values: Optional[Categorical] = None,
name: Label = no_default,
):
name = self.name if name is no_default else name

@@ -247,6 +250,10 @@ def _is_dtype_compat(self, other) -> Categorical:
provide a comparison between the dtype of self and other (coercing if
needed)

Parameters
----------
other : Index

Returns
-------
Categorical
@@ -263,8 +270,6 @@ def _is_dtype_compat(self, other) -> Categorical:
)
else:
values = other
if not is_list_like(values):
values = [values]

cat = Categorical(other, dtype=self.dtype)
other = CategoricalIndex(cat)
@@ -358,11 +363,6 @@ def values(self):
""" return the underlying data, which is a Categorical """
return self._data

@property
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return True

@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
# if key is a NaN, check if any NaN is in self.
@@ -399,11 +399,6 @@ def unique(self, level=None):
# of result, not self.
return type(self)._simple_new(result, name=self.name)

@doc(Index.duplicated)
def duplicated(self, keep="first"):
codes = self.codes.astype("i8")
return duplicated_int64(codes, keep)

def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
return self.astype("object")
@@ -482,7 +477,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
new_target = np.asarray(new_target)
if is_categorical_dtype(target):
new_target = Categorical(new_target, dtype=target.dtype)
new_target = target._shallow_copy(new_target, name=self.name)
new_target = type(self)._simple_new(new_target, name=self.name)
else:
new_target = Index(new_target, name=self.name)

@@ -506,7 +501,7 @@ def _reindex_non_unique(self, target):
# .reindex returns normal Index. Revert to CategoricalIndex if
# all targets are included in my categories
new_target = Categorical(new_target, dtype=self.dtype)
new_target = self._shallow_copy(new_target)
new_target = type(self)._simple_new(new_target, name=self.name)

return new_target, indexer, new_indexer

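The `CategoricalIndex._shallow_copy` override now only narrows the `values` annotation from `Optional[ExtensionArray]` to `Optional[Categorical]`, which mypy rejects because parameter types are contravariant; hence the inline error comment and the `# type: ignore[override]`. A minimal standalone sketch of the same situation, using invented classes rather than the real pandas ones:

```python
# Hypothetical stand-ins (not the real pandas classes) showing why the
# narrowed override needs "# type: ignore[override]": a subclass may not
# require a *more specific* argument type than its supertype declares.
from typing import Optional


class ExtensionArrayLike:
    ...


class CategoricalLike(ExtensionArrayLike):
    ...


class BaseIndexSketch:
    def _shallow_copy(
        self, values: Optional[ExtensionArrayLike] = None
    ) -> "BaseIndexSketch":
        return self


class CategoricalIndexSketch(BaseIndexSketch):
    # mypy: error: Argument 1 of "_shallow_copy" is incompatible with
    # supertype "BaseIndexSketch" [override] -- silenced the same way here.
    def _shallow_copy(  # type: ignore[override]
        self, values: Optional[CategoricalLike] = None
    ) -> "CategoricalIndexSketch":
        return self
```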
18 changes: 6 additions & 12 deletions pandas/core/indexes/datetimelike.py
@@ -10,7 +10,6 @@
from pandas._libs.tslibs import BaseOffset, Resolution, Tick
from pandas._typing import Callable, Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, cache_readonly, doc

from pandas.core.dtypes.common import (
@@ -124,16 +123,6 @@ def _simple_new(
def _is_all_dates(self) -> bool:
return True

def _shallow_copy(self, values=None, name: Label = lib.no_default):
name = self.name if name is lib.no_default else name

if values is not None:
return self._simple_new(values, name=name)

result = self._simple_new(self._data, name=name)
result._cache = self._cache
return result

# ------------------------------------------------------------------------
# Abstract data attributes

@@ -399,7 +388,7 @@ def _format_with_header(

@property
def _formatter_func(self):
raise AbstractMethodError(self)
return self._data._formatter()

def _format_attrs(self):
"""
@@ -692,6 +681,11 @@ def _with_freq(self, freq):
arr = self._data._with_freq(freq)
return type(self)._simple_new(arr, name=self.name)

@property
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return False

# --------------------------------------------------------------------
# Set Operation Methods

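With `_shallow_copy` moving down to `ExtensionIndex`, the datetime-like mixin keeps only what genuinely differs: `_formatter_func` now delegates to the backing array's `_formatter()` hook instead of raising `AbstractMethodError`, and `_has_complex_internals` is pinned to `False` so these indexes keep the libreduction fast paths. Since `_formatter`'s `boxed` argument defaults to `False`, the explicit `boxed=False` copy removed from period.py below is equivalent. A minimal sketch of the delegation, with made-up `Mini*` names:

```python
# Minimal sketch (hypothetical Mini* names) of the _formatter_func delegation:
# the index asks its ExtensionArray-style container for an element formatter.
class MiniDatetimeArray:
    def _formatter(self, boxed: bool = False):
        # ExtensionArray-style hook: return a callable rendering one element.
        return repr if boxed else str


class MiniDatetimeLikeIndex:
    def __init__(self, data: MiniDatetimeArray):
        self._data = data

    @property
    def _formatter_func(self):
        # Previously: raise AbstractMethodError(self)
        return self._data._formatter()


idx = MiniDatetimeLikeIndex(MiniDatetimeArray())
assert idx._formatter_func(42) == "42"
```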
11 changes: 3 additions & 8 deletions pandas/core/indexes/datetimes.py
@@ -20,11 +20,8 @@

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_float,
is_integer,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_nat_for_dtype
@@ -354,8 +351,6 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
if not is_datetime64_any_dtype(dtype):
return False
if self.tz is not None:
# If we have tz, we can compare to tzaware
return is_datetime64tz_dtype(dtype)
@@ -720,9 +715,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
"""
assert kind in ["loc", "getitem", None]

if is_float(label) or isinstance(label, time) or is_integer(label):
self._invalid_indexer("slice", label)

if isinstance(label, str):
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
parsed, reso = parsing.parse_time_string(label, freq)
@@ -739,6 +731,9 @@ def _maybe_cast_slice_bound(self, label, side: str, kind):
return lower if side == "left" else upper
elif isinstance(label, (self._data._recognized_scalars, date)):
self._deprecate_mismatched_indexing(label)
else:
self._invalid_indexer("slice", label)

return self._maybe_cast_for_get_loc(label)

def _get_string_slice(self, key: str):
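In `_maybe_cast_slice_bound`, the up-front rejection of float, integer, and `time` labels is replaced by an `else` branch at the end of the `isinstance` chain, so every unrecognized label type is funneled through `self._invalid_indexer` in one place. A rough standalone sketch of the new control flow (hypothetical helper, heavily simplified types):

```python
# Rough sketch (hypothetical helper, not the pandas method) of the reshaped
# control flow: strings are parsed, recognized datetime-like scalars pass
# through, and everything else -- floats, ints, datetime.time, ... -- now
# falls into a single `else` that rejects the slice bound.
import datetime as dt


def cast_slice_bound_sketch(label):
    if isinstance(label, str):
        return ("parse", label)          # parse_time_string(...) path
    elif isinstance(label, (dt.datetime, dt.date)):
        return ("scalar", label)         # recognized-scalar path
    else:
        raise TypeError(
            f"cannot do slice indexing with a label of type {type(label).__name__}"
        )


assert cast_slice_bound_sketch("2020-11-20")[0] == "parse"
assert cast_slice_bound_sketch(dt.date(2020, 11, 20))[0] == "scalar"
try:
    cast_slice_bound_sketch(1.5)         # previously caught by the up-front check
except TypeError:
    pass
```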
24 changes: 22 additions & 2 deletions pandas/core/indexes/extension.py
@@ -1,10 +1,12 @@
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
from typing import List, TypeVar
from typing import List, Optional, TypeVar

import numpy as np

from pandas._libs import lib
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly, doc
@@ -211,6 +213,24 @@ class ExtensionIndex(Index):
__le__ = _make_wrapped_comparison_op("__le__")
__ge__ = _make_wrapped_comparison_op("__ge__")

@doc(Index._shallow_copy)
def _shallow_copy(
self, values: Optional[ExtensionArray] = None, name: Label = lib.no_default
):
name = self.name if name is lib.no_default else name

if values is not None:
return self._simple_new(values, name=name)

result = self._simple_new(self._data, name=name)
result._cache = self._cache
return result

@property
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return True

# ---------------------------------------------------------------------
# NDarray-Like Methods

@@ -251,7 +271,7 @@ def _get_engine_target(self) -> np.ndarray:
def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
result = self._data.repeat(repeats, axis=axis)
return self._shallow_copy(result)
return type(self)._simple_new(result, name=self.name)

def insert(self, loc: int, item):
# ExtensionIndex subclasses must override Index.insert
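This file is the hub of the refactor: `_shallow_copy` and `_has_complex_internals` now live once on `ExtensionIndex` (the datetime-like mixin flips the latter back to `False`), and methods that already hold freshly built values, such as `repeat` here and `delete`/`insert` below, construct directly via `type(self)._simple_new`. A minimal mock of the sharing pattern, with invented `Mini*` classes rather than the real pandas internals:

```python
# Invented Mini* classes (not pandas internals) mimicking the sharing pattern:
# the base class owns one _shallow_copy, subclasses simply inherit it, and
# call sites that already hold new values go straight through _simple_new.
from typing import Optional


class MiniExtensionIndex:
    def __init__(self, data, name: Optional[str] = None):
        self._data = data
        self.name = name
        self._cache: dict = {}

    @classmethod
    def _simple_new(cls, values, name: Optional[str] = None) -> "MiniExtensionIndex":
        # Cheap constructor: wrap pre-validated values, no coercion.
        return cls(values, name=name)

    def _shallow_copy(self, values=None, name=None) -> "MiniExtensionIndex":
        # Shared implementation: fresh values -> new index of the same type;
        # no values -> reuse self._data and carry the attribute cache along.
        # (None stands in for the lib.no_default sentinel used in pandas.)
        name = self.name if name is None else name
        if values is not None:
            return self._simple_new(values, name=name)
        result = self._simple_new(self._data, name=name)
        result._cache = self._cache
        return result

    def repeat(self, repeats: int) -> "MiniExtensionIndex":
        # Values are always freshly built here, so _simple_new suffices.
        return type(self)._simple_new(self._data * repeats, name=self.name)


class MiniCategoricalIndex(MiniExtensionIndex):
    # No per-subclass _shallow_copy needed any more.
    pass


idx = MiniCategoricalIndex(["a", "b"], name="x")
assert type(idx._shallow_copy()) is MiniCategoricalIndex
assert idx.repeat(2)._data == ["a", "b", "a", "b"]
```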
22 changes: 2 additions & 20 deletions pandas/core/indexes/interval.py
@@ -320,19 +320,6 @@ def from_tuples(

# --------------------------------------------------------------------

@Appender(Index._shallow_copy.__doc__)
def _shallow_copy(
self, values: Optional[IntervalArray] = None, name: Label = lib.no_default
):
name = self.name if name is lib.no_default else name

if values is not None:
return self._simple_new(values, name=name)

result = self._simple_new(self._data, name=name)
result._cache = self._cache
return result

@cache_readonly
def _engine(self):
left = self._maybe_convert_i8(self.left)
@@ -373,11 +360,6 @@ def values(self) -> IntervalArray:
"""
return self._data

@property
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return True

def __array_wrap__(self, result, context=None):
# we don't want the superclass implementation
return result
@@ -893,7 +875,7 @@ def delete(self, loc):
new_left = self.left.delete(loc)
new_right = self.right.delete(loc)
result = IntervalArray.from_arrays(new_left, new_right, closed=self.closed)
return self._shallow_copy(result)
return type(self)._simple_new(result, name=self.name)

def insert(self, loc, item):
"""
@@ -915,7 +897,7 @@ def insert(self, loc, item):
new_left = self.left.insert(loc, left_insert)
new_right = self.right.insert(loc, right_insert)
result = IntervalArray.from_arrays(new_left, new_right, closed=self.closed)
return self._shallow_copy(result)
return type(self)._simple_new(result, name=self.name)

# --------------------------------------------------------------------
# Rendering Methods
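`IntervalIndex` drops its private copies of `_shallow_copy` and `_has_complex_internals` and, like `repeat` above, rebuilds `delete`/`insert` results with `type(self)._simple_new(result, name=self.name)`. A quick check against the public API, on the assumption that behavior is unchanged by the refactor:

```python
import pandas as pd

# The change is internal only: delete/insert still return an IntervalIndex
# of the same subclass and keep the caller's name.
ii = pd.interval_range(start=0, end=3, name="bins")   # (0, 1], (1, 2], (2, 3]
shorter = ii.delete(1)                                # drop (1, 2]
longer = ii.insert(0, pd.Interval(-1, 0))             # prepend (-1, 0]

assert isinstance(shorter, pd.IntervalIndex) and shorter.name == "bins"
assert isinstance(longer, pd.IntervalIndex) and longer.name == "bins"
```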
4 changes: 2 additions & 2 deletions pandas/core/indexes/multi.py
@@ -3629,7 +3629,7 @@ def astype(self, dtype, copy=True):
return self._shallow_copy()
return self

def _validate_insert_value(self, item):
def _validate_fill_value(self, item):
if not isinstance(item, tuple):
# Pad the key with empty strings if lower levels of the key
# aren't specified:
@@ -3652,7 +3652,7 @@ def insert(self, loc: int, item):
-------
new_index : Index
"""
item = self._validate_insert_value(item)
item = self._validate_fill_value(item)

new_levels = []
new_codes = []
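The MultiIndex hook is renamed from `_validate_insert_value` to `_validate_fill_value` so it matches the name the other Index subclasses already use; its behavior, padding a scalar key with empty strings up to `nlevels` and rejecting tuples of the wrong length, is unchanged. A standalone sketch of that validation, with a hypothetical helper name:

```python
# Hypothetical standalone helper mirroring what the renamed
# _validate_fill_value does for MultiIndex.insert.
from typing import Any, Tuple


def pad_multiindex_key(item: Any, nlevels: int) -> Tuple[Any, ...]:
    if not isinstance(item, tuple):
        # Pad the key with empty strings for unspecified lower levels.
        return (item,) + ("",) * (nlevels - 1)
    if len(item) != nlevels:
        raise ValueError("Item must have length equal to number of levels.")
    return item


assert pad_multiindex_key("a", 3) == ("a", "", "")
assert pad_multiindex_key(("a", 1), 2) == ("a", 1)
```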
9 changes: 0 additions & 9 deletions pandas/core/indexes/period.py
@@ -251,11 +251,6 @@ def __new__(
def values(self) -> np.ndarray:
return np.asarray(self)

@property
def _has_complex_internals(self) -> bool:
# used to avoid libreduction code paths, which raise or require conversion
return True

def _maybe_convert_timedelta(self, other):
"""
Convert timedelta-like input to an integer multiple of self.freq
@@ -307,10 +302,6 @@ def _mpl_repr(self):
# how to represent ourselves to matplotlib
return self.astype(object)._values

@property
def _formatter_func(self):
return self._data._formatter(boxed=False)

# ------------------------------------------------------------------------
# Indexing

7 changes: 0 additions & 7 deletions pandas/core/indexes/timedeltas.py
@@ -157,13 +157,6 @@ def __new__(
)
return cls._simple_new(tdarr, name=name)

# -------------------------------------------------------------------
# Rendering Methods

@property
def _formatter_func(self):
return self._data._formatter()

# -------------------------------------------------------------------

@doc(Index.astype)