From d546e76bf34fea2f558823f8a731b4a459823dca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Jan 2020 10:02:03 -0800 Subject: [PATCH 1/5] implement indexes.extension --- pandas/core/indexes/datetimelike.py | 72 +++++------------------------ pandas/core/indexes/datetimes.py | 26 +++++------ pandas/core/indexes/extension.py | 64 +++++++++++++++++++++++++ pandas/core/indexes/interval.py | 7 +-- pandas/core/indexes/timedeltas.py | 21 +++++---- 5 files changed, 102 insertions(+), 88 deletions(-) create mode 100644 pandas/core/indexes/extension.py diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f957860240dd2..eb1751cb582a0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -40,28 +40,9 @@ from pandas.tseries.frequencies import DateOffset, to_offset -_index_doc_kwargs = dict(ibase._index_doc_kwargs) - +from .extension import inherit_names -def ea_passthrough(array_method): - """ - Make an alias for a method of the underlying ExtensionArray. - - Parameters - ---------- - array_method : method on an Array class - - Returns - ------- - method - """ - - def method(self, *args, **kwargs): - return array_method(self._data, *args, **kwargs) - - method.__name__ = array_method.__name__ - method.__doc__ = array_method.__doc__ - return method +_index_doc_kwargs = dict(ibase._index_doc_kwargs) def _make_wrapped_arith_op(opname): @@ -100,6 +81,15 @@ def wrapper(left, right): return wrapper +@inherit_names( + ["inferred_freq", "_isnan", "_resolution", "resolution"], + DatetimeLikeArrayMixin, + cache=True, +) +@inherit_names( + ["__iter__", "mean", "freq", "freqstr", "_ndarray_values", "asi8", "_box_values"], + DatetimeLikeArrayMixin, +) class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ Common ops mixin to support a unified interface datetimelike Index. 
@@ -107,41 +97,13 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): _data: ExtensionArray - # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are - # properties there. They can be made into cache_readonly for Index - # subclasses bc they are immutable - inferred_freq = cache_readonly( - DatetimeLikeArrayMixin.inferred_freq.fget # type: ignore - ) - _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget) # type: ignore hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget) # type: ignore _hasnans = hasnans # for index / array -agnostic code - _resolution = cache_readonly( - DatetimeLikeArrayMixin._resolution.fget # type: ignore - ) - resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) # type: ignore - - __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__) - mean = ea_passthrough(DatetimeLikeArrayMixin.mean) @property def is_all_dates(self) -> bool: return True - @property - def freq(self): - """ - Return the frequency object if it is set, otherwise None. - """ - return self._data.freq - - @property - def freqstr(self): - """ - Return the frequency object as a string if it is set, otherwise None. - """ - return self._data.freqstr - def unique(self, level=None): if level is not None: self._validate_index_level(level) @@ -172,10 +134,6 @@ def wrapper(self, other): wrapper.__name__ = f"__{op.__name__}__" return wrapper - @property - def _ndarray_values(self) -> np.ndarray: - return self._data._ndarray_values - # ------------------------------------------------------------------------ # Abstract data attributes @@ -184,11 +142,6 @@ def values(self): # Note: PeriodArray overrides this to return an ndarray of objects. return self._data._data - @property # type: ignore # https://github.com/python/mypy/issues/1362 - @Appender(DatetimeLikeArrayMixin.asi8.__doc__) - def asi8(self): - return self._data.asi8 - def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. 
@@ -248,9 +201,6 @@ def _ensure_localized( return type(self)._simple_new(result, name=self.name) return arg - def _box_values(self, values): - return self._data._box_values(values) - @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) def __contains__(self, key): try: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f6f46d7e66c69..510f11e1ce310 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -35,6 +35,7 @@ DatetimelikeDelegateMixin, DatetimeTimedeltaMixin, ) +from pandas.core.indexes.extension import inherit_names from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools @@ -72,6 +73,7 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): "_local_timestamps", "_has_same_tz", "_format_native_types", + "__iter__", ] _extra_raw_properties = ["_box_func", "tz", "tzinfo", "dtype"] _delegated_properties = DatetimeArray._datetimelike_ops + _extra_raw_properties @@ -87,6 +89,17 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): _delegate_class = DatetimeArray +@inherit_names( + [ + "_bool_ops", + "_object_ops", + "_field_ops", + "_datetimelike_ops", + "_datetimelike_methods", + ], + DatetimeArray, +) +@inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True) @delegate_names( DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property" ) @@ -209,15 +222,6 @@ class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin): _is_numeric_dtype = False _infer_as_myclass = True - # Use faster implementation given we know we have DatetimeArrays - __iter__ = DatetimeArray.__iter__ - # some things like freq inference make use of these attributes. 
- _bool_ops = DatetimeArray._bool_ops - _object_ops = DatetimeArray._object_ops - _field_ops = DatetimeArray._field_ops - _datetimelike_ops = DatetimeArray._datetimelike_ops - _datetimelike_methods = DatetimeArray._datetimelike_methods - tz: Optional[tzinfo] # -------------------------------------------------------------------- @@ -962,10 +966,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray - _timezone = cache_readonly(DatetimeArray._timezone.fget) # type: ignore - is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) # type: ignore - _resolution = cache_readonly(DatetimeArray._resolution.fget) # type: ignore - def __getitem__(self, key): result = self._data.__getitem__(key) if is_scalar(result): diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py new file mode 100644 index 0000000000000..5dc820439dd2f --- /dev/null +++ b/pandas/core/indexes/extension.py @@ -0,0 +1,64 @@ +""" +Shared methods for Index subclasses backed by ExtensionArray. +""" +from pandas.util._decorators import cache_readonly + +from .base import Index + +from pandas.core.arrays import ExtensionArray + + +def inherit_from_data(name, delegate, cache=False): + """ + Make an alias for a method of the underlying ExtensionArray. + + Parameters + ---------- + name : str + delegate : class + cache : bool, default False + Whether to convert wrapped properties into cache_readonly + + Returns + ------- + method, property, or cache_readonly + """ + attr = getattr(delegate, name) + + if isinstance(attr, property): + # TODO: are we getting the right name/doc here? 
+ if cache: + method = cache_readonly(attr.fget) + + else: + @property + def method(self): + return getattr(self._data, name) + + @method.setter + def method(self, value): + setattr(self._data, name, value) + + elif not callable(attr): + # just a normal attribute, no wrapping + method = attr + + else: + def method(self, *args, **kwargs): + result = attr(self._data, *args, **kwargs) + return result + + method.__name__ = name + method.__doc__ = attr.__doc__ + return method + + +def inherit_names(names, delegate, cache=False): + def wrapper(cls): + for name in names: + meth = inherit_from_data(name, delegate, cache=cache) + setattr(cls, name, meth) + + return cls + + return wrapper diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index abc82dd3c73f5..349362f356b35 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -199,7 +199,7 @@ def func(intvidx_self, other, sort=False): ) @accessor.delegate_names( delegate=IntervalArray, - accessors=["__array__", "overlaps", "contains"], + accessors=["__array__", "overlaps", "contains", "__len__"], typ="method", overwrite=True, ) @@ -437,9 +437,6 @@ def set_closed(self, closed): array = self._data.set_closed(closed) return self._simple_new(array, self.name) # TODO: can we use _shallow_copy? - def __len__(self) -> int: - return len(self.left) - @cache_readonly def values(self): """ @@ -1051,7 +1048,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): result = self._data.take( indices, axis=axis, allow_fill=allow_fill, fill_value=fill_value, **kwargs ) - attributes = self._get_attributes_dict() + attributes = self._get_attributes_dict() # TODO: shallow_copy? 
return self._simple_new(result, **attributes) def __getitem__(self, value): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 795b4836b9a2a..b7b5c19dbd0ba 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -31,6 +31,7 @@ DatetimelikeDelegateMixin, DatetimeTimedeltaMixin, ) +from pandas.core.indexes.extension import inherit_names from pandas.tseries.frequencies import to_offset @@ -52,6 +53,17 @@ class TimedeltaDelegateMixin(DatetimelikeDelegateMixin): ) +@inherit_names( + [ + "_bool_ops", + "_object_ops", + "_field_ops", + "_datetimelike_ops", + "_datetimelike_methods", + "_other_ops", + ], + TimedeltaArray, +) @delegate_names( TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property" ) @@ -125,15 +137,6 @@ class TimedeltaIndex( _is_numeric_dtype = True _infer_as_myclass = True - _freq = None - - _bool_ops = TimedeltaArray._bool_ops - _object_ops = TimedeltaArray._object_ops - _field_ops = TimedeltaArray._field_ops - _datetimelike_ops = TimedeltaArray._datetimelike_ops - _datetimelike_methods = TimedeltaArray._datetimelike_methods - _other_ops = TimedeltaArray._other_ops - # ------------------------------------------------------------------- # Constructors From f736da2563f41dde218f7c465b0910df0008b8ba Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Jan 2020 11:43:00 -0800 Subject: [PATCH 2/5] remove comment --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 349362f356b35..e8a7305b4a568 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1048,7 +1048,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): result = self._data.take( indices, axis=axis, allow_fill=allow_fill, fill_value=fill_value, **kwargs ) - attributes = self._get_attributes_dict() # TODO: shallow_copy? 
+ attributes = self._get_attributes_dict() return self._simple_new(result, **attributes) def __getitem__(self, value): From 96628dd26836dcb4b1b83bf06463296c5e9c490c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Jan 2020 13:09:09 -0800 Subject: [PATCH 3/5] REF: use inherit_names for IntervalIndex cache_readonlys --- pandas/core/indexes/datetimelike.py | 7 ++++- pandas/core/indexes/extension.py | 19 ++++++------- pandas/core/indexes/interval.py | 42 +++-------------------------- 3 files changed, 20 insertions(+), 48 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index eb1751cb582a0..3549cfa3af507 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -2,7 +2,7 @@ Base and utility classes for tseries type pandas objects. """ import operator -from typing import List, Set +from typing import List, Optional, Set import numpy as np @@ -96,6 +96,11 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ _data: ExtensionArray + freq: Optional[DateOffset] + freqstr: Optional[str] + _resolution: int + _bool_ops: List[str] = [] + _field_ops: List[str] = [] hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget) # type: ignore _hasnans = hasnans # for index / array -agnostic code diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 5dc820439dd2f..02acc63bb38ae 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -3,10 +3,6 @@ """ from pandas.util._decorators import cache_readonly -from .base import Index - -from pandas.core.arrays import ExtensionArray - def inherit_from_data(name, delegate, cache=False): """ @@ -23,27 +19,32 @@ def inherit_from_data(name, delegate, cache=False): ------- method, property, or cache_readonly """ + attr = getattr(delegate, name) if isinstance(attr, property): - # TODO: are we getting the right name/doc here? 
if cache: method = cache_readonly(attr.fget) else: - @property - def method(self): + + def fget(self): return getattr(self._data, name) - @method.setter - def method(self, value): + def fset(self, value): setattr(self._data, name, value) + fget.__name__ = name + fget.__doc__ = attr.__doc__ + + method = property(fget, fset) + elif not callable(attr): # just a normal attribute, no wrapping method = attr else: + def method(self, *args, **kwargs): result = attr(self._data, *args, **kwargs) return result diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e8a7305b4a568..7c16c9d6908b0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -58,6 +58,8 @@ from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import DateOffset +from .extension import inherit_names + _VALID_CLOSED = {"left", "right", "both", "neither"} _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -199,10 +201,11 @@ def func(intvidx_self, other, sort=False): ) @accessor.delegate_names( delegate=IntervalArray, - accessors=["__array__", "overlaps", "contains", "__len__"], + accessors=["__array__", "overlaps", "contains", "__len__", "set_closed"], typ="method", overwrite=True, ) +@inherit_names(["is_non_overlapping_monotonic", "mid"], IntervalArray, cache=True) class IntervalIndex(IntervalMixin, Index, accessor.PandasDelegate): _typ = "intervalindex" _comparables = ["name"] @@ -412,31 +415,6 @@ def to_tuples(self, na_tuple=True): def _multiindex(self): return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) - @Appender( - _interval_shared_docs["set_closed"] - % dict( - klass="IntervalIndex", - examples=textwrap.dedent( - """\ - Examples - -------- - >>> index = pd.interval_range(0, 3) - >>> index - IntervalIndex([(0, 1], (1, 2], (2, 3]], - closed='right', - dtype='interval[int64]') - >>> index.set_closed('both') - IntervalIndex([[0, 1], [1, 2], [2, 3]], - closed='both', - 
dtype='interval[int64]') - """ - ), - ) - ) - def set_closed(self, closed): - array = self._data.set_closed(closed) - return self._simple_new(array, self.name) # TODO: can we use _shallow_copy? - @cache_readonly def values(self): """ @@ -487,13 +465,6 @@ def memory_usage(self, deep: bool = False) -> int: # so return the bytes here return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) - @cache_readonly - def mid(self): - """ - Return the midpoint of each Interval in the IntervalIndex as an Index. - """ - return self._data.mid - @cache_readonly def is_monotonic(self) -> bool: """ @@ -542,11 +513,6 @@ def is_unique(self): return True - @cache_readonly - @Appender(_interval_shared_docs["is_non_overlapping_monotonic"] % _index_doc_kwargs) - def is_non_overlapping_monotonic(self): - return self._data.is_non_overlapping_monotonic - @property def is_overlapping(self): """ From f231762fb5ab3459d634f0450998508a8dc2a6e1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Jan 2020 15:58:33 -0800 Subject: [PATCH 4/5] update docstrings, annotations --- pandas/core/indexes/extension.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 02acc63bb38ae..779cd8eac4eaf 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,23 +1,26 @@ """ Shared methods for Index subclasses backed by ExtensionArray. """ +from typing import List + from pandas.util._decorators import cache_readonly -def inherit_from_data(name, delegate, cache=False): +def inherit_from_data(name: str, delegate, cache: bool = False): """ Make an alias for a method of the underlying ExtensionArray. Parameters ---------- name : str + Name of an attribute the class should inherit from its EA parent. 
delegate : class cache : bool, default False Whether to convert wrapped properties into cache_readonly Returns ------- - method, property, or cache_readonly + attribute, method, property, or cache_readonly """ attr = getattr(delegate, name) @@ -54,7 +57,17 @@ def method(self, *args, **kwargs): return method -def inherit_names(names, delegate, cache=False): +def inherit_names(names: List[str], delegate, cache: bool = False): + """ + Class decorator to pin attributes from an ExtensionArray to an Index subclass. + + Parameters + ---------- + names : List[str] + delegate : class + cache : bool, default False + """ + def wrapper(cls): for name in names: meth = inherit_from_data(name, delegate, cache=cache) From 0ccb13eec9b157faf7bf52141389a16ea9822d03 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Jan 2020 16:55:26 -0800 Subject: [PATCH 5/5] confirm cache=True comes first --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b17f1ab530404..eefd33c7a9c34 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -89,6 +89,7 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): _delegate_class = DatetimeArray +@inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True) @inherit_names( [ "_bool_ops", @@ -99,7 +100,6 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): ], DatetimeArray, ) -@inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True) @delegate_names( DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property" )