From e90c504d49ee97002c42556504d4215a3622dc5a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Oct 2020 19:51:02 -0700 Subject: [PATCH 1/4] REF: share putmask, insert, delete --- pandas/core/indexes/category.py | 53 +---------------- pandas/core/indexes/datetimelike.py | 90 ++++++++++++++--------------- pandas/core/indexes/extension.py | 58 +++++++++++++++++++ pandas/core/indexes/interval.py | 2 +- 4 files changed, 105 insertions(+), 98 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ebe1ddb07cad0..15dbbcf8ef239 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -27,7 +27,7 @@ from pandas.core.construction import extract_array import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name -from pandas.core.indexes.extension import ExtensionIndex, inherit_names +from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names import pandas.core.missing as missing from pandas.core.ops import get_op_result_name @@ -66,7 +66,7 @@ typ="method", overwrite=True, ) -class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): +class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate): """ Index based on an underlying :class:`Categorical`. @@ -421,17 +421,6 @@ def where(self, cond, other=None): cat = Categorical(values, dtype=self.dtype) return type(self)._simple_new(cat, name=self.name) - def putmask(self, mask, value): - try: - code_value = self._data._validate_where_value(value) - except (TypeError, ValueError): - return self.astype(object).putmask(mask, value) - - codes = self._data._ndarray.copy() - np.putmask(codes, mask, code_value) - cat = self._data._from_backing_data(codes) - return type(self)._simple_new(cat, name=self.name) - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) @@ -661,44 +650,6 @@ def map(self, mapper): mapped = self._values.map(mapper) return Index(mapped, name=self.name) - def delete(self, loc): - """ - Make new Index with passed location(-s) deleted - - Returns - ------- - new_index : Index - """ - codes = np.delete(self.codes, loc) - cat = self._data._from_backing_data(codes) - return type(self)._simple_new(cat, name=self.name) - - def insert(self, loc: int, item): - """ - Make new Index inserting new item at location. Follows - Python list.append semantics for negative values - - Parameters - ---------- - loc : int - item : object - - Returns - ------- - new_index : Index - - Raises - ------ - ValueError if the item is not in the categories - - """ - code = self._data._validate_insert_value(item) - - codes = self.codes - codes = np.concatenate((codes[:loc], [code], codes[loc:])) - cat = self._data._from_backing_data(codes) - return type(self)._simple_new(cat, name=self.name) - def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 863880e222b5d..559bc1c4c73e2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -34,7 +34,7 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.extension import ( - ExtensionIndex, + NDArrayBackedExtensionIndex, inherit_names, make_wrapped_arith_op, ) @@ -83,7 +83,7 @@ def wrapper(left, right): cache=True, ) @inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin) -class DatetimeIndexOpsMixin(ExtensionIndex): +class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): """ Common ops mixin to support a unified interface datetimelike Index. """ @@ -191,7 +191,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) - result = ExtensionIndex.take( + result = NDArrayBackedExtensionIndex.take( self, indices, axis, allow_fill, fill_value, **kwargs ) if isinstance(maybe_slice, slice): @@ -496,17 +496,6 @@ def where(self, cond, other=None): arr = self._data._from_backing_data(result) return type(self)._simple_new(arr, name=self.name) - def putmask(self, mask, value): - try: - value = self._data._validate_where_value(value) - except (TypeError, ValueError): - return self.astype(object).putmask(mask, value) - - result = self._data._ndarray.copy() - np.putmask(result, mask, value) - arr = self._data._from_backing_data(result) - return type(self)._simple_new(arr, name=self.name) - def _summary(self, name=None) -> str: """ Return a summarized representation. @@ -575,39 +564,30 @@ def shift(self, periods=1, freq=None): # -------------------------------------------------------------------- # List-like Methods - def delete(self, loc): - new_i8s = np.delete(self.asi8, loc) - + def _get_delete_freq(self, loc: int): + """ + Find the `freq` for self.delete(loc). + """ freq = None if is_period_dtype(self.dtype): freq = self.freq - elif is_integer(loc): - if loc in (0, -len(self), -1, len(self) - 1): - freq = self.freq - else: - if is_list_like(loc): - loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self)) - if isinstance(loc, slice) and loc.step in (1, None): - if loc.start in (0, None) or loc.stop in (len(self), None): + elif self.freq is not None: + if is_integer(loc): + if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq + else: + if is_list_like(loc): + loc = lib.maybe_indices_to_slice( + ensure_int64(np.array(loc)), len(self) + ) + if isinstance(loc, slice) and loc.step in (1, None): + if loc.start in (0, None) or loc.stop in (len(self), None): + freq = self.freq + return freq - arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) - return type(self)._simple_new(arr, name=self.name) - - def insert(self, loc: int, item): + def _get_insert_freq(self, loc, item): """ - Make new Index inserting new item at location - - Parameters - ---------- - loc : int - item : object - if not either a Python datetime or a numpy integer-like, returned - Index dtype will be object rather than datetime. - - Returns - ------- - new_index : Index + Find the `freq` for self.insert(loc, item). """ value = self._data._validate_insert_value(item) item = self._data._box_func(value) @@ -628,14 +608,32 @@ def insert(self, loc: int, item): # Adding a single item to an empty index may preserve freq if self.freq.is_on_offset(item): freq = self.freq + return freq + + def delete(self, loc): + result = super().delete(loc) + result._data._freq = self._get_delete_freq(loc) + return result + + def insert(self, loc: int, item): + """ + Make new Index inserting new item at location - arr = self._data + Parameters + ---------- + loc : int + item : object + if not either a Python datetime or a numpy integer-like, returned + Index dtype will be object rather than datetime. - new_values = np.concatenate([arr._ndarray[:loc], [value], arr._ndarray[loc:]]) - new_arr = self._data._from_backing_data(new_values) - new_arr._freq = freq + Returns + ------- + new_index : Index + """ + result = super().insert(loc, item) - return type(self)._simple_new(new_arr, name=self.name) + result._data._freq = self._get_insert_freq(loc, item) + return result # -------------------------------------------------------------------- # Join/Set Methods diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 4da1a43468b57..5a7f26ab9ac84 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.arrays import ExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.base import Index from pandas.core.ops import get_op_result_name @@ -281,3 +282,60 @@ def astype(self, dtype, copy=True): @cache_readonly def _isnan(self) -> np.ndarray: return self._data.isna() + + +class NDArrayBackedExtensionIndex(ExtensionIndex): + """ + Index subclass for indexes backed by NDArrayBackedExtensionArray. + """ + + _data: NDArrayBackedExtensionArray + + def delete(self, loc): + """ + Make new Index with passed location(-s) deleted + + Returns + ------- + new_index : Index + """ + new_vals = np.delete(self._data._ndarray, loc) + arr = self._data._from_backing_data(new_vals) + return type(self)._simple_new(arr, name=self.name) + + def insert(self, loc: int, item): + """ + Make new Index inserting new item at location. Follows + Python list.append semantics for negative values + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : Index + + Raises + ------ + ValueError if the item is not in the categories + """ + # FIXME: docstring is Categorical-specific + arr = self._data + code = arr._validate_insert_value(item) + + new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:])) + new_arr = arr._from_backing_data(new_vals) + return type(self)._simple_new(new_arr, name=self.name) + + def putmask(self, mask, value): + try: + value = self._data._validate_where_value(value) + except (TypeError, ValueError): + return self.astype(object).putmask(mask, value) + + new_values = self._data._ndarray.copy() + np.putmask(new_values, mask, value) + new_arr = self._data._from_backing_data(new_values) + return type(self)._simple_new(new_arr, name=self.name) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3ffb1160c14ce..c2ed098bfc8ef 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -871,7 +871,7 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self._values, other) result = IntervalArray(values) - return self._shallow_copy(result) + return type(self)._simple_new(result, name=self.name) def delete(self, loc): """ From 7a3fdd0ae270bda16334cbaba6d6376f7bb541c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Oct 2020 15:15:44 -0700 Subject: [PATCH 2/4] update docstrings --- pandas/core/indexes/extension.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 5a7f26ab9ac84..1f26ceaf2d1b7 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -306,7 +306,7 @@ def delete(self, loc): def insert(self, loc: int, item): """ Make new Index inserting new item at location. Follows - Python list.append semantics for negative values + Python list.append semantics for negative values. Parameters ---------- @@ -319,9 +319,8 @@ def insert(self, loc: int, item): Raises ------ - ValueError if the item is not in the categories + ValueError if the item is not valid for this dtype. """ - # FIXME: docstring is Categorical-specific arr = self._data code = arr._validate_insert_value(item) From a9600146e449670b42a1e71926dd4ded2ed00b2d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Oct 2020 18:24:35 -0700 Subject: [PATCH 3/4] docstring --- pandas/core/indexes/datetimelike.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 0963c3c64b243..751eafaa0d78e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -610,26 +610,14 @@ def _get_insert_freq(self, loc, item): freq = self.freq return freq + @doc(NDArrayBackedExtensionIndex.delete) def delete(self, loc): result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result + @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): - """ - Make new Index inserting new item at location - - Parameters - ---------- - loc : int - item : object - if not either a Python datetime or a numpy integer-like, returned - Index dtype will be object rather than datetime. - - Returns - ------- - new_index : Index - """ result = super().insert(loc, item) result._data._freq = self._get_insert_freq(loc, item) From 6c3eaa5bb8e0b57bfa9ae64df9211360af0a7a57 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Oct 2020 18:54:31 -0700 Subject: [PATCH 4/4] missing attr --- pandas/core/arrays/_mixins.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 726ca0ce4d776..3eb4615f1fe3e 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -44,6 +44,10 @@ def _box_func(self, x): """ return x + def _validate_insert_value(self, value): + # used by NDArrayBackedExtensionIndex.insert + raise AbstractMethodError(self) + # ------------------------------------------------------------------------ def take(