diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 407aab4bb1f1b..04974f05164f8 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -45,6 +45,7 @@ objects. api.extensions.ExtensionArray.argsort api.extensions.ExtensionArray.astype api.extensions.ExtensionArray.copy + api.extensions.ExtensionArray.view api.extensions.ExtensionArray.dropna api.extensions.ExtensionArray.factorize api.extensions.ExtensionArray.fillna diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e517be4f03a16..41d84b0ae4853 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -64,6 +64,7 @@ class ExtensionArray: shift take unique + view _concat_same_type _formatter _formatting_values @@ -147,7 +148,7 @@ class ExtensionArray: If implementing NumPy's ``__array_ufunc__`` interface, pandas expects that - 1. You defer by raising ``NotImplemented`` when any Series are present + 1. You defer by returning ``NotImplemented`` when any Series are present in `inputs`. Pandas will extract the arrays and call the ufunc again. 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. Pandas inspect this to determine whether the ufunc is valid for the @@ -862,6 +863,27 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) + def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: + """ + Return a view on the array. + + Parameters + ---------- + dtype : str, np.dtype, or ExtensionDtype, optional + Default None + + Returns + ------- + ExtensionArray + """ + # NB: + # - This must return a *new* object referencing the same data, not self. + # - The only case that *must* be implemented is with dtype=None, + # giving a view with the same dtype as self. + if dtype is not None: + raise NotImplementedError(dtype) + return self[:] + # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b16217d5d0a32..e56f623962fa3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -517,19 +517,12 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: return self._set_dtype(dtype) return np.array(self, dtype=dtype, copy=copy) - @cache_readonly - def ndim(self) -> int: - """ - Number of dimensions of the Categorical - """ - return self._codes.ndim - @cache_readonly def size(self) -> int: """ return the len of myself """ - return len(self) + return self._codes.size @cache_readonly def itemsize(self) -> int: @@ -1764,18 +1757,10 @@ def ravel(self, order="C"): ) return np.array(self) - def view(self): - """ - Return a view of myself. - - For internal compatibility with numpy arrays. - - Returns - ------- - view : Categorical - Returns `self`! - """ - return self + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError(dtype) + return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True) def to_dense(self): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 47b138a9e1604..695138ca07f77 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -554,18 +554,8 @@ def astype(self, dtype, copy=True): return np.asarray(self, dtype=dtype) def view(self, dtype=None): - """ - New view on this array with the same data. - - Parameters - ---------- - dtype : numpy dtype, optional - - Returns - ------- - ndarray - With the specified `dtype`. - """ + if dtype is None or dtype is self.dtype: + return type(self)(self._data, dtype=self.dtype) return self._data.view(dtype=dtype) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 2b3c02bd1cade..9a1ed79a99146 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -739,18 +739,14 @@ def isna(self): return isna(self.left) @property - def nbytes(self): + def nbytes(self) -> int: return self.left.nbytes + self.right.nbytes @property - def size(self): + def size(self) -> int: # Avoid materializing self.values return self.left.size - @property - def shape(self): - return self.left.shape - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ Take elements from the IntervalArray. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 39529177b9e35..e8397341a1a1d 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -241,11 +241,11 @@ def __setitem__(self, key, value): else: self._ndarray[key] = value - def __len__(self): + def __len__(self) -> int: return len(self._ndarray) @property - def nbytes(self): + def nbytes(self) -> int: return self._ndarray.nbytes def isna(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 47c7c72051150..7bd57c9c6ed32 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -839,7 +839,7 @@ def fill_value(self, value): self._dtype = SparseDtype(self.dtype.subtype, value) @property - def kind(self): + def kind(self) -> str: """ The kind of sparse index for this array. One of {'integer', 'block'}. """ @@ -854,7 +854,7 @@ def _valid_sp_values(self): mask = notna(sp_vals) return sp_vals[mask] - def __len__(self): + def __len__(self) -> int: return self.sp_index.length @property @@ -868,7 +868,7 @@ def _fill_value_matches(self, fill_value): return self.fill_value == fill_value @property - def nbytes(self): + def nbytes(self) -> int: return self.sp_values.nbytes + self.sp_index.nbytes @property @@ -886,7 +886,7 @@ def density(self): return r @property - def npoints(self): + def npoints(self) -> int: """ The number of non- ``fill_value`` points. diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index cc0deca765b41..9c53210b75d6b 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -41,6 +41,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index dee8021f5375f..a29f6deeffae6 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -75,3 +75,18 @@ def test_copy(self, data): data[1] = data[0] assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] + + # check specifically that the `dtype` kwarg is accepted + data.view(dtype=None) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index c28ff956a33a4..a1988744d76a1 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -137,11 +137,11 @@ def __setitem__(self, key, value): value = decimal.Decimal(value) self._data[key] = value - def __len__(self): + def __len__(self) -> int: return len(self._data) @property - def nbytes(self): + def nbytes(self) -> int: n = len(self) if n: return n * sys.getsizeof(self[0]) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 21c4ac8f055a2..b64ddbd6ac84d 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -80,6 +80,9 @@ def __getitem__(self, item): elif isinstance(item, abc.Iterable): # fancy indexing return type(self)([self.data[i] for i in item]) + elif isinstance(item, slice) and item == slice(None): + # Make sure we get a view + return type(self)(self.data) else: # slice return type(self)(self.data[item]) @@ -103,11 +106,11 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __len__(self): + def __len__(self) -> int: return len(self.data) @property - def nbytes(self): + def nbytes(self) -> int: return sys.getsizeof(self.data) def isna(self): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 1aab71286b4a6..4fdcf930d224f 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -95,7 +95,10 @@ class TestGrouping(BaseInterval, base.BaseGroupbyTests): class TestInterface(BaseInterval, base.BaseInterfaceTests): - pass + def test_view(self, data): + # __setitem__ incorrectly makes a copy (GH#27147), so we only + # have a smoke-test + data.view() class TestReduce(base.BaseNoReduceTests): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 84d59902d2aa7..6ebe71e173ec2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -103,6 +103,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass