Skip to content

REF: rename PandasArray->NumpyExtensionArray #54101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.api.indexers.VariableOffsetWindowIndexer \
pandas.api.extensions.ExtensionDtype \
pandas.api.extensions.ExtensionArray \
pandas.arrays.PandasArray \
pandas.arrays.NumpyExtensionArray \
pandas.api.extensions.ExtensionArray._accumulate \
pandas.api.extensions.ExtensionArray._concat_same_type \
pandas.api.extensions.ExtensionArray._formatter \
Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ be located.

8) Is your test for one of the pandas-provided ExtensionArrays (``Categorical``,
``DatetimeArray``, ``TimedeltaArray``, ``PeriodArray``, ``IntervalArray``,
``PandasArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)?
``NumpyExtensionArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)?
This test likely belongs in one of:

- tests.arrays
Expand Down
2 changes: 1 addition & 1 deletion doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ objects.
:template: autosummary/class_without_autosummary.rst

api.extensions.ExtensionArray
arrays.PandasArray
arrays.NumpyExtensionArray

.. We need this autosummary so that methods and attributes are generated.
.. Separate block, since they aren't classes.
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor

Other API changes
^^^^^^^^^^^^^^^^^
- :class:`arrays.PandasArray` has been renamed ``NumpyExtensionArray`` and the attached dtype name changed from ``PandasDtype`` to ``NumpyEADtype``; importing ``PandasArray`` still works until the next major version (:issue:`53694`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
from pandas.core.arrays import (
BaseMaskedArray,
ExtensionArray,
PandasArray,
NumpyExtensionArray,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -307,7 +307,7 @@ def box_expected(expected, box_cls, transpose: bool = True):
if box_cls is pd.array:
if isinstance(expected, RangeIndex):
# pd.array would return an IntegerArray
expected = PandasArray(np.asarray(expected._values))
expected = NumpyExtensionArray(np.asarray(expected._values))
else:
expected = pd.array(expected, copy=False)
elif box_cls is Index:
Expand Down
6 changes: 3 additions & 3 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PandasDtype,
NumpyEADtype,
)
from pandas.core.dtypes.missing import array_equivalent

Expand Down Expand Up @@ -577,12 +577,12 @@ def raise_assert_detail(

if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif isinstance(left, (CategoricalDtype, PandasDtype, StringDtype)):
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
left = repr(left)

if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif isinstance(right, (CategoricalDtype, PandasDtype, StringDtype)):
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
right = repr(right)

msg += f"""
Expand Down
21 changes: 19 additions & 2 deletions pandas/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
FloatingArray,
IntegerArray,
IntervalArray,
PandasArray,
NumpyExtensionArray,
PeriodArray,
SparseArray,
StringArray,
Expand All @@ -28,9 +28,26 @@
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",
"NumpyExtensionArray",
"PeriodArray",
"SparseArray",
"StringArray",
"TimedeltaArray",
]


def __getattr__(name: str):
    """
    Module-level attribute hook that serves the deprecated ``PandasArray`` alias.

    Parameters
    ----------
    name : str
        The attribute being looked up on this module.

    Returns
    -------
    type
        ``NumpyExtensionArray`` when ``name`` is ``"PandasArray"``; a
        ``FutureWarning`` about the rename is emitted before returning.

    Raises
    ------
    AttributeError
        If ``name`` is anything other than ``"PandasArray"``.
    """
    # Anything except the legacy alias is a genuine missing attribute.
    if name != "PandasArray":
        raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")

    # GH#53694
    # Kept function-local, so these imports only run when the alias is requested.
    import warnings

    from pandas.util._exceptions import find_stack_level

    message = (
        "PandasArray has been renamed NumpyExtensionArray. Use that "
        "instead. This alias will be removed in a future version."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
    return NumpyExtensionArray
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
BaseMaskedDtype,
CategoricalDtype,
ExtensionDtype,
PandasDtype,
NumpyEADtype,
)
from pandas.core.dtypes.generic import (
ABCDatetimeArray,
Expand Down Expand Up @@ -1439,8 +1439,8 @@ def diff(arr, n: int, axis: AxisInt = 0):
else:
op = operator.sub

if isinstance(dtype, PandasDtype):
# PandasArray cannot necessarily hold shifted versions of itself.
if isinstance(dtype, NumpyEADtype):
# NumpyExtensionArray cannot necessarily hold shifted versions of itself.
arr = arr.to_numpy()
dtype = arr.dtype

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.core.arrays.integer import IntegerArray
from pandas.core.arrays.interval import IntervalArray
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.numpy_ import PandasArray
from pandas.core.arrays.numpy_ import NumpyExtensionArray
from pandas.core.arrays.period import (
PeriodArray,
period_array,
Expand All @@ -34,7 +34,7 @@
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",
"NumpyExtensionArray",
"PeriodArray",
"period_array",
"SparseArray",
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
func(npvalues, limit=limit, mask=mask.T)
npvalues = npvalues.T

# TODO: PandasArray didn't used to copy, need tests for this
# TODO: NumpyExtensionArray did not previously copy, need tests
# for this
new_values = self._from_backing_data(npvalues)
else:
# fill with value
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2642,18 +2642,18 @@ def _str_map(
# Optimization to apply the callable `f` to the categories once
# and rebuild the result by `take`ing from the result with the codes.
# Returns the same type as the object-dtype implementation though.
from pandas.core.arrays import PandasArray
from pandas.core.arrays import NumpyExtensionArray

categories = self.categories
codes = self.codes
result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype)
result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
return take_nd(result, codes, fill_value=na_value)

def _str_get_dummies(self, sep: str = "|"):
# sep may not be in categories. Just bail on this.
from pandas.core.arrays import PandasArray
from pandas.core.arrays import NumpyExtensionArray

return PandasArray(self.astype(str))._str_get_dummies(sep)
return NumpyExtensionArray(self.astype(str))._str_get_dummies(sep)

# ------------------------------------------------------------------------
# GroupBy Methods
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ def _validate_listlike(self, value, allow_object: bool = False):
msg = self._validation_error_message(value, True)
raise TypeError(msg)

# Do type inference if necessary up front (after unpacking PandasArray)
# Do type inference if necessary up front (after unpacking
# NumpyExtensionArray)
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
value = extract_array(value, extract_numpy=True)
value = pd_array(value)
Expand Down
44 changes: 24 additions & 20 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.missing import isna

from pandas.core import (
Expand Down Expand Up @@ -48,7 +48,7 @@

# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class PandasArray( # type: ignore[misc]
class NumpyExtensionArray( # type: ignore[misc]
OpsMixin,
NDArrayBackedExtensionArray,
ObjectStringArrayMixin,
Expand Down Expand Up @@ -76,19 +76,21 @@ class PandasArray( # type: ignore[misc]
"""

# If you're wondering why pd.Series(cls) doesn't put the array in an
# ExtensionBlock, search for `ABCPandasArray`. We check for
# ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for
# that _typ to ensure that users don't unnecessarily use EAs inside
# pandas internals, which turns off things like block consolidation.
_typ = "npy_extension"
__array_priority__ = 1000
_ndarray: np.ndarray
_dtype: PandasDtype
_dtype: NumpyEADtype
_internal_fill_value = np.nan

# ------------------------------------------------------------------------
# Constructors

def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
def __init__(
self, values: np.ndarray | NumpyExtensionArray, copy: bool = False
) -> None:
if isinstance(values, type(self)):
values = values._ndarray
if not isinstance(values, np.ndarray):
Expand All @@ -98,19 +100,19 @@ def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None

if values.ndim == 0:
# Technically we support 2, but do not advertise that fact.
raise ValueError("PandasArray must be 1-dimensional.")
raise ValueError("NumpyExtensionArray must be 1-dimensional.")

if copy:
values = values.copy()

dtype = PandasDtype(values.dtype)
dtype = NumpyEADtype(values.dtype)
super().__init__(values, dtype)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> PandasArray:
if isinstance(dtype, PandasDtype):
) -> NumpyExtensionArray:
if isinstance(dtype, NumpyEADtype):
dtype = dtype._dtype

# error: Argument "dtype" to "asarray" has incompatible type
Expand All @@ -131,14 +133,14 @@ def _from_sequence(
result = result.copy()
return cls(result)

def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray:
return type(self)(arr)

# ------------------------------------------------------------------------
# Data

@property
def dtype(self) -> PandasDtype:
def dtype(self) -> NumpyEADtype:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to note that the returned dtype has changed too

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure. thoughts on the name NumpyEADtype? i was torn between wanting parity with ArrowDtype vs confusion with np.dtype

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO NumpyEADtype sounds good

return self._dtype

# ------------------------------------------------------------------------
Expand All @@ -151,7 +153,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
# https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
# The primary modification is not boxing scalar return values
# in PandasArray, since pandas' ExtensionArrays are 1-d.
# in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d.
out = kwargs.get("out", ())

result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
Expand All @@ -175,10 +177,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
return result

# Defer to the implementation of the ufunc on unwrapped values.
inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
inputs = tuple(
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs
)
if out:
kwargs["out"] = tuple(
x._ndarray if isinstance(x, PandasArray) else x for x in out
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out
)
result = getattr(ufunc, method)(*inputs, **kwargs)

Expand Down Expand Up @@ -499,20 +503,20 @@ def to_numpy(
# ------------------------------------------------------------------------
# Ops

def __invert__(self) -> PandasArray:
def __invert__(self) -> NumpyExtensionArray:
return type(self)(~self._ndarray)

def __neg__(self) -> PandasArray:
def __neg__(self) -> NumpyExtensionArray:
return type(self)(-self._ndarray)

def __pos__(self) -> PandasArray:
def __pos__(self) -> NumpyExtensionArray:
return type(self)(+self._ndarray)

def __abs__(self) -> PandasArray:
def __abs__(self) -> NumpyExtensionArray:
return type(self)(abs(self._ndarray))

def _cmp_method(self, other, op):
if isinstance(other, PandasArray):
if isinstance(other, NumpyExtensionArray):
other = other._ndarray

other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
Expand All @@ -538,7 +542,7 @@ def _cmp_method(self, other, op):

def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a PandasArray
# of a NumpyExtensionArray
if result.dtype.kind == "m" and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
Expand Down
Loading