From e4304daf097866412a4747b888a38934d84ef2ed Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Jul 2023 16:11:26 -0700 Subject: [PATCH 1/2] REF: rename PandasArray->NumpyExtensionArray --- ci/code_checks.sh | 2 +- .../development/contributing_codebase.rst | 2 +- doc/source/reference/extensions.rst | 2 +- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/_testing/__init__.py | 4 +- pandas/_testing/asserters.py | 6 +- pandas/arrays/__init__.py | 21 ++++- pandas/core/algorithms.py | 6 +- pandas/core/arrays/__init__.py | 4 +- pandas/core/arrays/_mixins.py | 3 +- pandas/core/arrays/categorical.py | 8 +- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/numpy_.py | 44 +++++----- pandas/core/arrays/string_.py | 14 ++-- pandas/core/base.py | 4 +- pandas/core/construction.py | 48 +++++------ pandas/core/dtypes/astype.py | 6 +- pandas/core/dtypes/dtypes.py | 22 ++--- pandas/core/dtypes/generic.py | 8 +- pandas/core/indexes/base.py | 4 +- pandas/core/internals/array_manager.py | 11 +-- pandas/core/internals/blocks.py | 26 +++--- pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 20 ++--- pandas/core/strings/__init__.py | 2 +- pandas/core/tools/datetimes.py | 4 +- pandas/tests/api/test_api.py | 8 ++ pandas/tests/arithmetic/common.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 6 +- pandas/tests/arrays/numpy_/test_numpy.py | 82 +++++++++---------- pandas/tests/arrays/test_array.py | 35 ++++---- pandas/tests/arrays/test_datetimelike.py | 8 +- pandas/tests/arrays/test_ndarray_backed.py | 8 +- pandas/tests/base/test_conversion.py | 6 +- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/dtypes/test_generic.py | 2 +- pandas/tests/dtypes/test_inference.py | 3 +- pandas/tests/extension/base/constructors.py | 2 +- pandas/tests/extension/test_numpy.py | 62 +++++++------- pandas/tests/frame/indexing/test_indexing.py | 2 +- 
.../frame/methods/test_to_dict_of_blocks.py | 4 +- pandas/tests/frame/test_block_internals.py | 8 +- pandas/tests/frame/test_constructors.py | 4 +- pandas/tests/internals/test_internals.py | 10 +-- pandas/tests/series/methods/test_astype.py | 4 +- pandas/tests/series/test_constructors.py | 2 +- 48 files changed, 293 insertions(+), 253 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fd256f2ff7db0..7123866889422 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -125,7 +125,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.io.formats.style.Styler.to_string \ pandas.api.extensions.ExtensionDtype \ pandas.api.extensions.ExtensionArray \ - pandas.arrays.PandasArray \ + pandas.arrays.NumpyExtensionArray \ pandas.api.extensions.ExtensionArray._accumulate \ pandas.api.extensions.ExtensionArray._concat_same_type \ pandas.api.extensions.ExtensionArray._formatter \ diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index f3ff5b70d4aac..df5b69c471b09 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -475,7 +475,7 @@ be located. 8) Is your test for one of the pandas-provided ExtensionArrays (``Categorical``, ``DatetimeArray``, ``TimedeltaArray``, ``PeriodArray``, ``IntervalArray``, - ``PandasArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)? + ``NumpyExtensionArray``, ``FloatArray``, ``BoolArray``, ``StringArray``)? This test likely belongs in one of: - tests.arrays diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 63eacc3f6d1d9..bff5b2b70b518 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -24,7 +24,7 @@ objects. :template: autosummary/class_without_autosummary.rst api.extensions.ExtensionArray - arrays.PandasArray + arrays.NumpyExtensionArray .. 
We need this autosummary so that methods and attributes are generated. .. Separate block, since they aren't classes. diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7450fc6fdc1da..dd56002ba3deb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -242,7 +242,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ -- +- :class:`arrays.PandasArray` has been renamed ``NumpyExtensionArray``; importing ``PandasArray`` still works until the next major version (:issue:`53694`) .. --------------------------------------------------------------------------- .. _whatsnew_210.deprecations: diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index d1a729343e062..886c0f389ebeb 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -102,7 +102,7 @@ from pandas.core.arrays import ( BaseMaskedArray, ExtensionArray, - PandasArray, + NumpyExtensionArray, ) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.construction import extract_array @@ -307,7 +307,7 @@ def box_expected(expected, box_cls, transpose: bool = True): if box_cls is pd.array: if isinstance(expected, RangeIndex): # pd.array would return an IntegerArray - expected = PandasArray(np.asarray(expected._values)) + expected = NumpyExtensionArray(np.asarray(expected._values)) else: expected = pd.array(expected, copy=False) elif box_cls is Index: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d296cc998134b..0591394f5d9ed 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -25,7 +25,7 @@ CategoricalDtype, DatetimeTZDtype, ExtensionDtype, - PandasDtype, + NumpyEADtype, ) from pandas.core.dtypes.missing import array_equivalent @@ -577,12 +577,12 @@ def raise_assert_detail( if isinstance(left, np.ndarray): left = pprint_thing(left) - elif isinstance(left, 
(CategoricalDtype, PandasDtype, StringDtype)): + elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)): left = repr(left) if isinstance(right, np.ndarray): right = pprint_thing(right) - elif isinstance(right, (CategoricalDtype, PandasDtype, StringDtype)): + elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)): right = repr(right) msg += f""" diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py index 3a8e80a6b5d2b..32e2afc0eef52 100644 --- a/pandas/arrays/__init__.py +++ b/pandas/arrays/__init__.py @@ -12,7 +12,7 @@ FloatingArray, IntegerArray, IntervalArray, - PandasArray, + NumpyExtensionArray, PeriodArray, SparseArray, StringArray, @@ -28,9 +28,26 @@ "FloatingArray", "IntegerArray", "IntervalArray", - "PandasArray", + "NumpyExtensionArray", "PeriodArray", "SparseArray", "StringArray", "TimedeltaArray", ] + + +def __getattr__(name: str): + if name == "PandasArray": + # GH#53694 + import warnings + + from pandas.util._exceptions import find_stack_level + + warnings.warn( + "PandasArray has been renamed NumpyExtensionArray. Use that " + "instead. This alias will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return NumpyExtensionArray + raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'") diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e0340f99b92e1..14dee202a9d8d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -58,7 +58,7 @@ BaseMaskedDtype, CategoricalDtype, ExtensionDtype, - PandasDtype, + NumpyEADtype, ) from pandas.core.dtypes.generic import ( ABCDatetimeArray, @@ -1439,8 +1439,8 @@ def diff(arr, n: int, axis: AxisInt = 0): else: op = operator.sub - if isinstance(dtype, PandasDtype): - # PandasArray cannot necessarily hold shifted versions of itself. + if isinstance(dtype, NumpyEADtype): + # NumpyExtensionArray cannot necessarily hold shifted versions of itself. 
arr = arr.to_numpy() dtype = arr.dtype diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 79be8760db931..245a171fea74b 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -11,7 +11,7 @@ from pandas.core.arrays.integer import IntegerArray from pandas.core.arrays.interval import IntervalArray from pandas.core.arrays.masked import BaseMaskedArray -from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray from pandas.core.arrays.period import ( PeriodArray, period_array, @@ -34,7 +34,7 @@ "FloatingArray", "IntegerArray", "IntervalArray", - "PandasArray", + "NumpyExtensionArray", "PeriodArray", "period_array", "SparseArray", diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 190b74a675711..d399b4780a938 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -317,7 +317,8 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: func(npvalues, limit=limit, mask=mask.T) npvalues = npvalues.T - # TODO: PandasArray didn't used to copy, need tests for this + # TODO: NumpyExtensionArray didn't used to copy, need tests + # for this new_values = self._from_backing_data(npvalues) else: # fill with value diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 46e2b64cb60c6..4903269b13e95 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2633,18 +2633,18 @@ def _str_map( # Optimization to apply the callable `f` to the categories once # and rebuild the result by `take`ing from the result with the codes. # Returns the same type as the object-dtype implementation though. 
- from pandas.core.arrays import PandasArray + from pandas.core.arrays import NumpyExtensionArray categories = self.categories codes = self.codes - result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype) + result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype) return take_nd(result, codes, fill_value=na_value) def _str_get_dummies(self, sep: str = "|"): # sep may not be in categories. Just bail on this. - from pandas.core.arrays import PandasArray + from pandas.core.arrays import NumpyExtensionArray - return PandasArray(self.astype(str))._str_get_dummies(sep) + return NumpyExtensionArray(self.astype(str))._str_get_dummies(sep) # ------------------------------------------------------------------------ # GroupBy Methods diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3274b822f3bd7..e11878dace88e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -652,7 +652,8 @@ def _validate_listlike(self, value, allow_object: bool = False): msg = self._validation_error_message(value, True) raise TypeError(msg) - # Do type inference if necessary up front (after unpacking PandasArray) + # Do type inference if necessary up front (after unpacking + # NumpyExtensionArray) # e.g. 
we passed PeriodIndex.values and got an ndarray of Periods value = extract_array(value, extract_numpy=True) value = pd_array(value) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 5f02053a454ed..6d01dfcf6d90b 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype -from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.dtypes import NumpyEADtype from pandas.core.dtypes.missing import isna from pandas.core import ( @@ -48,7 +48,7 @@ # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" -class PandasArray( # type: ignore[misc] +class NumpyExtensionArray( # type: ignore[misc] OpsMixin, NDArrayBackedExtensionArray, ObjectStringArrayMixin, @@ -76,19 +76,21 @@ class PandasArray( # type: ignore[misc] """ # If you're wondering why pd.Series(cls) doesn't put the array in an - # ExtensionBlock, search for `ABCPandasArray`. We check for + # ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for # that _typ to ensure that users don't unnecessarily use EAs inside # pandas internals, which turns off things like block consolidation. 
_typ = "npy_extension" __array_priority__ = 1000 _ndarray: np.ndarray - _dtype: PandasDtype + _dtype: NumpyEADtype _internal_fill_value = np.nan # ------------------------------------------------------------------------ # Constructors - def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None: + def __init__( + self, values: np.ndarray | NumpyExtensionArray, copy: bool = False + ) -> None: if isinstance(values, type(self)): values = values._ndarray if not isinstance(values, np.ndarray): @@ -98,19 +100,19 @@ def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None if values.ndim == 0: # Technically we support 2, but do not advertise that fact. - raise ValueError("PandasArray must be 1-dimensional.") + raise ValueError("NumpyExtensionArray must be 1-dimensional.") if copy: values = values.copy() - dtype = PandasDtype(values.dtype) + dtype = NumpyEADtype(values.dtype) super().__init__(values, dtype) @classmethod def _from_sequence( cls, scalars, *, dtype: Dtype | None = None, copy: bool = False - ) -> PandasArray: - if isinstance(dtype, PandasDtype): + ) -> NumpyExtensionArray: + if isinstance(dtype, NumpyEADtype): dtype = dtype._dtype # error: Argument "dtype" to "asarray" has incompatible type @@ -131,14 +133,14 @@ def _from_sequence( result = result.copy() return cls(result) - def _from_backing_data(self, arr: np.ndarray) -> PandasArray: + def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray: return type(self)(arr) # ------------------------------------------------------------------------ # Data @property - def dtype(self) -> PandasDtype: + def dtype(self) -> NumpyEADtype: return self._dtype # ------------------------------------------------------------------------ @@ -151,7 +153,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html # The primary 
modification is not boxing scalar return values - # in PandasArray, since pandas' ExtensionArrays are 1-d. + # in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d. out = kwargs.get("out", ()) result = arraylike.maybe_dispatch_ufunc_to_dunder_op( @@ -175,10 +177,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return result # Defer to the implementation of the ufunc on unwrapped values. - inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs) + inputs = tuple( + x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs + ) if out: kwargs["out"] = tuple( - x._ndarray if isinstance(x, PandasArray) else x for x in out + x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out ) result = getattr(ufunc, method)(*inputs, **kwargs) @@ -499,20 +503,20 @@ def to_numpy( # ------------------------------------------------------------------------ # Ops - def __invert__(self) -> PandasArray: + def __invert__(self) -> NumpyExtensionArray: return type(self)(~self._ndarray) - def __neg__(self) -> PandasArray: + def __neg__(self) -> NumpyExtensionArray: return type(self)(-self._ndarray) - def __pos__(self) -> PandasArray: + def __pos__(self) -> NumpyExtensionArray: return type(self)(+self._ndarray) - def __abs__(self) -> PandasArray: + def __abs__(self) -> NumpyExtensionArray: return type(self)(abs(self._ndarray)) def _cmp_method(self, other, op): - if isinstance(other, PandasArray): + if isinstance(other, NumpyExtensionArray): other = other._ndarray other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) @@ -538,7 +542,7 @@ def _cmp_method(self, other, op): def _wrap_ndarray_result(self, result: np.ndarray): # If we have timedelta64[ns] result, return a TimedeltaArray instead - # of a PandasArray + # of a NumpyExtensionArray if result.dtype.kind == "m" and is_supported_unit( get_unit_from_dtype(result.dtype) ): diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py 
index 2640cbd7f6ba1..7c834d2e26b3a 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -43,7 +43,7 @@ IntegerArray, IntegerDtype, ) -from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna @@ -231,7 +231,7 @@ def tolist(self): # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" -class StringArray(BaseStringArray, PandasArray): # type: ignore[misc] +class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] """ Extension array for string data. @@ -294,7 +294,7 @@ class StringArray(BaseStringArray, PandasArray): # type: ignore[misc] will convert the values to strings. >>> pd.array(['1', 1], dtype="object") - <PandasArray> + <NumpyExtensionArray> ['1', 1] Length: 2, dtype: object >>> pd.array(['1', 1], dtype="string") @@ -312,7 +312,7 @@ class StringArray(BaseStringArray, PandasArray): # type: ignore[misc] Length: 3, dtype: boolean """ - # undo the PandasArray hack + # undo the NumpyExtensionArray hack _typ = "extension" def __init__(self, values, copy: bool = False) -> None: @@ -404,7 +404,7 @@ def _values_for_factorize(self): def __setitem__(self, key, value): value = extract_array(value, extract_numpy=True) if isinstance(value, type(self)): - # extract_array doesn't extract PandasArray subclasses + # extract_array doesn't extract NumpyExtensionArray subclasses value = value._ndarray key = check_array_indexer(self, key) @@ -461,7 +461,7 @@ def astype(self, dtype, copy: bool = True): values = arr.astype(dtype.numpy_dtype) return FloatingArray(values, mask, copy=False) elif isinstance(dtype, ExtensionDtype): - # Skip the PandasArray.astype method + # Skip the NumpyExtensionArray.astype method return ExtensionArray.astype(self, dtype, copy) elif np.issubdtype(dtype, np.floating): 
arr = self._ndarray.copy() @@ -557,7 +557,7 @@ def _cmp_method(self, other, op): # ------------------------------------------------------------------------ # String methods interface # error: Incompatible types in assignment (expression has type "NAType", - # base class "PandasArray" defined the type as "float") + # base class "NumpyExtensionArray" defined the type as "float") _str_na_value = libmissing.NA # type: ignore[assignment] def _str_map( diff --git a/pandas/core/base.py b/pandas/core/base.py index 057b381bbdb58..e707a151fdb7f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -518,11 +518,11 @@ def array(self) -> ExtensionArray: Examples -------- - For regular NumPy types like int, and float, a PandasArray + For regular NumPy types like int, and float, a NumpyExtensionArray is returned. >>> pd.Series([1, 2, 3]).array - <PandasArray> + <NumpyExtensionArray> [1, 2, 3] Length: 3, dtype: int64 diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 9112c7e52a348..4ce6c35244e5b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -49,7 +49,7 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.dtypes import NumpyEADtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, @@ -150,7 +150,7 @@ def array( numpy.array : Construct a NumPy array. Series : Construct a pandas Series. Index : Construct a pandas Index. - arrays.PandasArray : ExtensionArray wrapping a NumPy array. + arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array. Series.array : Extract the array stored within a Series. Notes @@ -169,11 +169,11 @@ def array( rather than a string alias or allowing it to be inferred. For example, a future version of pandas or a 3rd-party library may include a dedicated ExtensionArray for string data. In this event, the following - would no longer return a :class:`arrays.PandasArray` backed by a NumPy - array. 
+ would no longer return a :class:`arrays.NumpyExtensionArray` backed by a + NumPy array. >>> pd.array(['a', 'b'], dtype=str) - <PandasArray> + <NumpyExtensionArray> ['a', 'b'] Length: 2, dtype: str32 @@ -182,7 +182,7 @@ def array( specify that in the dtype. >>> pd.array(['a', 'b'], dtype=np.dtype("<U1")) - <PandasArray> + <NumpyExtensionArray> ['a', 'b'] Length: 2, dtype: str32 @@ -193,7 +193,7 @@ def array( When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray`` - rather than a ``PandasArray``. This is for symmetry with the case of + rather than a ``NumpyExtensionArray``. This is for symmetry with the case of timezone-aware data, which NumPy does not natively support. >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') @@ -258,21 +258,21 @@ def array( Categories (3, object): ['a' < 'b' < 'c'] If pandas does not infer a dedicated extension type a - :class:`arrays.PandasArray` is returned. + :class:`arrays.NumpyExtensionArray` is returned. >>> pd.array([1 + 1j, 3 + 2j]) - <PandasArray> + <NumpyExtensionArray> [(1+1j), (3+2j)] Length: 2, dtype: complex128 As mentioned in the "Notes" section, new extension types may be added in the future (by pandas or 3rd party libraries), causing the return - value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype` - as a NumPy dtype if you need to ensure there's no future change in + value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the + `dtype` as a NumPy dtype if you need to ensure there's no future change in behavior. >>> pd.array([1, 2], dtype=np.dtype("int32")) - <PandasArray> + <NumpyExtensionArray> [1, 2] Length: 2, dtype: int32 @@ -291,7 +291,7 @@ def array( FloatingArray, IntegerArray, IntervalArray, - PandasArray, + NumpyExtensionArray, PeriodArray, TimedeltaArray, ) @@ -314,7 +314,7 @@ def array( dtype = pandas_dtype(dtype) if isinstance(data, ExtensionArray) and (dtype is None or data.dtype == dtype): - # e.g. 
TimedeltaArray[s], avoid casting to NumpyExtensionArray if copy: return data.copy() return data @@ -337,7 +337,7 @@ def array( try: return DatetimeArray._from_sequence(data, copy=copy) except ValueError: - # Mixture of timezones, fall back to PandasArray + # Mixture of timezones, fall back to NumpyExtensionArray pass elif inferred_dtype.startswith("timedelta"): @@ -356,7 +356,7 @@ def array( and getattr(data, "dtype", None) != np.float16 ): # GH#44715 Exclude np.float16 bc FloatingArray does not support it; - # we will fall back to PandasArray. + # we will fall back to NumpyExtensionArray. return FloatingArray._from_sequence(data, copy=copy) elif inferred_dtype == "boolean": @@ -381,7 +381,7 @@ def array( stacklevel=find_stack_level(), ) - return PandasArray._from_sequence(data, dtype=dtype, copy=copy) + return NumpyExtensionArray._from_sequence(data, dtype=dtype, copy=copy) _typs = frozenset( @@ -427,7 +427,7 @@ def extract_array( For Series / Index, the underlying ExtensionArray is unboxed. extract_numpy : bool, default False - Whether to extract the ndarray from a PandasArray. + Whether to extract the ndarray from a NumpyExtensionArray. extract_range : bool, default False If we have a RangeIndex, return range._values if True @@ -471,7 +471,7 @@ def extract_array( return obj._values # type: ignore[attr-defined] elif extract_numpy and typ == "npy_extension": - # i.e. isinstance(obj, ABCPandasArray) + # i.e. 
isinstance(obj, ABCNumpyExtensionArray) # error: "T" has no attribute "to_numpy" return obj.to_numpy() # type: ignore[attr-defined] @@ -540,11 +540,11 @@ def sanitize_array( if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) - if isinstance(dtype, PandasDtype): - # Avoid ending up with a PandasArray + if isinstance(dtype, NumpyEADtype): + # Avoid ending up with a NumpyExtensionArray dtype = dtype.numpy_dtype - # extract ndarray or ExtensionArray, ensure we have no PandasArray + # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray data = extract_array(data, extract_numpy=True, extract_range=True) if isinstance(data, np.ndarray) and data.ndim == 0: @@ -563,7 +563,7 @@ def sanitize_array( return data elif isinstance(data, ABCExtensionArray): - # it is already ensured above this is not a PandasArray + # it is already ensured above this is not a NumpyExtensionArray # Until GH#49309 is fixed this check needs to come before the # ExtensionDtype check if dtype is not None: @@ -688,7 +688,7 @@ def _sanitize_ndim( f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead" ) if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): - # i.e. PandasDtype("O") + # i.e. 
NumpyEADtype("O") result = com.asarray_tuplesafe(data, dtype=np.dtype("object")) cls = dtype.construct_array_type() diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 64df3827d7a3d..ac3a44276ac6d 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,7 +24,7 @@ ) from pandas.core.dtypes.dtypes import ( ExtensionDtype, - PandasDtype, + NumpyEADtype, ) if TYPE_CHECKING: @@ -230,8 +230,8 @@ def astype_array_safe( raise TypeError(msg) dtype = pandas_dtype(dtype) - if isinstance(dtype, PandasDtype): - # Ensure we don't end up with a PandasArray + if isinstance(dtype, NumpyEADtype): + # Ensure we don't end up with a NumpyExtensionArray dtype = dtype.numpy_dtype try: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 04e2b00744156..05ebe8295f817 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -86,7 +86,7 @@ BaseMaskedArray, DatetimeArray, IntervalArray, - PandasArray, + NumpyExtensionArray, PeriodArray, SparseArray, ) @@ -1382,7 +1382,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: return IntervalDtype(common, closed=closed) -class PandasDtype(ExtensionDtype): +class NumpyEADtype(ExtensionDtype): """ A Pandas ExtensionDtype for NumPy dtypes. @@ -1401,19 +1401,19 @@ class PandasDtype(ExtensionDtype): _metadata = ("_dtype",) - def __init__(self, dtype: npt.DTypeLike | PandasDtype | None) -> None: - if isinstance(dtype, PandasDtype): + def __init__(self, dtype: npt.DTypeLike | NumpyEADtype | None) -> None: + if isinstance(dtype, NumpyEADtype): # make constructor univalent dtype = dtype.numpy_dtype self._dtype = np.dtype(dtype) def __repr__(self) -> str: - return f"PandasDtype({repr(self.name)})" + return f"NumpyEADtype({repr(self.name)})" @property def numpy_dtype(self) -> np.dtype: """ - The NumPy dtype this PandasDtype wraps. + The NumPy dtype this NumpyEADtype wraps. 
""" return self._dtype @@ -1441,19 +1441,19 @@ def _is_boolean(self) -> bool: return self.kind == "b" @classmethod - def construct_from_string(cls, string: str) -> PandasDtype: + def construct_from_string(cls, string: str) -> NumpyEADtype: try: dtype = np.dtype(string) except TypeError as err: if not isinstance(string, str): msg = f"'construct_from_string' expects a string, got {type(string)}" else: - msg = f"Cannot construct a 'PandasDtype' from '{string}'" + msg = f"Cannot construct a 'NumpyEADtype' from '{string}'" raise TypeError(msg) from err return cls(dtype) @classmethod - def construct_array_type(cls) -> type_t[PandasArray]: + def construct_array_type(cls) -> type_t[NumpyExtensionArray]: """ Return the array type associated with this dtype. @@ -1461,9 +1461,9 @@ def construct_array_type(cls) -> type_t[PandasArray]: ------- type """ - from pandas.core.arrays import PandasArray + from pandas.core.arrays import NumpyExtensionArray - return PandasArray + return NumpyExtensionArray @property def kind(self) -> str: diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 5904ba4895aef..9718ad600cb80 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -24,7 +24,7 @@ from pandas.core.arrays import ( DatetimeArray, ExtensionArray, - PandasArray, + NumpyExtensionArray, PeriodArray, TimedeltaArray, ) @@ -141,7 +141,7 @@ def _subclasscheck(cls, inst) -> bool: {"extension", "categorical", "periodarray", "datetimearray", "timedeltaarray"}, ), ) -ABCPandasArray = cast( - "Type[PandasArray]", - create_pandas_abc_type("ABCPandasArray", "_typ", ("npy_extension",)), +ABCNumpyExtensionArray = cast( + "Type[NumpyExtensionArray]", + create_pandas_abc_type("ABCNumpyExtensionArray", "_typ", ("npy_extension",)), ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4b6b59898c199..4cf93aebf7de5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5099,9 +5099,9 @@ def 
values(self) -> ArrayLike: def array(self) -> ExtensionArray: array = self._data if isinstance(array, np.ndarray): - from pandas.core.arrays.numpy_ import PandasArray + from pandas.core.arrays.numpy_ import NumpyExtensionArray - array = PandasArray(array) + array = NumpyExtensionArray(array) return array @property diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 5591253618f5f..3b77540efcdd2 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -37,7 +37,7 @@ ) from pandas.core.dtypes.dtypes import ( ExtensionDtype, - PandasDtype, + NumpyEADtype, SparseDtype, ) from pandas.core.dtypes.generic import ( @@ -56,7 +56,7 @@ from pandas.core.arrays import ( DatetimeArray, ExtensionArray, - PandasArray, + NumpyExtensionArray, TimedeltaArray, ) from pandas.core.construction import ( @@ -331,7 +331,8 @@ def convert(self, copy: bool | None) -> Self: def _convert(arr): if is_object_dtype(arr.dtype): - # extract PandasArray for tests that patch PandasArray._typ + # extract NumpyExtensionArray for tests that patch + # NumpyExtensionArray._typ arr = np.asarray(arr) result = lib.maybe_convert_objects( arr, @@ -1022,7 +1023,7 @@ def as_array( if isinstance(dtype, SparseDtype): dtype = dtype.subtype - elif isinstance(dtype, PandasDtype): + elif isinstance(dtype, NumpyEADtype): dtype = dtype.numpy_dtype elif isinstance(dtype, ExtensionDtype): dtype = np.dtype("object") @@ -1148,7 +1149,7 @@ def array_values(self): """The array that Series.array returns""" arr = self.array if isinstance(arr, np.ndarray): - arr = PandasArray(arr) + arr = NumpyExtensionArray(arr) return arr @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b4c42804d7484..7e97876684698 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -64,14 +64,14 @@ DatetimeTZDtype, ExtensionDtype, IntervalDtype, - PandasDtype, + NumpyEADtype, PeriodDtype, 
SparseDtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndex, - ABCPandasArray, + ABCNumpyExtensionArray, ABCSeries, ) from pandas.core.dtypes.missing import ( @@ -101,7 +101,7 @@ DatetimeArray, ExtensionArray, IntervalArray, - PandasArray, + NumpyExtensionArray, PeriodArray, TimedeltaArray, ) @@ -1399,9 +1399,9 @@ def pad_or_backfill( copy, refs = self._get_refs_and_copy(using_cow, inplace) - # Dispatch to the PandasArray method. - # We know self.array_values is a PandasArray bc EABlock overrides - vals = cast(PandasArray, self.array_values) + # Dispatch to the NumpyExtensionArray method. + # We know self.array_values is a NumpyExtensionArray bc EABlock overrides + vals = cast(NumpyExtensionArray, self.array_values) if axis == 1: vals = vals.T new_values = vals.pad_or_backfill( @@ -2178,7 +2178,7 @@ def is_view(self) -> bool: @property def array_values(self) -> ExtensionArray: - return PandasArray(self.values) + return NumpyExtensionArray(self.values) def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: if dtype == _dtype_obj: @@ -2276,7 +2276,7 @@ def maybe_coerce_values(values: ArrayLike) -> ArrayLike: ------- values : np.ndarray or ExtensionArray """ - # Caller is responsible for ensuring PandasArray is already extracted. + # Caller is responsible for ensuring NumpyExtensionArray is already extracted. 
if isinstance(values, np.ndarray): values = ensure_wrapped_if_datetimelike(values) @@ -2308,7 +2308,7 @@ def get_block_type(dtype: DtypeObj) -> type[Block]: elif isinstance(dtype, PeriodDtype): return NDArrayBackedExtensionBlock elif isinstance(dtype, ExtensionDtype): - # Note: need to be sure PandasArray is unwrapped before we get here + # Note: need to be sure NumpyExtensionArray is unwrapped before we get here return ExtensionBlock # We use kind checks because it is much more performant @@ -2341,7 +2341,7 @@ def new_block( refs: BlockValuesRefs | None = None, ) -> Block: # caller is responsible for ensuring: - # - values is NOT a PandasArray + # - values is NOT a NumpyExtensionArray # - check_ndim/ensure_block_shape already checked # - maybe_coerce_values already called/unnecessary klass = get_block_type(values.dtype) @@ -2394,16 +2394,16 @@ def extract_pandas_array( values: ArrayLike, dtype: DtypeObj | None, ndim: int ) -> tuple[ArrayLike, DtypeObj | None]: """ - Ensure that we don't allow PandasArray / PandasDtype in internals. + Ensure that we don't allow NumpyExtensionArray / NumpyEADtype in internals. """ # For now, blocks should be backed by ndarrays when possible. 
- if isinstance(values, ABCPandasArray): + if isinstance(values, ABCNumpyExtensionArray): values = values.to_numpy() if ndim and ndim > 1: # TODO(EA2D): special case not needed with 2D EAs values = np.atleast_2d(values) - if isinstance(dtype, PandasDtype): + if isinstance(dtype, NumpyEADtype): dtype = dtype.numpy_dtype return values, dtype diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ee85bc5a87834..2290cd86f35e6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -118,7 +118,7 @@ def arrays_to_mgr( # - all(len(x) == len(index) for x in arrays) # - all(x.ndim == 1 for x in arrays) # - all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) - # - all(type(x) is not PandasArray for x in arrays) + # - all(type(x) is not NumpyExtensionArray for x in arrays) else: index = ensure_index(index) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b25eba15dfc06..b69706b8d1d42 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2065,7 +2065,7 @@ def create_block_manager_from_column_arrays( # assert isinstance(axes, list) # assert all(isinstance(x, Index) for x in axes) # assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) - # assert all(type(x) is not PandasArray for x in arrays) + # assert all(type(x) is not NumpyExtensionArray for x in arrays) # assert all(x.ndim == 1 for x in arrays) # assert all(len(x) == len(axes[1]) for x in arrays) # assert len(arrays) == len(axes[0]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 265be87be40f1..607a6d0870be8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -768,15 +768,15 @@ def _values(self): Overview: - dtype | values | _values | array | - ----------- | ------------- | ------------- | ------------- | - Numeric | ndarray | ndarray | PandasArray | - Category | Categorical | Categorical | Categorical | 
- dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | - dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | - td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] | - Period | ndarray[obj] | PeriodArray | PeriodArray | - Nullable | EA | EA | EA | + dtype | values | _values | array | + ----------- | ------------- | ------------- | --------------------- | + Numeric | ndarray | ndarray | NumpyExtensionArray | + Category | Categorical | Categorical | Categorical | + dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | + dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | + td64[ns] | ndarray[m8ns] | TimedeltaArray| TimedeltaArray | + Period | ndarray[obj] | PeriodArray | PeriodArray | + Nullable | EA | EA | EA | """ return self._mgr.internal_values() @@ -890,7 +890,7 @@ def view(self, dtype: Dtype | None = None) -> Series: 4 2 dtype: int8 """ - # self.array instead of self._values so we piggyback on PandasArray + # self.array instead of self._values so we piggyback on NumpyExtensionArray # implementation res_values = self.array.view(dtype) res_ser = self._constructor(res_values, index=self.index, copy=False) diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py index eb650477c2b6b..d4ce75f768c5d 100644 --- a/pandas/core/strings/__init__.py +++ b/pandas/core/strings/__init__.py @@ -23,6 +23,6 @@ # BaseStringArrayMethods # - ObjectStringArrayMixin # - StringArray -# - PandasArray +# - NumpyExtensionArray # - Categorical # - ArrowStringArray diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index afe904f02ea8b..801968bd59f4e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -67,7 +67,7 @@ from pandas.arrays import ( DatetimeArray, IntegerArray, - PandasArray, + NumpyExtensionArray, ) from pandas.core import algorithms from pandas.core.algorithms import unique @@ -393,7 +393,7 @@ def _convert_listlike_datetimes( """ if isinstance(arg, (list, tuple)): 
arg = np.array(arg, dtype="O") - elif isinstance(arg, PandasArray): + elif isinstance(arg, NumpyExtensionArray): arg = np.array(arg) arg_dtype = getattr(arg, "dtype", None) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 924a6db4b901b..60bcb97aaa364 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -373,3 +373,11 @@ def test_testing(self): def test_util_in_top_level(self): with pytest.raises(AttributeError, match="foo"): pd.util.foo + + +def test_pandas_array_alias(): + msg = "PandasArray has been renamed NumpyExtensionArray" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = pd.arrays.PandasArray + + assert res is pd.arrays.NumpyExtensionArray diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index f3173e8f0eb57..b608df1554154 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -13,7 +13,7 @@ import pandas._testing as tm from pandas.core.arrays import ( BooleanArray, - PandasArray, + NumpyExtensionArray, ) @@ -95,8 +95,8 @@ def assert_invalid_comparison(left, right, box): def xbox2(x): # Eventually we'd like this to be tighter, but for now we'll - # just exclude PandasArray[bool] - if isinstance(x, PandasArray): + # just exclude NumpyExtensionArray[bool] + if isinstance(x, NumpyExtensionArray): return x._ndarray if isinstance(x, BooleanArray): # NB: we are assuming no pd.NAs for now diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 6a0584485be42..e6c743c76a2c1 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1570,7 +1570,7 @@ def test_dt64arr_add_sub_offset_array( # Same thing but boxing other other = tm.box_expected(other, box_with_array) if box_with_array is pd.array and op is roperator.radd: - # We expect a PandasArray, not ndarray[object] here + # We expect a NumpyExtensionArray, not 
ndarray[object] here expected = pd.array(expected, dtype=object) with tm.assert_produces_warning(PerformanceWarning): res = op(dtarr, other) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index c2f835212529f..0ffe1ddc3dfb7 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -27,7 +27,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.arrays import PandasArray +from pandas.core.arrays import NumpyExtensionArray from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, @@ -1716,7 +1716,7 @@ def test_td64_div_object_mixed_result(self, box_with_array): expected = Index([1.0, np.timedelta64("NaT", "ns"), orig[0], 1.5], dtype=object) expected = tm.box_expected(expected, box_with_array, transpose=False) - if isinstance(expected, PandasArray): + if isinstance(expected, NumpyExtensionArray): expected = expected.to_numpy() tm.assert_equal(res, expected) if box_with_array is DataFrame: @@ -1727,7 +1727,7 @@ def test_td64_div_object_mixed_result(self, box_with_array): expected = Index([1, np.timedelta64("NaT", "ns"), orig[0], 1], dtype=object) expected = tm.box_expected(expected, box_with_array, transpose=False) - if isinstance(expected, PandasArray): + if isinstance(expected, NumpyExtensionArray): expected = expected.to_numpy() tm.assert_equal(res, expected) if box_with_array is DataFrame: diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index c748d487a2f9c..4217745e60e76 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -1,15 +1,15 @@ """ -Additional tests for PandasArray that aren't covered by +Additional tests for NumpyExtensionArray that aren't covered by the interface tests. 
""" import numpy as np import pytest -from pandas.core.dtypes.dtypes import PandasDtype +from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd import pandas._testing as tm -from pandas.arrays import PandasArray +from pandas.arrays import NumpyExtensionArray @pytest.fixture( @@ -33,7 +33,7 @@ def any_numpy_array(request): # ---------------------------------------------------------------------------- -# PandasDtype +# NumpyEADtype @pytest.mark.parametrize( @@ -52,7 +52,7 @@ def any_numpy_array(request): ], ) def test_is_numeric(dtype, expected): - dtype = PandasDtype(dtype) + dtype = NumpyEADtype(dtype) assert dtype._is_numeric is expected @@ -72,25 +72,25 @@ def test_is_numeric(dtype, expected): ], ) def test_is_boolean(dtype, expected): - dtype = PandasDtype(dtype) + dtype = NumpyEADtype(dtype) assert dtype._is_boolean is expected def test_repr(): - dtype = PandasDtype(np.dtype("int64")) - assert repr(dtype) == "PandasDtype('int64')" + dtype = NumpyEADtype(np.dtype("int64")) + assert repr(dtype) == "NumpyEADtype('int64')" def test_constructor_from_string(): - result = PandasDtype.construct_from_string("int64") - expected = PandasDtype(np.dtype("int64")) + result = NumpyEADtype.construct_from_string("int64") + expected = NumpyEADtype(np.dtype("int64")) assert result == expected def test_dtype_univalent(any_numpy_dtype): - dtype = PandasDtype(any_numpy_dtype) + dtype = NumpyEADtype(any_numpy_dtype) - result = PandasDtype(dtype) + result = NumpyEADtype(dtype) assert result == dtype @@ -100,40 +100,40 @@ def test_dtype_univalent(any_numpy_dtype): def test_constructor_no_coercion(): with pytest.raises(ValueError, match="NumPy array"): - PandasArray([1, 2, 3]) + NumpyExtensionArray([1, 2, 3]) def test_series_constructor_with_copy(): ndarray = np.array([1, 2, 3]) - ser = pd.Series(PandasArray(ndarray), copy=True) + ser = pd.Series(NumpyExtensionArray(ndarray), copy=True) assert ser.values is not ndarray def test_series_constructor_with_astype(): ndarray = 
np.array([1, 2, 3]) - result = pd.Series(PandasArray(ndarray), dtype="float64") + result = pd.Series(NumpyExtensionArray(ndarray), dtype="float64") expected = pd.Series([1.0, 2.0, 3.0], dtype="float64") tm.assert_series_equal(result, expected) def test_from_sequence_dtype(): arr = np.array([1, 2, 3], dtype="int64") - result = PandasArray._from_sequence(arr, dtype="uint64") - expected = PandasArray(np.array([1, 2, 3], dtype="uint64")) + result = NumpyExtensionArray._from_sequence(arr, dtype="uint64") + expected = NumpyExtensionArray(np.array([1, 2, 3], dtype="uint64")) tm.assert_extension_array_equal(result, expected) def test_constructor_copy(): arr = np.array([0, 1]) - result = PandasArray(arr, copy=True) + result = NumpyExtensionArray(arr, copy=True) assert not tm.shares_memory(result, arr) def test_constructor_with_data(any_numpy_array): nparr = any_numpy_array - arr = PandasArray(nparr) + arr = NumpyExtensionArray(nparr) assert arr.dtype.numpy_dtype == nparr.dtype @@ -142,7 +142,7 @@ def test_constructor_with_data(any_numpy_array): def test_to_numpy(): - arr = PandasArray(np.array([1, 2, 3])) + arr = NumpyExtensionArray(np.array([1, 2, 3])) result = arr.to_numpy() assert result is arr._ndarray @@ -167,7 +167,7 @@ def test_setitem_series(): def test_setitem(any_numpy_array): nparr = any_numpy_array - arr = PandasArray(nparr, copy=True) + arr = NumpyExtensionArray(nparr, copy=True) arr[0] = arr[1] nparr[0] = nparr[1] @@ -181,14 +181,14 @@ def test_setitem(any_numpy_array): def test_bad_reduce_raises(): arr = np.array([1, 2, 3], dtype="int64") - arr = PandasArray(arr) + arr = NumpyExtensionArray(arr) msg = "cannot perform not_a_method with type int" with pytest.raises(TypeError, match=msg): arr._reduce(msg) def test_validate_reduction_keyword_args(): - arr = PandasArray(np.array([1, 2, 3])) + arr = NumpyExtensionArray(np.array([1, 2, 3])) msg = "the 'keepdims' parameter is not supported .*all" with pytest.raises(ValueError, match=msg): arr.all(keepdims=True) @@ 
-217,7 +217,7 @@ def test_np_max_nested_tuples(): def test_np_reduce_2d(): raw = np.arange(12).reshape(4, 3) - arr = PandasArray(raw) + arr = NumpyExtensionArray(raw) res = np.maximum.reduce(arr, axis=0) tm.assert_extension_array_equal(res, arr[-1]) @@ -232,24 +232,24 @@ def test_np_reduce_2d(): @pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive]) def test_ufunc_unary(ufunc): - arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0])) result = ufunc(arr) - expected = PandasArray(ufunc(arr._ndarray)) + expected = NumpyExtensionArray(ufunc(arr._ndarray)) tm.assert_extension_array_equal(result, expected) # same thing but with the 'out' keyword - out = PandasArray(np.array([-9.0, -9.0, -9.0])) + out = NumpyExtensionArray(np.array([-9.0, -9.0, -9.0])) ufunc(arr, out=out) tm.assert_extension_array_equal(out, expected) def test_ufunc(): - arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0])) r1, r2 = np.divmod(arr, np.add(arr, 2)) e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) - e1 = PandasArray(e1) - e2 = PandasArray(e2) + e1 = NumpyExtensionArray(e1) + e2 = NumpyExtensionArray(e2) tm.assert_extension_array_equal(r1, e1) tm.assert_extension_array_equal(r2, e2) @@ -257,23 +257,23 @@ def test_ufunc(): def test_basic_binop(): # Just a basic smoke test. The EA interface tests exercise this # more thoroughly. 
- x = PandasArray(np.array([1, 2, 3])) + x = NumpyExtensionArray(np.array([1, 2, 3])) result = x + x - expected = PandasArray(np.array([2, 4, 6])) + expected = NumpyExtensionArray(np.array([2, 4, 6])) tm.assert_extension_array_equal(result, expected) @pytest.mark.parametrize("dtype", [None, object]) def test_setitem_object_typecode(dtype): - arr = PandasArray(np.array(["a", "b", "c"], dtype=dtype)) + arr = NumpyExtensionArray(np.array(["a", "b", "c"], dtype=dtype)) arr[0] = "t" - expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype)) + expected = NumpyExtensionArray(np.array(["t", "b", "c"], dtype=dtype)) tm.assert_extension_array_equal(arr, expected) def test_setitem_no_coercion(): # https://github.com/pandas-dev/pandas/issues/28150 - arr = PandasArray(np.array([1, 2, 3])) + arr = NumpyExtensionArray(np.array([1, 2, 3])) with pytest.raises(ValueError, match="int"): arr[0] = "a" @@ -285,7 +285,7 @@ def test_setitem_no_coercion(): def test_setitem_preserves_views(): # GH#28150, see also extension test of the same name - arr = PandasArray(np.array([1, 2, 3])) + arr = NumpyExtensionArray(np.array([1, 2, 3])) view1 = arr.view() view2 = arr[:] view3 = np.asarray(arr) @@ -303,22 +303,22 @@ def test_setitem_preserves_views(): @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) def test_quantile_empty(dtype): # we should get back np.nans, not -1s - arr = PandasArray(np.array([], dtype=dtype)) + arr = NumpyExtensionArray(np.array([], dtype=dtype)) idx = pd.Index([0.0, 0.5]) result = arr._quantile(idx, interpolation="linear") - expected = PandasArray(np.array([np.nan, np.nan])) + expected = NumpyExtensionArray(np.array([np.nan, np.nan])) tm.assert_extension_array_equal(result, expected) def test_factorize_unsigned(): - # don't raise when calling factorize on unsigned int PandasArray + # don't raise when calling factorize on unsigned int NumpyExtensionArray arr = np.array([1, 2, 3], dtype=np.uint64) - obj = PandasArray(arr) + obj = NumpyExtensionArray(arr) 
res_codes, res_unique = obj.factorize() exp_codes, exp_unique = pd.factorize(arr) tm.assert_numpy_array_equal(res_codes, exp_codes) - tm.assert_extension_array_equal(res_unique, PandasArray(exp_unique)) + tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index d5c1d5bbd03b0..b8b5e3588d48f 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -19,7 +19,7 @@ TimedeltaArray, ) from pandas.core.arrays import ( - PandasArray, + NumpyExtensionArray, period_array, ) from pandas.tests.extension.decimal import ( @@ -48,11 +48,11 @@ def test_dt64_array(dtype_unit): [ # Basic NumPy defaults. ([1, 2], None, IntegerArray._from_sequence([1, 2])), - ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))), + ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))), ( [1, 2], np.dtype("float32"), - PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), + NumpyExtensionArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), ), (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])), ( @@ -61,19 +61,20 @@ def test_dt64_array(dtype_unit): FloatingArray._from_sequence([1.0, 2.0]), ), # String alias passes through to NumPy - ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), - ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))), - # GH#44715 FloatingArray does not support float16, so fall back to PandasArray + ([1, 2], "float32", NumpyExtensionArray(np.array([1, 2], dtype="float32"))), + ([1, 2], "int64", NumpyExtensionArray(np.array([1, 2], dtype=np.int64))), + # GH#44715 FloatingArray does not support float16, so fall + # back to NumpyExtensionArray ( np.array([1, 2], dtype=np.float16), None, - PandasArray(np.array([1, 2], dtype=np.float16)), + NumpyExtensionArray(np.array([1, 2], dtype=np.float16)), ), # idempotency with e.g. 
pd.array(pd.array([1, 2], dtype="int64")) ( - PandasArray(np.array([1, 2], dtype=np.int32)), + NumpyExtensionArray(np.array([1, 2], dtype=np.int32)), None, - PandasArray(np.array([1, 2], dtype=np.int32)), + NumpyExtensionArray(np.array([1, 2], dtype=np.int32)), ), # Period alias ( @@ -148,7 +149,7 @@ def test_dt64_array(dtype_unit): TimedeltaArray._from_sequence(["1H", "2H"]), ), ( - # preserve non-nano, i.e. don't cast to PandasArray + # preserve non-nano, i.e. don't cast to NumpyExtensionArray TimedeltaArray._simple_new( np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") ), @@ -158,7 +159,7 @@ def test_dt64_array(dtype_unit): ), ), ( - # preserve non-nano, i.e. don't cast to PandasArray + # preserve non-nano, i.e. don't cast to NumpyExtensionArray TimedeltaArray._simple_new( np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") ), @@ -184,7 +185,11 @@ def test_dt64_array(dtype_unit): ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", pd.array([1, None], dtype="Int16")), - (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + ( + pd.Series([1, 2]), + None, + NumpyExtensionArray(np.array([1, 2], dtype=np.int64)), + ), # String ( ["a", None], @@ -200,7 +205,7 @@ def test_dt64_array(dtype_unit): ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), # Index - (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + (pd.Index([1, 2]), None, NumpyExtensionArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA ( pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), @@ -351,13 +356,13 @@ def test_array_inference(data, expected): ) def test_array_inference_fails(data): result = pd.array(data) - expected = PandasArray(np.array(data, dtype=object)) + expected = NumpyExtensionArray(np.array(data, dtype=object)) 
tm.assert_extension_array_equal(result, expected) @pytest.mark.parametrize("data", [np.array(0)]) def test_nd_raises(data): - with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"): + with pytest.raises(ValueError, match="NumpyExtensionArray must be 1-dimensional"): pd.array(data, dtype="int64") diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 9e402af931199..7df17c42134e9 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -23,7 +23,7 @@ import pandas._testing as tm from pandas.core.arrays import ( DatetimeArray, - PandasArray, + NumpyExtensionArray, PeriodArray, TimedeltaArray, ) @@ -425,7 +425,7 @@ def test_setitem(self): pd.Series, np.array, list, - PandasArray, + NumpyExtensionArray, ], ) def test_setitem_object_dtype(self, box, arr1d): @@ -439,7 +439,7 @@ def test_setitem_object_dtype(self, box, arr1d): elif box is np.array: # if we do np.array(x).astype(object) then dt64 and td64 cast to ints vals = np.array(vals.astype(object)) - elif box is PandasArray: + elif box is NumpyExtensionArray: vals = box(np.asarray(vals, dtype=object)) else: vals = box(vals).astype(object) @@ -1291,7 +1291,7 @@ def test_period_index_construction_from_strings(klass): def test_from_pandas_array(dtype): # GH#24615 data = np.array([1, 2, 3], dtype=dtype) - arr = PandasArray(data) + arr = NumpyExtensionArray(data) cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] diff --git a/pandas/tests/arrays/test_ndarray_backed.py b/pandas/tests/arrays/test_ndarray_backed.py index c48fb7e78d45b..1fe7cc9b03e8a 100644 --- a/pandas/tests/arrays/test_ndarray_backed.py +++ b/pandas/tests/arrays/test_ndarray_backed.py @@ -10,7 +10,7 @@ from pandas.core.arrays import ( Categorical, DatetimeArray, - PandasArray, + NumpyExtensionArray, TimedeltaArray, ) @@ -65,11 +65,11 @@ def test_empty_td64(self): assert result.shape == shape def test_empty_pandas_array(self): - arr 
= PandasArray(np.array([1, 2])) + arr = NumpyExtensionArray(np.array([1, 2])) dtype = arr.dtype shape = (3, 9) - result = PandasArray._empty(shape, dtype=dtype) - assert isinstance(result, PandasArray) + result = NumpyExtensionArray._empty(shape, dtype=dtype) + assert isinstance(result, NumpyExtensionArray) assert result.dtype == dtype assert result.shape == shape diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index eda7871e5ab0a..0e618ea20bf67 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -15,7 +15,7 @@ from pandas.core.arrays import ( DatetimeArray, IntervalArray, - PandasArray, + NumpyExtensionArray, PeriodArray, SparseArray, TimedeltaArray, @@ -222,7 +222,7 @@ def test_values_consistent(arr, expected_type, dtype): def test_numpy_array(arr): ser = Series(arr) result = ser.array - expected = PandasArray(arr) + expected = NumpyExtensionArray(arr) tm.assert_extension_array_equal(result, expected) @@ -234,7 +234,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): elif np.dtype(any_numpy_dtype).kind == "m": assert isinstance(result, TimedeltaArray) else: - assert isinstance(result, PandasArray) + assert isinstance(result, NumpyExtensionArray) @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 85fbac186b369..aefed29a490ca 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -34,7 +34,7 @@ def to_numpy_dtypes(dtypes): return [getattr(np, dt) for dt in dtypes if isinstance(dt, str)] -class TestPandasDtype: +class TestNumpyEADtype: # Passing invalid dtype, both as a string or object, must raise TypeError # Per issue GH15520 @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 9a5bd5b1d047b..6f516b0564edc 100644 --- a/pandas/tests/dtypes/test_generic.py +++ 
b/pandas/tests/dtypes/test_generic.py @@ -33,7 +33,7 @@ class TestABCClasses: "ABCPeriodArray", pd.arrays.PeriodArray([2000, 2001, 2002], dtype="period[D]"), ), - ("ABCPandasArray", pd.arrays.PandasArray(np.array([0, 1, 2]))), + ("ABCNumpyExtensionArray", pd.arrays.NumpyExtensionArray(np.array([0, 1, 2]))), ("ABCPeriodIndex", period_index), ("ABCCategoricalIndex", categorical_df.index), ("ABCSeries", pd.Series([1, 2, 3])), diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index cadc3a46e0ba4..9931e71c16254 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1362,7 +1362,8 @@ def test_infer_dtype_period_array(self, klass, skipna): pd.NaT, ] ) - # with pd.array this becomes PandasArray which ends up as "unknown-array" + # with pd.array this becomes NumpyExtensionArray which ends up + # as "unknown-array" exp = "unknown-array" if klass is pd.array else "mixed" assert lib.infer_dtype(values, skipna=skipna) == exp diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 1f85c89ef38be..26716922da8fa 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -112,7 +112,7 @@ def test_pandas_array(self, data): def test_pandas_array_dtype(self, data): # ... 
but specifying dtype will override idempotency result = pd.array(data, dtype=np.dtype(object)) - expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) + expected = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object)) self.assert_equal(result, expected) def test_construct_empty_dataframe(self, dtype): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 16b05be2e0bb9..01868dcf71f4f 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -12,7 +12,7 @@ classes (if they are relevant for the extension interface for all dtypes), or be added to the array-specific tests in `pandas/tests/arrays/`. -Note: we do not bother with base.BaseIndexTests because PandasArray +Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray will never be held in an Index. """ import numpy as np @@ -21,19 +21,19 @@ from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.dtypes import ( ExtensionDtype, - PandasDtype, + NumpyEADtype, ) import pandas as pd import pandas._testing as tm from pandas.api.types import is_object_dtype -from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray from pandas.core.internals import blocks from pandas.tests.extension import base def _can_hold_element_patched(obj, element) -> bool: - if isinstance(element, PandasArray): + if isinstance(element, NumpyExtensionArray): element = element.to_numpy() return can_hold_element(obj, element) @@ -43,15 +43,15 @@ def _can_hold_element_patched(obj, element) -> bool: def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): """ - patch tm.assert_attr_equal so PandasDtype("object") is closed enough to + patch tm.assert_attr_equal so NumpyEADtype("object") is closed enough to np.dtype("object") """ if attr == "dtype": lattr = getattr(left, "dtype", None) rattr = getattr(right, "dtype", None) - if isinstance(lattr, 
PandasDtype) and not isinstance(rattr, PandasDtype): + if isinstance(lattr, NumpyEADtype) and not isinstance(rattr, NumpyEADtype): left = left.astype(lattr.numpy_dtype) - elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype): + elif isinstance(rattr, NumpyEADtype) and not isinstance(lattr, NumpyEADtype): right = right.astype(rattr.numpy_dtype) orig_assert_attr_equal(attr, left, right, obj) @@ -59,7 +59,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): @pytest.fixture(params=["float", "object"]) def dtype(request): - return PandasDtype(np.dtype(request.param)) + return NumpyEADtype(np.dtype(request.param)) @pytest.fixture @@ -67,20 +67,20 @@ def allow_in_pandas(monkeypatch): """ A monkeypatch to tells pandas to let us in. - By default, passing a PandasArray to an index / series / frame - constructor will unbox that PandasArray to an ndarray, and treat + By default, passing a NumpyExtensionArray to an index / series / frame + constructor will unbox that NumpyExtensionArray to an ndarray, and treat it as a non-EA column. We don't want people using EAs without reason. - The mechanism for this is a check against ABCPandasArray + The mechanism for this is a check against ABCNumpyExtensionArray in each constructor. But, for testing, we need to allow them in pandas. So we patch - the _typ of PandasArray, so that we evade the ABCPandasArray + the _typ of NumpyExtensionArray, so that we evade the ABCNumpyExtensionArray check. 
""" with monkeypatch.context() as m: - m.setattr(PandasArray, "_typ", "extension") + m.setattr(NumpyExtensionArray, "_typ", "extension") m.setattr(blocks, "can_hold_element", _can_hold_element_patched) m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal) yield @@ -90,14 +90,14 @@ def allow_in_pandas(monkeypatch): def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": return pd.Series([(i,) for i in range(100)]).array - return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) + return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) @pytest.fixture def data_missing(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return PandasArray(np.array([np.nan, (1,)], dtype=object)) - return PandasArray(np.array([np.nan, 1.0])) + return NumpyExtensionArray(np.array([np.nan, (1,)], dtype=object)) + return NumpyExtensionArray(np.array([np.nan, 1.0])) @pytest.fixture @@ -123,8 +123,8 @@ def data_for_sorting(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": # Use an empty tuple for first element, then remove, # to disable np.array's shape inference. - return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) - return PandasArray(np.array([1, 2, 0])) + return NumpyExtensionArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) + return NumpyExtensionArray(np.array([1, 2, 0])) @pytest.fixture @@ -135,8 +135,8 @@ def data_missing_for_sorting(allow_in_pandas, dtype): A < B and NA missing. 
""" if dtype.numpy_dtype == "object": - return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) - return PandasArray(np.array([1, np.nan, 0])) + return NumpyExtensionArray(np.array([(1,), np.nan, (0,)], dtype=object)) + return NumpyExtensionArray(np.array([1, np.nan, 0])) @pytest.fixture @@ -151,7 +151,7 @@ def data_for_grouping(allow_in_pandas, dtype): a, b, c = (1,), (2,), (3,) else: a, b, c = np.arange(3) - return PandasArray( + return NumpyExtensionArray( np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) ) @@ -159,7 +159,7 @@ def data_for_grouping(allow_in_pandas, dtype): @pytest.fixture def skip_numpy_object(dtype, request): """ - Tests for PandasArray with nested data. Users typically won't create + Tests for NumpyExtensionArray with nested data. Users typically won't create these objects via `pd.array`, but they can show up through `.array` on a Series with nested data. Many of the base tests fail, as they aren't appropriate for nested data. @@ -179,13 +179,13 @@ class BaseNumPyTests: @classmethod def assert_series_equal(cls, left, right, *args, **kwargs): # base class tests hard-code expected values with numpy dtypes, - # whereas we generally want the corresponding PandasDtype + # whereas we generally want the corresponding NumpyEADtype if ( isinstance(right, pd.Series) and not isinstance(right.dtype, ExtensionDtype) - and isinstance(left.dtype, PandasDtype) + and isinstance(left.dtype, NumpyEADtype) ): - right = right.astype(PandasDtype(right.dtype)) + right = right.astype(NumpyEADtype(right.dtype)) return tm.assert_series_equal(left, right, *args, **kwargs) @@ -210,7 +210,7 @@ def test_check_dtype(self, data, request): if data.dtype.numpy_dtype == "object": request.node.add_marker( pytest.mark.xfail( - reason=f"PandasArray expectedly clashes with a " + reason=f"NumpyExtensionArray expectedly clashes with a " f"NumPy name: {data.dtype.numpy_dtype}" ) ) @@ -219,7 +219,7 @@ def test_check_dtype(self, data, request): def 
test_is_not_object_type(self, dtype, request): if dtype.numpy_dtype == "object": # Different from BaseDtypeTests.test_is_not_object_type - # because PandasDtype(object) is an object type + # because NumpyEADtype(object) is an object type assert is_object_dtype(dtype) else: super().test_is_not_object_type(dtype) @@ -264,7 +264,7 @@ def test_searchsorted(self, data_for_sorting, as_series): # Test setup fails. super().test_searchsorted(data_for_sorting, as_series) - @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") + @pytest.mark.xfail(reason="NumpyExtensionArray.diff may fail on dtype") def test_diff(self, data, periods): return super().test_diff(data, periods) @@ -277,7 +277,7 @@ def test_insert(self, data, request): @skip_nested def test_insert_invalid(self, data, invalid_scalar): - # PandasArray[object] can hold anything, so skip + # NumpyExtensionArray[object] can hold anything, so skip super().test_insert_invalid(data, invalid_scalar) @@ -373,7 +373,7 @@ def test_setitem_scalar_key_sequence_raise(self, data): # Failed: DID NOT RAISE super().test_setitem_scalar_key_sequence_raise(data) - # TODO: there is some issue with PandasArray, therefore, + # TODO: there is some issue with NumpyExtensionArray, therefore, # skip the setitem test for now, and fix it later (GH 31446) @skip_nested @@ -428,7 +428,7 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): key = full_indexer(df) result.loc[key, "data"] = df["data"] - # base class method has expected = df; PandasArray behaves oddly because + # base class method has expected = df; NumpyExtensionArray behaves oddly because # we patch _typ for these tests. 
if data.dtype.numpy_dtype != object: if not isinstance(key, slice) or key != slice(None): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9005798d66d17..722f61de3f43a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1248,7 +1248,7 @@ def test_iloc_setitem_nullable_2d_values(self): df.loc[:] = df.values[:, ::-1] tm.assert_frame_equal(df, orig) - df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1]) + df.loc[:] = pd.core.arrays.NumpyExtensionArray(df.values[:, ::-1]) tm.assert_frame_equal(df, orig) df.iloc[:] = df.iloc[:, :] diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index cc4860beea491..906e74230a762 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -8,7 +8,7 @@ MultiIndex, ) import pandas._testing as tm -from pandas.core.arrays import PandasArray +from pandas.core.arrays import NumpyExtensionArray pytestmark = td.skip_array_manager_invalid_test @@ -55,7 +55,7 @@ def test_to_dict_of_blocks_item_cache(request, using_copy_on_write): request.node.add_marker(pytest.mark.xfail(reason="CoW - not yet implemented")) # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) - df["c"] = PandasArray(np.array([1, 2, None, 3], dtype=object)) + df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) mgr = df._mgr assert len(mgr.blocks) == 3 # i.e. 
not consolidated diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 00df0530fe70f..d8e46d4b606c6 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -378,7 +378,7 @@ def test_strange_column_corruption_issue(self, using_copy_on_write): assert first == second == 0 def test_constructor_no_pandas_array(self): - # Ensure that PandasArray isn't allowed inside Series + # Ensure that NumpyExtensionArray isn't allowed inside Series # See https://github.com/pandas-dev/pandas/issues/23995 for more. arr = Series([1, 2, 3]).array result = DataFrame({"A": arr}) @@ -390,12 +390,14 @@ def test_constructor_no_pandas_array(self): def test_add_column_with_pandas_array(self): # GH 26390 df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) - df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) + df["c"] = pd.arrays.NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) df2 = DataFrame( { "a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"], - "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), + "c": pd.arrays.NumpyExtensionArray( + np.array([1, 2, None, 3], dtype=object) + ), } ) assert type(df["c"]._mgr.blocks[0]) == NumpyBlock diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8a4d1624fcb30..c71a0dd5f92b2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -30,7 +30,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, IntervalDtype, - PandasDtype, + NumpyEADtype, PeriodDtype, ) @@ -188,7 +188,7 @@ def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series): assert obj._mgr.arrays[0].dtype == object assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) - obj = frame_or_series(frame_or_series(arr), dtype=PandasDtype(object)) + obj = frame_or_series(frame_or_series(arr), dtype=NumpyEADtype(object)) 
assert obj._mgr.arrays[0].dtype == object assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 47e7092743b00..8a4b5fd5f2e01 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1407,23 +1407,23 @@ def test_block_shape(): def test_make_block_no_pandas_array(block_maker): # https://github.com/pandas-dev/pandas/pull/24866 - arr = pd.arrays.PandasArray(np.array([1, 2])) + arr = pd.arrays.NumpyExtensionArray(np.array([1, 2])) - # PandasArray, no dtype + # NumpyExtensionArray, no dtype result = block_maker(arr, BlockPlacement(slice(len(arr))), ndim=arr.ndim) assert result.dtype.kind in ["i", "u"] if block_maker is make_block: - # new_block requires caller to unwrap PandasArray + # new_block requires caller to unwrap NumpyExtensionArray assert result.is_extension is False - # PandasArray, PandasDtype + # NumpyExtensionArray, NumpyEADtype result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) assert result.dtype.kind in ["i", "u"] assert result.is_extension is False # new_block no longer taked dtype keyword - # ndarray, PandasDtype + # ndarray, NumpyEADtype result = block_maker( arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim ) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index d72c8599dfe5e..524bf1b310d38 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -147,8 +147,8 @@ def test_astype_float_to_period(self): def test_astype_no_pandas_dtype(self): # https://github.com/pandas-dev/pandas/pull/24866 ser = Series([1, 2], dtype="int64") - # Don't have PandasDtype in the public API, so we use `.array.dtype`, - # which is a PandasDtype. + # Don't have NumpyEADtype in the public API, so we use `.array.dtype`, + # which is a NumpyEADtype. 
result = ser.astype(ser.array.dtype) tm.assert_series_equal(result, ser) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c7536273862c0..9540d7a014409 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1918,7 +1918,7 @@ def test_constructor_with_pandas_dtype(self): # going through 2D->1D path vals = [(1,), (2,), (3,)] ser = Series(vals) - dtype = ser.array.dtype # PandasDtype + dtype = ser.array.dtype # NumpyEADtype ser2 = Series(vals, dtype=dtype) tm.assert_series_equal(ser, ser2) From 3ff62ab168776ca494eea375add7fcfe1e746121 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Jul 2023 09:32:04 -0700 Subject: [PATCH 2/2] document PandasDtype->NumpyEADtype --- doc/source/whatsnew/v2.1.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d676c668bf2b2..54032695d9555 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -242,7 +242,8 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ -- :class:`arrays.PandasArray` has been renamed ``NumpyExtensionArray``; importing ``PandasArray`` still works until the next major version (:issue:`53694`) +- :class:`arrays.PandasArray` has been renamed ``NumpyExtensionArray`` and the attached dtype name changed from ``PandasDtype`` to ``NumpyEADtype``; importing ``PandasArray`` still works until the next major version (:issue:`53694`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.deprecations: