From 9fe592971f9fa81a30e1c7cb04772a456d29d35c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 13:55:44 -0700 Subject: [PATCH 1/2] REF: move Block._can_hold_element logic to can_hold_element for AM compat --- pandas/core/dtypes/cast.py | 54 +++++++++++++++++++-- pandas/core/indexes/base.py | 2 +- pandas/core/internals/blocks.py | 71 +++++++++------------------- pandas/tests/extension/test_numpy.py | 13 ++++- 4 files changed, 85 insertions(+), 55 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7a2175a364a8a..e3437b0f99241 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -113,6 +113,9 @@ from pandas.core.arrays import ( DatetimeArray, ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, ) _int8_max = np.iinfo(np.int8).max @@ -2167,24 +2170,51 @@ def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: raise ValueError(f"Cannot assign {type(value).__name__} to bool series") -def can_hold_element(dtype: np.dtype, element: Any) -> bool: +def can_hold_element(arr: ArrayLike, element: Any) -> bool: """ Can we do an inplace setitem with this element in an array with this dtype? Parameters ---------- - dtype : np.dtype + arr : np.ndarray or ExtensionArray element : Any Returns ------- bool """ + dtype = arr.dtype + if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]: + if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)): + # np.dtype here catches datetime64ns and timedelta64ns; we assume + # in this case that we have DatetimeArray/TimedeltaArray + arr = cast( + "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr + ) + try: + arr._validate_setitem_value(element) + return True + except (ValueError, TypeError): + return False + + # This is technically incorrect, but maintains the behavior of + # ExtensionBlock._can_hold_element + return True + tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: if tipo is not None: - return tipo.kind in ["i", "u"] and dtype.itemsize >= tipo.itemsize + if tipo.kind not in ["i", "u"]: + # Anything other than integer we cannot hold + return False + elif dtype.itemsize < tipo.itemsize: + return False + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype; we can put this into an ndarray + # losslessly iff it has no NAs + return not element._mask.any() + return True # We have not inferred an integer from the dtype # check if we have a builtin int or a float equal to an int @@ -2192,7 +2222,16 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: elif dtype.kind == "f": if tipo is not None: - return tipo.kind in ["f", "i", "u"] + # TODO: itemsize check? + if tipo.kind not in ["f", "i", "u"]: + # Anything other than float/integer we cannot hold + return False + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype or FloatingDtype; + # we can put this into an ndarray losslessly iff it has no NAs + return not element._mask.any() + return True + return lib.is_integer(element) or lib.is_float(element) elif dtype.kind == "c": @@ -2210,4 +2249,11 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: elif dtype == object: return True + elif dtype.kind == "S": + # TODO: test tests.frame.methods.test_replace tests get here, + # need more targeted tests. xref phofl has a PR about this + if tipo is not None: + return tipo.kind == "S" and tipo.itemsize <= dtype.itemsize + return isinstance(element, bytes) and len(element) <= dtype.itemsize + raise NotImplementedError(dtype) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 094f4a67d2e61..b4a8aebd2d48d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4482,7 +4482,7 @@ def _validate_fill_value(self, value): TypeError If the value cannot be inserted into an array of this dtype. """ - if not can_hold_element(self.dtype, value): + if not can_hold_element(self._values, value): raise TypeError return value diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7d8dcb34ed582..c4720591c2c96 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -5,7 +5,6 @@ from typing import ( TYPE_CHECKING, Any, - Callable, List, Optional, Tuple, @@ -103,6 +102,7 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.base import PandasObject import pandas.core.common as com import pandas.core.computation.expressions as expressions @@ -123,7 +123,6 @@ Float64Index, Index, ) - from pandas.core.arrays._mixins import NDArrayBackedExtensionArray # comparison is faster than is_object_dtype _dtype_obj = np.dtype("object") @@ -608,9 +607,12 @@ def convert( """ return [self.copy()] if copy else [self] + @final def _can_hold_element(self, element: Any) -> bool: """ require the same dtype as ourselves """ - raise NotImplementedError("Implemented on subclasses") + element = extract_array(element, extract_numpy=True) + arr = ensure_wrapped_if_datetimelike(self.values) + return can_hold_element(arr, element) @final def should_store(self, value: ArrayLike) -> bool: @@ -1556,10 +1558,6 @@ def take_nd( return self.make_block_same_class(new_values, new_mgr_locs) - def _can_hold_element(self, element: Any) -> bool: - # TODO: We may need to think about pushing this onto the array. - return True - def _slice(self, slicer): """ Return a slice of my values. @@ -1708,24 +1706,7 @@ def _unstack(self, unstacker, fill_value, new_placement): return blocks, mask -class HybridMixin: - """ - Mixin for Blocks backed (maybe indirectly) by ExtensionArrays. - """ - - array_values: Callable - - def _can_hold_element(self, element: Any) -> bool: - values = self.array_values() - - try: - values._validate_setitem_value(element) - return True - except (ValueError, TypeError): - return False - - -class ObjectValuesExtensionBlock(HybridMixin, ExtensionBlock): +class ObjectValuesExtensionBlock(ExtensionBlock): """ Block providing backwards-compatibility for `.values`. @@ -1740,21 +1721,15 @@ class NumericBlock(Block): __slots__ = () is_numeric = True - def _can_hold_element(self, element: Any) -> bool: - element = extract_array(element, extract_numpy=True) - if isinstance(element, (IntegerArray, FloatingArray)): - if element._mask.any(): - return False - # error: Argument 1 to "can_hold_element" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" - return can_hold_element(self.dtype, element) # type: ignore[arg-type] - -class NDArrayBackedExtensionBlock(HybridMixin, Block): +class NDArrayBackedExtensionBlock(Block): """ Block backed by an NDArrayBackedExtensionArray """ + def array_values(self) -> NDArrayBackedExtensionArray: + return ensure_wrapped_if_datetimelike(self.values) + def internal_values(self): # Override to return DatetimeArray and TimedeltaArray return self.array_values() @@ -1783,7 +1758,6 @@ def putmask(self, mask, new) -> List[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs arr = self.array_values().reshape(self.shape) - arr = cast("NDArrayBackedExtensionArray", arr) arr.T.putmask(mask, new) return [self] @@ -1800,8 +1774,8 @@ def where(self, other, cond, errors="raise") -> List[Block]: # TODO(EA2D): reshape not needed with 2D EAs res_values = res_values.reshape(self.values.shape) - res_values = maybe_coerce_values(res_values) - nb = self.make_block_same_class(res_values) + coerced = maybe_coerce_values(res_values) + nb = self.make_block_same_class(coerced) return [nb] def diff(self, n: int, axis: int = 0) -> List[Block]: @@ -1828,15 +1802,15 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: values = self.array_values().reshape(self.shape) new_values = values - values.shift(n, axis=axis) - new_values = maybe_coerce_values(new_values) - return [self.make_block(new_values)] + coerced = maybe_coerce_values(new_values) + return [self.make_block(coerced)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs values = self.array_values().reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) - new_values = maybe_coerce_values(new_values) - return [self.make_block_same_class(new_values)] + coerced = maybe_coerce_values(new_values) + return [self.make_block_same_class(coerced)] def fillna( self, value, limit=None, inplace: bool = False, downcast=None @@ -1851,8 +1825,8 @@ def fillna( values = self.array_values() values = values if inplace else values.copy() new_values = values.fillna(value=value, limit=limit) - new_values = maybe_coerce_values(new_values) - return [self.make_block_same_class(values=new_values)] + coerced = maybe_coerce_values(new_values) + return [self.make_block_same_class(values=coerced)] class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock): @@ -1886,13 +1860,15 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): is_numeric = False internal_values = Block.internal_values - _can_hold_element = DatetimeBlock._can_hold_element diff = DatetimeBlock.diff where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask fillna = DatetimeLikeBlockMixin.fillna - array_values = ExtensionBlock.array_values + # error: Incompatible types in assignment (expression has type "Callable[[], + # ExtensionArray]", base class "NDArrayBackedExtensionBlock" defined the + # type as "Callable[[], Union[TimedeltaArray, DatetimeArray]]") + array_values = ExtensionBlock.array_values # type: ignore[assignment] @property def is_view(self) -> bool: @@ -1960,9 +1936,6 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: # split and convert the blocks return extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks]) - def _can_hold_element(self, element: Any) -> bool: - return True - class CategoricalBlock(ExtensionBlock): # this Block type is kept for backwards-compatibility diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 051871513a14e..e11e74f16030c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -18,6 +18,7 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.dtypes import ( ExtensionDtype, PandasDtype, @@ -27,7 +28,10 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.numpy_ import PandasArray -from pandas.core.internals import managers +from pandas.core.internals import ( + blocks, + managers, +) from pandas.tests.extension import base # TODO(ArrayManager) PandasArray @@ -45,6 +49,12 @@ def _extract_array_patched(obj): return obj +def _can_hold_element_patched(obj, element) -> bool: + if isinstance(element, PandasArray): + element = element.to_numpy() + return can_hold_element(obj, element) + + @pytest.fixture(params=["float", "object"]) def dtype(request): return PandasDtype(np.dtype(request.param)) @@ -70,6 +80,7 @@ def allow_in_pandas(monkeypatch): with monkeypatch.context() as m: m.setattr(PandasArray, "_typ", "extension") m.setattr(managers, "_extract_array", _extract_array_patched) + m.setattr(blocks, "can_hold_element", _can_hold_element_patched) yield From 7f8d9b766bcde2108150817e835b287831f80489 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 31 Mar 2021 12:29:10 -0700 Subject: [PATCH 2/2] remove ObjectValuesExtensionBlock --- pandas/core/internals/blocks.py | 20 ++------------------ pandas/core/internals/managers.py | 9 --------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2b34a94ae1cd4..944844dfbbb5b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -17,8 +17,6 @@ import numpy as np from pandas._libs import ( - Interval, - Period, Timestamp, algos as libalgos, internals as libinternals, @@ -628,8 +626,7 @@ def convert( def _can_hold_element(self, element: Any) -> bool: """ require the same dtype as ourselves """ element = extract_array(element, extract_numpy=True) - arr = ensure_wrapped_if_datetimelike(self.values) - return can_hold_element(arr, element) + return can_hold_element(self.values, element) @final def should_store(self, value: ArrayLike) -> bool: @@ -1547,7 +1544,7 @@ def setitem(self, indexer, value): be a compatible shape. """ if not self._can_hold_element(value): - # This is only relevant for DatetimeTZBlock, ObjectValuesExtensionBlock, + # This is only relevant for DatetimeTZBlock, PeriodDtype, IntervalDtype, # which has a non-trivial `_can_hold_element`. # https://github.com/pandas-dev/pandas/issues/24020 # Need a dedicated setitem until GH#24020 (type promotion in setitem @@ -1744,17 +1741,6 @@ def _unstack(self, unstacker, fill_value, new_placement): return blocks, mask -class ObjectValuesExtensionBlock(ExtensionBlock): - """ - Block providing backwards-compatibility for `.values`. - - Used by PeriodArray and IntervalArray to ensure that - Series[T].values is an ndarray of objects. - """ - - pass - - class NumericBlock(Block): __slots__ = () is_numeric = True @@ -2021,8 +2007,6 @@ def get_block_type(values, dtype: Optional[Dtype] = None): cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock - elif vtype is Interval or vtype is Period: - cls = ObjectValuesExtensionBlock elif isinstance(dtype, ExtensionDtype): # Note: need to be sure PandasArray is unwrapped before we get here cls = ExtensionBlock diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6681015856d6b..de0a5687aeb8b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -73,7 +73,6 @@ CategoricalBlock, DatetimeTZBlock, ExtensionBlock, - ObjectValuesExtensionBlock, ensure_block_shape, extend_blocks, get_block_type, @@ -1841,14 +1840,6 @@ def _form_blocks( blocks.extend(external_blocks) - if len(items_dict["ObjectValuesExtensionBlock"]): - external_blocks = [ - new_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2) - for i, array in items_dict["ObjectValuesExtensionBlock"] - ] - - blocks.extend(external_blocks) - if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])