diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index 9d517fe55f808..e6ea2c642650d 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -14,13 +14,13 @@ from pandas._typing import Dtype from pandas.core.dtypes.common import is_datetime64tz_dtype -from pandas.core.dtypes.dtypes import PandasDtype -from pandas.core.dtypes.generic import ABCPandasArray from pandas.core.arrays import DatetimeArray from pandas.core.internals.blocks import ( Block, DatetimeTZBlock, + check_ndim, + extract_pandas_array, get_block_type, ) @@ -39,29 +39,28 @@ def make_block( - Block.make_block_same_class - Block.__init__ """ - if isinstance(values, ABCPandasArray): - # Ensure that we don't allow PandasArray / PandasDtype in internals. - # For now, blocks should be backed by ndarrays when possible. - values = values.to_numpy() - if ndim and ndim > 1: - # TODO(EA2D): special case not needed with 2D EAs - values = np.atleast_2d(values) - - if isinstance(dtype, PandasDtype): - dtype = dtype.numpy_dtype + values, dtype = extract_pandas_array(values, dtype, ndim) if klass is None: dtype = dtype or values.dtype klass = get_block_type(values, dtype) elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype): - # TODO: This is no longer hit internally; does it need to be retained - # for e.g. pyarrow? + # pyarrow calls get here values = DatetimeArray._simple_new(values, dtype=dtype) if not isinstance(placement, BlockPlacement): placement = BlockPlacement(placement) + ndim = _maybe_infer_ndim(values, placement, ndim) + check_ndim(values, placement, ndim) + return klass(values, ndim=ndim, placement=placement) + + +def _maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int: + """ + If `ndim` is not provided, infer it from placment and values. + """ if ndim is None: # GH#38134 Block constructor now assumes ndim is not None if not isinstance(values.dtype, np.dtype): @@ -71,5 +70,4 @@ def make_block( ndim = 2 else: ndim = values.ndim - - return klass(values, ndim=ndim, placement=placement) + return ndim diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a11ca0aa82b29..88c8122f17d34 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,6 +7,7 @@ Callable, List, Optional, + Tuple, Type, Union, cast, @@ -47,7 +48,6 @@ ) from pandas.core.dtypes.common import ( is_categorical_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_list_like, @@ -162,21 +162,10 @@ def __init__(self, values, placement, ndim: int): ndim : int 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame """ - # TODO(EA2D): ndim will be unnecessary with 2D EAs - self.ndim = self._check_ndim(values, ndim) + self.ndim = ndim self.mgr_locs = placement self.values = self._maybe_coerce_values(values) - if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values): - raise ValueError( - f"Wrong number of items passed {len(self.values)}, " - f"placement implies {len(self.mgr_locs)}" - ) - - elif self.is_extension and self.ndim == 2 and len(self.mgr_locs) != 1: - # TODO(EA2D): check unnecessary with 2D EAs - raise AssertionError("block.size != values.size") - @classmethod def _maybe_coerce_values(cls, values): """ @@ -192,43 +181,6 @@ def _maybe_coerce_values(cls, values): """ return values - def _check_ndim(self, values, ndim: int): - """ - ndim inference and validation. - - Infers ndim from 'values' if not provided to __init__. - Validates that values.ndim and ndim are consistent if and only if - the class variable '_validate_ndim' is True. - - Parameters - ---------- - values : array-like - ndim : int - - Returns - ------- - ndim : int - - Raises - ------ - ValueError : the number of dimensions do not match - """ - assert isinstance(ndim, int) # GH#38134 enforce this - - if self._validate_ndim: - if values.ndim != ndim: - raise ValueError( - "Wrong number of dimensions. " - f"values.ndim != ndim [{values.ndim} != {ndim}]" - ) - elif values.ndim > ndim: - # ExtensionBlock - raise ValueError( - "Wrong number of dimensions. " - f"values.ndim > ndim [{values.ndim} > {ndim}]" - ) - return ndim - @property def _holder(self): """ @@ -378,7 +330,7 @@ def getitem_block(self, slicer, new_mgr_locs=None) -> Block: new_values = self._slice(slicer) - if self._validate_ndim and new_values.ndim != self.ndim: + if new_values.ndim != self.values.ndim: raise ValueError("Only same dim slicing is allowed") return type(self)._simple_new(new_values, new_mgr_locs, self.ndim) @@ -2262,10 +2214,68 @@ def get_block_type(values, dtype: Optional[Dtype] = None): return cls -def new_block( - values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None -) -> Block: - # Ensure that we don't allow PandasArray / PandasDtype in internals. +def new_block(values, placement, *, ndim: int, klass=None) -> Block: + + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) + + values, _ = extract_pandas_array(values, None, ndim) + check_ndim(values, placement, ndim) + + if klass is None: + klass = get_block_type(values, values.dtype) + + return klass(values, ndim=ndim, placement=placement) + + +def check_ndim(values, placement: BlockPlacement, ndim: int): + """ + ndim inference and validation. + + Validates that values.ndim and ndim are consistent. + Validates that len(values) and len(placement) are consistent. + + Parameters + ---------- + values : array-like + placement : BlockPlacement + ndim : int + + Raises + ------ + ValueError : the number of dimensions do not match + """ + + if values.ndim > ndim: + # Check for both np.ndarray and ExtensionArray + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim > ndim [{values.ndim} > {ndim}]" + ) + + elif isinstance(values.dtype, np.dtype): + # TODO(EA2D): special case not needed with 2D EAs + if values.ndim != ndim: + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + if len(placement) != len(values): + raise ValueError( + f"Wrong number of items passed {len(values)}, " + f"placement implies {len(placement)}" + ) + elif ndim == 2 and len(placement) != 1: + # TODO(EA2D): special case unnecessary with 2D EAs + raise AssertionError("block.size != values.size") + + +def extract_pandas_array( + values: ArrayLike, dtype: Optional[DtypeObj], ndim: int +) -> Tuple[ArrayLike, Optional[DtypeObj]]: + """ + Ensure that we don't allow PandasArray / PandasDtype in internals. + """ # For now, blocks should be backed by ndarrays when possible. if isinstance(values, ABCPandasArray): values = values.to_numpy() @@ -2276,16 +2286,7 @@ def new_block( if isinstance(dtype, PandasDtype): dtype = dtype.numpy_dtype - if klass is None: - dtype = dtype or values.dtype - klass = get_block_type(values, dtype) - - elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype): - # TODO: This is no longer hit internally; does it need to be retained - # for e.g. pyarrow? - values = DatetimeArray._simple_new(values, dtype=dtype) - - return klass(values, ndim=ndim, placement=placement) + return values, dtype # ----------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8b08a5fd70537..1a2e8693b5097 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1610,6 +1610,9 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: blk = self._block array = blk._slice(slobj) + if array.ndim > blk.values.ndim: + # This will be caught by Series._get_values + raise ValueError("dimension-expanding indexing not allowed") block = blk.make_block_same_class(array, placement=slice(0, len(array))) return type(self)(block, self.index[slobj]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 7f0fed71ca5f1..880872bfa713a 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -75,7 +75,7 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): msgs = [] if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: - msgs.append(r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]") + msgs.append(r"Wrong number of dimensions. values.ndim > ndim \[3 > 1\]") if frame_or_series is Series or indexer_sli is tm.iloc: msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") if indexer_sli is tm.loc or ( diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 683006d9b3b9c..6681fa958c7ad 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1318,17 +1318,19 @@ def test_make_block_no_pandas_array(block_maker): assert result.dtype.kind in ["i", "u"] assert result.is_extension is False - # PandasArray, PandasDtype - result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) - assert result.dtype.kind in ["i", "u"] - assert result.is_extension is False - - # ndarray, PandasDtype - result = block_maker( - arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim - ) - assert result.dtype.kind in ["i", "u"] - assert result.is_extension is False + if block_maker is make_block: + # PandasArray, PandasDtype + result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False + + # new_block no longer taked dtype keyword + # ndarray, PandasDtype + result = block_maker( + arr.to_numpy(), slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim + ) + assert result.dtype.kind in ["i", "u"] + assert result.is_extension is False def test_single_block_manager_fastpath_deprecated():