diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f86d8755acb22..f3889ff360aa8 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -42,7 +42,6 @@ from pandas.core.dtypes.common import ( DT64NS_DTYPE, TD64NS_DTYPE, - is_categorical_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -53,7 +52,10 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCMultiIndex, +) from pandas.core.dtypes.missing import isna from pandas.core import nanops @@ -970,7 +972,7 @@ def sequence_to_td64ns( elif not isinstance(data, (np.ndarray, ExtensionArray)): # GH#24539 e.g. xarray, dask object data = np.asarray(data) - elif is_categorical_dtype(data.dtype): + elif isinstance(data, ABCCategorical): data = data.categories.take(data.codes, fill_value=NaT)._values copy = False diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ac16d2609eb13..3fd1ebaca19f0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -173,7 +173,7 @@ def _simple_new( obj._mgr_locs = placement return obj - def __init__(self, values, placement, ndim: int): + def __init__(self, values, placement: BlockPlacement, ndim: int): """ Parameters ---------- @@ -183,8 +183,10 @@ def __init__(self, values, placement, ndim: int): ndim : int 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame """ + assert isinstance(ndim, int) + assert isinstance(placement, BlockPlacement) self.ndim = ndim - self.mgr_locs = placement + self._mgr_locs = placement self.values = values @property @@ -263,14 +265,12 @@ def fill_value(self): return np.nan @property - def mgr_locs(self): + def mgr_locs(self) -> BlockPlacement: return self._mgr_locs @mgr_locs.setter - def mgr_locs(self, new_mgr_locs): - if not isinstance(new_mgr_locs, libinternals.BlockPlacement): - new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs) - + def mgr_locs(self, new_mgr_locs: BlockPlacement): + assert isinstance(new_mgr_locs, BlockPlacement) self._mgr_locs = new_mgr_locs @final @@ -289,7 +289,9 @@ def make_block(self, values, placement=None) -> Block: return new_block(values, placement=placement, ndim=self.ndim) @final - def make_block_same_class(self, values, placement=None) -> Block: + def make_block_same_class( + self, values, placement: Optional[BlockPlacement] = None + ) -> Block: """ Wrap given values in a block of same type as self. """ if placement is None: placement = self._mgr_locs @@ -1221,7 +1223,11 @@ def func(yvalues: np.ndarray) -> np.ndarray: return self._maybe_downcast(blocks, downcast) def take_nd( - self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_default + self, + indexer, + axis: int, + new_mgr_locs: Optional[BlockPlacement] = None, + fill_value=lib.no_default, ) -> Block: """ Take values according to indexer and return them as a block.bb @@ -1569,7 +1575,11 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs): return self.make_block(new_values) def take_nd( - self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default + self, + indexer, + axis: int = 0, + new_mgr_locs: Optional[BlockPlacement] = None, + fill_value=lib.no_default, ) -> Block: """ Take values according to indexer and return them as a block. @@ -2258,8 +2268,8 @@ def check_ndim(values, placement: BlockPlacement, ndim: int): def extract_pandas_array( - values: ArrayLike, dtype: Optional[DtypeObj], ndim: int -) -> Tuple[ArrayLike, Optional[DtypeObj]]: + values: Union[np.ndarray, ExtensionArray], dtype: Optional[DtypeObj], ndim: int +) -> Tuple[Union[np.ndarray, ExtensionArray], Optional[DtypeObj]]: """ Ensure that we don't allow PandasArray / PandasDtype in internals. """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0e502c08cb8f2..ea264da4c7b5f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -240,7 +240,8 @@ def make_empty(self: T, axes=None) -> T: assert isinstance(self, SingleBlockManager) # for mypy blk = self.blocks[0] arr = blk.values[:0] - nb = blk.make_block_same_class(arr, placement=slice(0, 0)) + bp = BlockPlacement(slice(0, 0)) + nb = blk.make_block_same_class(arr, placement=bp) blocks = [nb] else: blocks = [] @@ -786,7 +787,7 @@ def _combine( new_blocks: List[Block] = [] for b in blocks: b = b.copy(deep=copy) - b.mgr_locs = inv_indexer[b.mgr_locs.indexer] + b.mgr_locs = BlockPlacement(inv_indexer[b.mgr_locs.indexer]) new_blocks.append(b) axes = list(self.axes) @@ -1053,8 +1054,9 @@ def iget(self, i: int) -> SingleBlockManager: values = block.iget(self.blklocs[i]) # shortcut for select a single-dim from a 2-dim BM + bp = BlockPlacement(slice(0, len(values))) values = maybe_coerce_values(values) - nb = type(block)(values, placement=slice(0, len(values)), ndim=1) + nb = type(block)(values, placement=bp, ndim=1) return SingleBlockManager(nb, self.axes[1]) def iget_values(self, i: int) -> ArrayLike: @@ -1266,7 +1268,7 @@ def insert( else: new_mgr_locs = blk.mgr_locs.as_array.copy() new_mgr_locs[new_mgr_locs >= loc] += 1 - blk.mgr_locs = new_mgr_locs + blk.mgr_locs = BlockPlacement(new_mgr_locs) # Accessing public blklocs ensures the public versions are initialized if loc == self.blklocs.shape[0]: @@ -1415,11 +1417,12 @@ def _slice_take_blocks_ax0( # all(np.shares_memory(nb.values, blk.values) for nb in blocks) return blocks else: + bp = BlockPlacement(slice(0, sllen)) return [ blk.take_nd( slobj, axis=0, - new_mgr_locs=slice(0, sllen), + new_mgr_locs=bp, fill_value=fill_value, ) ] @@ -1456,7 +1459,7 @@ def _slice_take_blocks_ax0( # item. for mgr_loc in mgr_locs: newblk = blk.copy(deep=False) - newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1) + newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1)) blocks.append(newblk) else: @@ -1655,12 +1658,15 @@ def getitem_mgr(self, indexer) -> SingleBlockManager: # similar to get_slice, but not restricted to slice indexer blk = self._block array = blk._slice(indexer) - if array.ndim > blk.values.ndim: + if array.ndim > 1: # This will be caught by Series._get_values raise ValueError("dimension-expanding indexing not allowed") - block = blk.make_block_same_class(array, placement=slice(0, len(array))) - return type(self)(block, self.index[indexer]) + bp = BlockPlacement(slice(0, len(array))) + block = blk.make_block_same_class(array, placement=bp) + + new_idx = self.index[indexer] + return type(self)(block, new_idx) def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: assert isinstance(slobj, slice), type(slobj) @@ -1669,7 +1675,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: blk = self._block array = blk._slice(slobj) - block = blk.make_block_same_class(array, placement=slice(0, len(array))) + bp = BlockPlacement(slice(0, len(array))) + block = blk.make_block_same_class(array, placement=bp) new_index = self.index._getitem_slice(slobj) return type(self)(block, new_index) @@ -1733,7 +1740,7 @@ def set_values(self, values: ArrayLike): valid for the current Block/SingleBlockManager (length, dtype, etc). """ self.blocks[0].values = values - self.blocks[0]._mgr_locs = libinternals.BlockPlacement(slice(len(values))) + self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) # -------------------------------------------------------------------- @@ -1985,7 +1992,8 @@ def _merge_blocks( new_values = new_values[argsort] new_mgr_locs = new_mgr_locs[argsort] - return [new_block(new_values, placement=new_mgr_locs, ndim=2)] + bp = BlockPlacement(new_mgr_locs) + return [new_block(new_values, placement=bp, ndim=2)] # can't consolidate --> no merge return blocks diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index 88e70723517e3..df5cd66060659 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -87,7 +87,7 @@ def _reset_block_mgr_locs(nbs: List[Block], locs): Reset mgr_locs to correspond to our original DataFrame. """ for nb in nbs: - nblocs = locs.as_array[nb.mgr_locs.indexer] + nblocs = locs[nb.mgr_locs.indexer] nb.mgr_locs = nblocs # Assertions are disabled for performance, but should hold: # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 693d0645c9519..9360294f5a3f7 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._libs.internals import BlockPlacement + import pandas as pd from pandas.core.internals import BlockManager from pandas.core.internals.blocks import ExtensionBlock @@ -17,7 +19,8 @@ def df(): df1 = pd.DataFrame({"a": [1, 2, 3]}) blocks = df1._mgr.blocks values = np.arange(3, dtype="int64") - custom_block = CustomBlock(values, placement=slice(1, 2), ndim=2) + bp = BlockPlacement(slice(1, 2)) + custom_block = CustomBlock(values, placement=bp, ndim=2) blocks = blocks + (custom_block,) block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index]) return pd.DataFrame(block_manager) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a8c9a7a22ecdc..ba85ff1a044d6 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -327,8 +327,8 @@ def test_duplicate_ref_loc_failure(self): axes, blocks = tmp_mgr.axes, tmp_mgr.blocks - blocks[0].mgr_locs = np.array([0]) - blocks[1].mgr_locs = np.array([0]) + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([0])) # test trying to create block manager with overlapping ref locs @@ -338,8 +338,8 @@ def test_duplicate_ref_loc_failure(self): mgr = BlockManager(blocks, axes) mgr._rebuild_blknos_and_blklocs() - blocks[0].mgr_locs = np.array([0]) - blocks[1].mgr_locs = np.array([1]) + blocks[0].mgr_locs = BlockPlacement(np.array([0])) + blocks[1].mgr_locs = BlockPlacement(np.array([1])) mgr = BlockManager(blocks, axes) mgr.iget(1)