diff --git a/pandas/_typing.py b/pandas/_typing.py index d6561176deb71..36441e620286d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -185,7 +185,7 @@ ] # internals -Manager = Union["ArrayManager", "BlockManager"] +Manager = Union["ArrayManager", "BlockManager", "SingleBlockManager"] SingleManager = Union["SingleArrayManager", "SingleBlockManager"] # indexing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 596993182a9c1..64ec0801b5d8a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -548,6 +548,7 @@ class DataFrame(NDFrame, OpsMixin): _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray) _accessors: set[str] = {"sparse"} _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([]) + _mgr: BlockManager | ArrayManager @property def _constructor(self) -> type[DataFrame]: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2c55f46ea3d62..cdf919923d21c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -24,6 +24,7 @@ Dtype, DtypeObj, Shape, + type_t, ) from pandas.errors import PerformanceWarning from pandas.util._validators import validate_bool_kwarg @@ -80,10 +81,10 @@ # TODO: flexible with index=None and/or items=None -T = TypeVar("T", bound="BlockManager") +T = TypeVar("T", bound="BaseBlockManager") -class BlockManager(DataManager): +class BaseBlockManager(DataManager): """ Core internal data structure to implement DataFrame, Series, etc. @@ -145,59 +146,18 @@ class BlockManager(DataManager): _blknos: np.ndarray _blklocs: np.ndarray + blocks: tuple[Block, ...] + axes: list[Index] # Non-trivially faster than a property - ndim = 2 # overridden by SingleBlockManager + ndim: int - def __init__( - self, - blocks: Sequence[Block], - axes: Sequence[Index], - verify_integrity: bool = True, - ): - self.axes = [ensure_index(ax) for ax in axes] - self.blocks: tuple[Block, ...] = tuple(blocks) - - for block in blocks: - if self.ndim != block.ndim: - raise AssertionError( - f"Number of Block dimensions ({block.ndim}) must equal " - f"number of axes ({self.ndim})" - ) - - if verify_integrity: - self._verify_integrity() - - # Populate known_consolidate, blknos, and blklocs lazily - self._known_consolidated = False - # error: Incompatible types in assignment (expression has type "None", - # variable has type "ndarray") - self._blknos = None # type: ignore[assignment] - # error: Incompatible types in assignment (expression has type "None", - # variable has type "ndarray") - self._blklocs = None # type: ignore[assignment] + def __init__(self, blocks, axes, verify_integrity=True): + raise NotImplementedError @classmethod - def _simple_new(cls, blocks: tuple[Block, ...], axes: list[Index]): - """ - Fastpath constructor; does NO validation. - """ - obj = cls.__new__(cls) - obj.axes = axes - obj.blocks = blocks - - # Populate known_consolidate, blknos, and blklocs lazily - obj._known_consolidated = False - obj._blknos = None - obj._blklocs = None - return obj - - @classmethod - def from_blocks(cls, blocks: list[Block], axes: list[Index]): - """ - Constructor for BlockManager and SingleBlockManager with same signature. - """ - return cls(blocks, axes, verify_integrity=False) + def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T: + raise NotImplementedError @property def blknos(self): @@ -336,7 +296,7 @@ def __getstate__(self): return axes_array, block_values, block_items, extra_state def __setstate__(self, state): - def unpickle_block(values, mgr_locs, ndim: int): + def unpickle_block(values, mgr_locs, ndim: int) -> Block: # TODO(EA2D): ndim would be unnecessary with 2D EAs # older pickles may store e.g. DatetimeIndex instead of DatetimeArray values = extract_array(values, extract_numpy=True) @@ -466,12 +426,6 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: return type(self).from_blocks(result_blocks, [self.axes[0], index]) - def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: - """ - Apply array_op blockwise with another (aligned) BlockManager. - """ - return operate_blockwise(self, other, array_op) - def apply( self: T, f, @@ -539,12 +493,12 @@ def apply( return type(self).from_blocks(result_blocks, self.axes) def quantile( - self, + self: T, *, qs: Float64Index, axis: int = 0, interpolation="linear", - ) -> BlockManager: + ) -> T: """ Iterate over blocks applying quantile reduction. This routine is intended for reduction type operations and @@ -578,7 +532,7 @@ def quantile( return type(self)(blocks, new_axes) - def where(self, other, cond, align: bool, errors: str) -> BlockManager: + def where(self: T, other, cond, align: bool, errors: str) -> T: if align: align_keys = ["other", "cond"] else: @@ -593,7 +547,7 @@ def where(self, other, cond, align: bool, errors: str) -> BlockManager: errors=errors, ) - def setitem(self, indexer, value) -> BlockManager: + def setitem(self: T, indexer, value) -> T: return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): @@ -611,38 +565,38 @@ def putmask(self, mask, new, align: bool = True): new=new, ) - def diff(self, n: int, axis: int) -> BlockManager: + def diff(self: T, n: int, axis: int) -> T: axis = self._normalize_axis(axis) return self.apply("diff", n=n, axis=axis) - def interpolate(self, **kwargs) -> BlockManager: + def interpolate(self: T, **kwargs) -> T: return self.apply("interpolate", **kwargs) - def shift(self, periods: int, axis: int, fill_value) -> BlockManager: + def shift(self: T, periods: int, axis: int, fill_value) -> T: axis = self._normalize_axis(axis) if fill_value is lib.no_default: fill_value = None return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) - def fillna(self, value, limit, inplace: bool, downcast) -> BlockManager: + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: return self.apply( "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast ) - def downcast(self) -> BlockManager: + def downcast(self: T) -> T: return self.apply("downcast") - def astype(self, dtype, copy: bool = False, errors: str = "raise") -> BlockManager: + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: return self.apply("astype", dtype=dtype, copy=copy, errors=errors) def convert( - self, + self: T, copy: bool = True, datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> BlockManager: + ) -> T: return self.apply( "convert", copy=copy, @@ -651,7 +605,7 @@ def convert( timedelta=timedelta, ) - def replace(self, to_replace, value, inplace: bool, regex: bool) -> BlockManager: + def replace(self: T, to_replace, value, inplace: bool, regex: bool) -> T: assert np.ndim(value) == 0, value return self.apply( "replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex @@ -677,7 +631,7 @@ def replace_list( bm._consolidate_inplace() return bm - def to_native_types(self, **kwargs) -> BlockManager: + def to_native_types(self: T, **kwargs) -> T: """ Convert values to native types (strings / python objects) that are used in formatting (repr / csv). @@ -721,7 +675,7 @@ def is_view(self) -> bool: return False - def get_bool_data(self, copy: bool = False) -> BlockManager: + def get_bool_data(self: T, copy: bool = False) -> T: """ Select blocks that are bool-dtype and columns from object-dtype blocks that are all-bool. @@ -746,7 +700,7 @@ def get_bool_data(self, copy: bool = False) -> BlockManager: return self._combine(new_blocks, copy) - def get_numeric_data(self, copy: bool = False) -> BlockManager: + def get_numeric_data(self: T, copy: bool = False) -> T: """ Parameters ---------- @@ -779,21 +733,6 @@ def _combine( return type(self).from_blocks(new_blocks, axes) - def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: - assert isinstance(slobj, slice), type(slobj) - - if axis == 0: - new_blocks = self._slice_take_blocks_ax0(slobj) - elif axis == 1: - new_blocks = [blk.getitem_block_index(slobj) for blk in self.blocks] - else: - raise IndexError("Requested axis not found in manager") - - new_axes = list(self.axes) - new_axes[axis] = new_axes[axis]._getitem_slice(slobj) - - return type(self)._simple_new(tuple(new_blocks), new_axes) - @property def nblocks(self) -> int: return len(self.blocks) @@ -1007,7 +946,7 @@ def fast_xs(self, loc: int) -> ArrayLike: return result - def consolidate(self) -> BlockManager: + def consolidate(self: T) -> T: """ Join together blocks having same dtype @@ -1030,19 +969,6 @@ def _consolidate_inplace(self) -> None: self._known_consolidated = True self._rebuild_blknos_and_blklocs() - def iget(self, i: int) -> SingleBlockManager: - """ - Return the data as a SingleBlockManager. - """ - block = self.blocks[self.blknos[i]] - values = block.iget(self.blklocs[i]) - - # shortcut for select a single-dim from a 2-dim BM - bp = BlockPlacement(slice(0, len(values))) - values = maybe_coerce_values(values) - nb = type(block)(values, placement=bp, ndim=1) - return SingleBlockManager(nb, self.axes[1]) - def iget_values(self, i: int) -> ArrayLike: """ Return the data for column i as the values (ndarray or ExtensionArray). @@ -1051,19 +977,6 @@ def iget_values(self, i: int) -> ArrayLike: values = block.iget(self.blklocs[i]) return values - def idelete(self, indexer) -> BlockManager: - """ - Delete selected locations, returning a new BlockManager. - """ - is_deleted = np.zeros(self.shape[0], dtype=np.bool_) - is_deleted[indexer] = True - taker = (~is_deleted).nonzero()[0] - - nbs = self._slice_take_blocks_ax0(taker, only_slice=True) - new_columns = self.items[~is_deleted] - axes = [new_columns, self.axes[1]] - return type(self)._simple_new(tuple(nbs), axes) - def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): """ Set new item in-place. Does not consolidate. Adds new Block if not @@ -1481,21 +1394,123 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: consolidate=False, ) - def _equal_values(self: T, other: T) -> bool: + +class BlockManager(BaseBlockManager): + """ + BaseBlockManager that holds 2D blocks. + """ + + ndim = 2 + + def __init__( + self, + blocks: Sequence[Block], + axes: Sequence[Index], + verify_integrity: bool = True, + ): + self.axes = [ensure_index(ax) for ax in axes] + self.blocks: tuple[Block, ...] = tuple(blocks) + + for block in blocks: + if self.ndim != block.ndim: + raise AssertionError( + f"Number of Block dimensions ({block.ndim}) must equal " + f"number of axes ({self.ndim})" + ) + + if verify_integrity: + self._verify_integrity() + + # Populate known_consolidate, blknos, and blklocs lazily + self._known_consolidated = False + # error: Incompatible types in assignment (expression has type "None", + # variable has type "ndarray") + self._blknos = None # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "None", + # variable has type "ndarray") + self._blklocs = None # type: ignore[assignment] + + @classmethod + def _simple_new(cls, blocks: tuple[Block, ...], axes: list[Index]): + """ + Fastpath constructor; does NO validation. + """ + obj = cls.__new__(cls) + obj.axes = axes + obj.blocks = blocks + + # Populate known_consolidate, blknos, and blklocs lazily + obj._known_consolidated = False + obj._blknos = None + obj._blklocs = None + return obj + + @classmethod + def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager: + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + return cls(blocks, axes, verify_integrity=False) + + def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: + assert isinstance(slobj, slice), type(slobj) + + if axis == 0: + new_blocks = self._slice_take_blocks_ax0(slobj) + elif axis == 1: + new_blocks = [blk.getitem_block_index(slobj) for blk in self.blocks] + else: + raise IndexError("Requested axis not found in manager") + + new_axes = list(self.axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)._simple_new(tuple(new_blocks), new_axes) + + def iget(self, i: int) -> SingleBlockManager: + """ + Return the data as a SingleBlockManager. + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + + # shortcut for select a single-dim from a 2-dim BM + bp = BlockPlacement(slice(0, len(values))) + values = maybe_coerce_values(values) + nb = type(block)(values, placement=bp, ndim=1) + return SingleBlockManager(nb, self.axes[1]) + + def idelete(self, indexer) -> BlockManager: + """ + Delete selected locations, returning a new BlockManager. + """ + is_deleted = np.zeros(self.shape[0], dtype=np.bool_) + is_deleted[indexer] = True + taker = (~is_deleted).nonzero()[0] + + nbs = self._slice_take_blocks_ax0(taker, only_slice=True) + new_columns = self.items[~is_deleted] + axes = [new_columns, self.axes[1]] + return type(self)._simple_new(tuple(nbs), axes) + + # ---------------------------------------------------------------- + # Block-wise Operation + + def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: + """ + Apply array_op blockwise with another (aligned) BlockManager. + """ + return operate_blockwise(self, other, array_op) + + def _equal_values(self: BlockManager, other: BlockManager) -> bool: """ Used in .equals defined in base class. Only check the column values assuming shape and indexes have already been checked. """ - if self.ndim == 1: - # For SingleBlockManager (i.e.Series) - if other.ndim != 1: - return False - left = self.blocks[0].values - right = other.blocks[0].values - return array_equals(left, right) - return blockwise_all(self, other, array_equals) + # ---------------------------------------------------------------- + def unstack(self, unstacker, fill_value) -> BlockManager: """ Return a BlockManager with all blocks unstacked.. @@ -1534,7 +1549,7 @@ def unstack(self, unstacker, fill_value) -> BlockManager: return bm -class SingleBlockManager(BlockManager, SingleDataManager): +class SingleBlockManager(BaseBlockManager, SingleDataManager): """ manage a single block with """ ndim = 1 @@ -1687,6 +1702,18 @@ def set_values(self, values: ArrayLike): self.blocks[0].values = values self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) + def _equal_values(self: T, other: T) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. + """ + # For SingleBlockManager (i.e.Series) + if other.ndim != 1: + return False + left = self.blocks[0].values + right = other.blocks[0].values + return array_equals(left, right) + # -------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/series.py b/pandas/core/series.py index 8e9df114feda0..968ab27d6d58c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1107,12 +1107,8 @@ def _set_labels(self, key, value): def _set_values(self, key, value): if isinstance(key, Series): key = key._values - # error: Incompatible types in assignment (expression has type "Union[Any, - # BlockManager]", variable has type "Union[SingleArrayManager, - # SingleBlockManager]") - self._mgr = self._mgr.setitem( # type: ignore[assignment] - indexer=key, value=value - ) + + self._mgr = self._mgr.setitem(indexer=key, value=value) self._maybe_update_cacher() def _set_value(self, label, value, takeable: bool = False):