From dae089c8b4c2398ef0604a9675be8fe9cb619180 Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 12 Mar 2021 09:36:16 -0800
Subject: [PATCH] REF: consistent arguments for create_block_manager_from_blocks

---
Notes (review; text in this area is not part of the diff):
    - ndarray_to_mgr: calls the new _check_values_indices_shape_match helper
      right after the transpose, and in the two branches touched here wraps
      the values with new_block(..., placement=slice(len(columns)), ndim=2)
      itself. When len(columns) == 0 it passes an empty block list.
    - _check_values_indices_shape_match: only compares values.shape[0] with
      len(columns); len(index) is used for the error message, not the check.
    - create_block_manager_from_blocks: annotated as taking List[Block]; the
      wrapping of a lone non-Block value is removed, and _consolidate_inplace()
      now runs after the try, so only BlockManager(blocks, axes) is guarded.
    - NOTE(review): the From: header carries no e-mail address as received;
      confirm the author identity before feeding this to `git am`.

 pandas/core/internals/construction.py | 32 +++++++++++++++++++++++----
 pandas/core/internals/managers.py     | 31 +++++++++-----------------
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 63a437a91f6e4..5ec4f8623aa0f 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -296,6 +296,8 @@ def ndarray_to_mgr(
     )
     values = values.T
 
+    _check_values_indices_shape_match(values, index, columns)
+
     # if we don't have a dtype specified, then try to convert objects
     # on the entire block; this is to convert if we have datetimelike's
     # embedded in an object type
@@ -317,15 +319,37 @@ def ndarray_to_mgr(
         else:
             datelike_vals = maybe_infer_to_datetimelike(values)
             datelike_vals = maybe_squeeze_dt64tz(datelike_vals)
-            block_values = [datelike_vals]
+            nb = new_block(datelike_vals, placement=slice(len(columns)), ndim=2)
+            block_values = [nb]
     else:
-        # error: List item 0 has incompatible type "Union[ExtensionArray, ndarray]";
-        # expected "Block"
-        block_values = [maybe_squeeze_dt64tz(values)]  # type: ignore[list-item]
+        new_values = maybe_squeeze_dt64tz(values)
+        nb = new_block(new_values, placement=slice(len(columns)), ndim=2)
+        block_values = [nb]
+
+    if len(columns) == 0:
+        block_values = []
 
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
+def _check_values_indices_shape_match(
+    values: np.ndarray, index: Index, columns: Index
+) -> None:
+    """
+    Check that the shape implied by our axes matches the actual shape of the
+    data.
+    """
+    if values.shape[0] != len(columns):
+        # Could let this raise in Block constructor, but we get a more
+        # helpful exception message this way.
+        if values.shape[1] == 0:
+            raise ValueError("Empty data passed with indices specified.")
+
+        passed = values.T.shape
+        implied = (len(index), len(columns))
+        raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
+
+
 def maybe_squeeze_dt64tz(dta: ArrayLike) -> ArrayLike:
     """
     If we have a tzaware DatetimeArray with shape (1, N), squeeze to (N,)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 6bd3e37ae101e..1bbd253fc56c5 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1726,30 +1726,19 @@ def set_values(self, values: ArrayLike):
 # Constructor Helpers
 
 
-def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
+def create_block_manager_from_blocks(
+    blocks: List[Block], axes: List[Index]
+) -> BlockManager:
     try:
-        if len(blocks) == 1 and not isinstance(blocks[0], Block):
-            # if blocks[0] is of length 0, return empty blocks
-            if not len(blocks[0]):
-                blocks = []
-            else:
-                # It's OK if a single block is passed as values, its placement
-                # is basically "all items", but if there're many, don't bother
-                # converting, it's an error anyway.
-                blocks = [
-                    new_block(
-                        values=blocks[0], placement=slice(0, len(axes[0])), ndim=2
-                    )
-                ]
-
         mgr = BlockManager(blocks, axes)
-        mgr._consolidate_inplace()
-        return mgr
 
-    except ValueError as e:
-        blocks = [getattr(b, "values", b) for b in blocks]
-        tot_items = sum(b.shape[0] for b in blocks)
-        raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
+    except ValueError as err:
+        arrays = [blk.values for blk in blocks]
+        tot_items = sum(arr.shape[0] for arr in arrays)
+        raise construction_error(tot_items, arrays[0].shape[1:], axes, err)
+
+    mgr._consolidate_inplace()
+    return mgr
 
 
 # We define this here so we can override it in tests.extension.test_numpy