From 9b38dc5377213c84340e83dd8efd92c6623b89aa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 28 Mar 2020 07:53:35 -0700 Subject: [PATCH 1/3] TYP: require Index objects earlier in internals --- pandas/core/internals/__init__.py | 4 ---- pandas/core/internals/construction.py | 23 ++++++++++++--------- pandas/core/internals/managers.py | 29 +++++++++++++++++---------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index e70652b81c42f..bc45b7c74ecc1 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -18,8 +18,6 @@ BlockManager, SingleBlockManager, concatenate_block_managers, - create_block_manager_from_arrays, - create_block_manager_from_blocks, ) __all__ = [ @@ -40,6 +38,4 @@ "BlockManager", "SingleBlockManager", "concatenate_block_managers", - "create_block_manager_from_arrays", - "create_block_manager_from_blocks", ] diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 3e0fb8455884a..e49901ba55f64 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -29,7 +29,6 @@ ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, - ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex, ) @@ -44,7 +43,7 @@ get_objs_combined_axis, union_indexes, ) -from pandas.core.internals import ( +from pandas.core.internals.managers import ( create_block_manager_from_arrays, create_block_manager_from_blocks, ) @@ -53,12 +52,16 @@ # BlockManager Interface -def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True): +def arrays_to_mgr( + arrays, arr_names, index, columns, dtype=None, verify_integrity: bool = True +): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ + arr_names = ensure_index(arr_names) + if verify_integrity: # figure out the index, if necessary if index is None: @@ -74,6 +77,8 @@ def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrit # from BlockManager perspective axes = [columns, index] + assert isinstance(columns, Index), type(columns) + assert isinstance(index, Index), type(index) return create_block_manager_from_arrays(arrays, arr_names, axes) @@ -163,7 +168,8 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): values = [values] if columns is None: - columns = list(range(len(values))) + columns = Index(range(len(values))) + return arrays_to_mgr(values, columns, index, columns, dtype=dtype) # by definition an array here @@ -209,6 +215,8 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): else: block_values = [values] + assert isinstance(columns, Index), type(columns) + assert isinstance(index, Index), type(index) return create_block_manager_from_blocks(block_values, [columns, index]) @@ -635,12 +643,7 @@ def sanitize_index(data, index: Index): if len(data) != len(index): raise ValueError("Length of values does not match length of index") - if isinstance(data, ABCIndexClass): - pass - elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)): - data = data._values - - elif isinstance(data, np.ndarray): + if isinstance(data, np.ndarray): # coerce datetimelike types if data.dtype.kind in ["M", "m"]: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dda932cafe73b..f2920d0f238c6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1673,7 +1673,7 @@ def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager": # Constructor Helpers -def create_block_manager_from_blocks(blocks, axes): +def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: try: if len(blocks) == 1 and not isinstance(blocks[0], Block): # if blocks[0] is of length 0, return empty blocks @@ -1694,10 +1694,15 @@ def create_block_manager_from_blocks(blocks, axes): except ValueError as e: blocks = [getattr(b, "values", b) for b in blocks] tot_items = sum(b.shape[0] for b in blocks) - construction_error(tot_items, blocks[0].shape[1:], axes, e) + raise construction_error(tot_items, blocks[0].shape[1:], axes, e) -def create_block_manager_from_arrays(arrays, names, axes): +def create_block_manager_from_arrays( + arrays, names: Index, axes: List[Index] +) -> BlockManager: + assert isinstance(names, Index) + assert isinstance(axes, list) + assert all(isinstance(x, Index) for x in axes) try: blocks = form_blocks(arrays, names, axes) @@ -1705,7 +1710,7 @@ def create_block_manager_from_arrays(arrays, names, axes): mgr._consolidate_inplace() return mgr except ValueError as e: - construction_error(len(arrays), arrays[0].shape, axes, e) + raise construction_error(len(arrays), arrays[0].shape, axes, e) def construction_error(tot_items, block_shape, axes, e=None): @@ -1720,23 +1725,25 @@ def construction_error(tot_items, block_shape, axes, e=None): if len(implied) <= 2: implied = implied[::-1] + # We return the exception object instead of raising it so that we + # can raise it in the caller; mypy plays better with that if passed == implied and e is not None: - raise e + return e if block_shape[0] == 0: - raise ValueError("Empty data passed with indices specified.") - raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + return ValueError("Empty data passed with indices specified.") + return ValueError(f"Shape of passed values is {passed}, indices imply {implied}") # ----------------------------------------------------------------------- -def form_blocks(arrays, names, axes): +def form_blocks(arrays, names: Index, axes) -> List[Block]: # put "leftover" items in float bucket, where else? # generalize? - items_dict = defaultdict(list) + items_dict: Dict[str, List] = defaultdict(list) extra_locs = [] - names_idx = ensure_index(names) + names_idx = names if names_idx.equals(axes[0]): names_indexer = np.arange(len(names_idx)) else: @@ -1754,7 +1761,7 @@ def form_blocks(arrays, names, axes): block_type = get_block_type(v) items_dict[block_type.__name__].append((i, k, v)) - blocks = [] + blocks: List[Block] = [] if len(items_dict["FloatBlock"]): float_blocks = _multi_blockify(items_dict["FloatBlock"]) blocks.extend(float_blocks) From 9ab29879edab0fc7f37f52720cecbe13a87c56da Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Mar 2020 11:44:58 -0700 Subject: [PATCH 2/3] raise --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5dd303e5bff79..6b19c94c2c899 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -342,7 +342,7 @@ def _verify_integrity(self) -> None: tot_items = sum(len(x.mgr_locs) for x in self.blocks) for block in self.blocks: if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: - construction_error(tot_items, block.shape[1:], self.axes) + raise construction_error(tot_items, block.shape[1:], self.axes) if len(self.items) != tot_items: raise AssertionError( "Number of manager items must equal union of " From 70573e87cf3155b8eee1bf6f3869ea3085981378 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Mar 2020 18:17:16 -0700 Subject: [PATCH 3/3] typing.DefaultDict, fewer assertions --- pandas/core/internals/construction.py | 10 +++++----- pandas/core/internals/managers.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index e49901ba55f64..fc7da4155db36 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -3,6 +3,7 @@ constructors before passing them to a BlockManager. """ from collections import abc +from typing import Tuple import numpy as np import numpy.ma as ma @@ -73,12 +74,13 @@ def arrays_to_mgr( arrays = _homogenize(arrays, index, dtype) columns = ensure_index(columns) + else: + columns = ensure_index(columns) + index = ensure_index(index) # from BlockManager perspective axes = [columns, index] - assert isinstance(columns, Index), type(columns) - assert isinstance(index, Index), type(index) return create_block_manager_from_arrays(arrays, arr_names, axes) @@ -215,8 +217,6 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): else: block_values = [values] - assert isinstance(columns, Index), type(columns) - assert isinstance(index, Index), type(index) return create_block_manager_from_blocks(block_values, [columns, index]) @@ -424,7 +424,7 @@ def get_names_from_index(data): return index -def _get_axes(N, K, index, columns): +def _get_axes(N, K, index, columns) -> Tuple[Index, Index]: # helper to create the axes as indexes # return axes or defaults diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6b19c94c2c899..b9de97593c6e3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,7 +3,7 @@ import itertools import operator import re -from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np @@ -1724,7 +1724,7 @@ def construction_error(tot_items, block_shape, axes, e=None): def form_blocks(arrays, names: Index, axes) -> List[Block]: # put "leftover" items in float bucket, where else? # generalize? - items_dict: Dict[str, List] = defaultdict(list) + items_dict: DefaultDict[str, List] = defaultdict(list) extra_locs = [] names_idx = names