From e7eeee740f2a70b64b79935e40df36994325b4de Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Aug 2021 11:18:32 -0700 Subject: [PATCH 1/2] REF: ensure to_arrays returns matched-length arrays/columns --- pandas/core/frame.py | 2 ++ pandas/core/internals/construction.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 823de2133f0b3..c6b5803c3053a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2323,6 +2323,8 @@ def _from_arrays( manager = get_option("mode.data_manager") columns = ensure_index(columns) + if len(columns) != len(arrays): + raise ValueError("len(columns) must match len(arrays)") mgr = arrays_to_mgr( arrays, columns, diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7f3d246a6fda6..638a21e3250cb 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -433,6 +433,7 @@ def dict_to_mgr( arrays.loc[missing] = [val] * missing.sum() arrays = list(arrays) + data_names = columns else: keys = list(data.keys()) @@ -738,6 +739,17 @@ def to_arrays( ) -> tuple[list[ArrayLike], Index]: """ Return list of arrays, columns. + + Returns + ------- + list[ArrayLike] + These will become columns in a DataFrame. + Index + This will become frame.columns. + + Notes + ----- + Ensures that len(result_arrays) == len(result_index). """ if isinstance(data, ABCDataFrame): # see test_from_records_with_index_data, test_from_records_bad_index_column @@ -783,6 +795,11 @@ def to_arrays( ) if columns is None: columns = ibase.default_index(len(data)) + elif len(columns) > len(data): + raise ValueError("len(columns) > len(data)") + elif len(columns) < len(data): + # doing this here is akin to a pre-emptive reindex + data = data[: len(columns)] return data, columns elif isinstance(data, np.ndarray) and data.dtype.names is not None: From 70c1e4582082e30b63ff52ad9643a257c169f870 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Aug 2021 12:01:50 -0700 Subject: [PATCH 2/2] ensure_index on data_names --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 638a21e3250cb..33568e4a72c4b 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -433,7 +433,7 @@ def dict_to_mgr( arrays.loc[missing] = [val] * missing.sum() arrays = list(arrays) - data_names = columns + data_names = ensure_index(columns) else: keys = list(data.keys())