diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 823de2133f0b3..c6b5803c3053a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2323,6 +2323,8 @@ def _from_arrays( manager = get_option("mode.data_manager") columns = ensure_index(columns) + if len(columns) != len(arrays): + raise ValueError("len(columns) must match len(arrays)") mgr = arrays_to_mgr( arrays, columns, diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7f3d246a6fda6..33568e4a72c4b 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -433,6 +433,7 @@ def dict_to_mgr( arrays.loc[missing] = [val] * missing.sum() arrays = list(arrays) + data_names = ensure_index(columns) else: keys = list(data.keys()) @@ -738,6 +739,17 @@ def to_arrays( ) -> tuple[list[ArrayLike], Index]: """ Return list of arrays, columns. + + Returns + ------- + list[ArrayLike] + These will become columns in a DataFrame. + Index + This will become frame.columns. + + Notes + ----- + Ensures that len(result_arrays) == len(result_index). """ if isinstance(data, ABCDataFrame): # see test_from_records_with_index_data, test_from_records_bad_index_column @@ -783,6 +795,11 @@ def to_arrays( ) if columns is None: columns = ibase.default_index(len(data)) + elif len(columns) > len(data): + raise ValueError("len(columns) > len(data)") + elif len(columns) < len(data): + # doing this here is akin to a pre-emptive reindex + data = data[: len(columns)] return data, columns elif isinstance(data, np.ndarray) and data.dtype.names is not None: