pandas-dev · jorisvandenbossche · Mar 3, 2021 · Feb 23, 2021 · Feb 23, 2021 · Feb 23, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -153,6 +153,8 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/frame/test_constructors.py --array-manager
+        pytest pandas/tests/frame/constructors/ --array-manager
         pytest pandas/tests/frame/test_reductions.py --array-manager
         pytest pandas/tests/reductions/ --array-manager
         pytest pandas/tests/generic/test_generic.py --array-manager

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -564,41 +564,59 @@ def __init__(
         if isinstance(data, DataFrame):
             data = data._mgr
 
-        if isinstance(data, (BlockManager, ArrayManager)):
-            if index is None and columns is None and dtype is None and copy is False:
-                # GH#33357 fastpath
-                NDFrame.__init__(self, data)
-                return
+        # first check if a Manager is passed without any other arguments
+        # -> use fastpath (skipping conversion to the configured Manager type)
+        if (
+            index is None
+            and columns is None
+            and dtype is None
+            and copy is False
+            and isinstance(data, (BlockManager, ArrayManager))
+        ):
+            # GH#33357 fastpath
+            NDFrame.__init__(self, data)
+            return
 
+        manager = get_option("mode.data_manager")
+
+        if isinstance(data, (BlockManager, ArrayManager)):
             mgr = self._init_mgr(
                 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
             )
 
         elif isinstance(data, dict):
-            mgr = init_dict(data, index, columns, dtype=dtype)
+            mgr = init_dict(data, index, columns, dtype=dtype, typ=manager)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
             # masked recarray
             if isinstance(data, mrecords.MaskedRecords):
-                mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
+                mgr = masked_rec_array_to_mgr(
+                    data, index, columns, dtype, copy, typ=manager
+                )
 
             # a masked array
             else:
                 data = sanitize_masked_array(data)
-                mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                mgr = init_ndarray(
+                    data, index, columns, dtype=dtype, copy=copy, typ=manager
+                )
 
         elif isinstance(data, (np.ndarray, Series, Index)):
             if data.dtype.names:
                 data_columns = list(data.dtype.names)
                 data = {k: data[k] for k in data_columns}
                 if columns is None:
                     columns = data_columns
-                mgr = init_dict(data, index, columns, dtype=dtype)
+                mgr = init_dict(data, index, columns, dtype=dtype, typ=manager)
             elif getattr(data, "name", None) is not None:
-                mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
+                mgr = init_dict(
+                    {data.name: data}, index, columns, dtype=dtype, typ=manager
+                )
             else:
-                mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                mgr = init_ndarray(
+                    data, index, columns, dtype=dtype, copy=copy, typ=manager
+                )
 
         # For data is list-like, or Iterable (will consume into list)
         elif is_list_like(data):
@@ -611,11 +629,15 @@ def __init__(
                     arrays, columns, index = nested_data_to_arrays(
                         data, columns, index, dtype
                     )
-                    mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
+                    mgr = arrays_to_mgr(
+                        arrays, columns, index, columns, dtype=dtype, typ=manager
+                    )
                 else:
-                    mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                    mgr = init_ndarray(
+                        data, index, columns, dtype=dtype, copy=copy, typ=manager
+                    )
             else:
-                mgr = init_dict({}, index, columns, dtype=dtype)
+                mgr = init_dict({}, index, columns, dtype=dtype, typ=manager)
         # For data is scalar
         else:
             if index is None or columns is None:
@@ -632,18 +654,19 @@ def __init__(
                     construct_1d_arraylike_from_scalar(data, len(index), dtype)
                     for _ in range(len(columns))
                 ]
-                mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
+                mgr = arrays_to_mgr(
+                    values, columns, index, columns, dtype=None, typ=manager
+                )
             else:
                 values = construct_2d_arraylike_from_scalar(
                     data, len(index), len(columns), dtype, copy
                 )
 
                 mgr = init_ndarray(
-                    values, index, columns, dtype=values.dtype, copy=False
+                    values, index, columns, dtype=values.dtype, copy=False, typ=manager
                 )
 
         # ensure correct Manager type according to settings
-        manager = get_option("mode.data_manager")
         mgr = mgr_to_mgr(mgr, typ=manager)
 
         NDFrame.__init__(self, mgr)
@@ -1971,7 +1994,8 @@ def from_records(
             arr_columns = arr_columns.drop(arr_exclude)
             columns = columns.drop(exclude)
 
-        mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns)
+        manager = get_option("mode.data_manager")
+        mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns, typ=manager)
 
         return cls(mgr)
 
@@ -2178,13 +2202,15 @@ def _from_arrays(
         if dtype is not None:
             dtype = pandas_dtype(dtype)
 
+        manager = get_option("mode.data_manager")
         mgr = arrays_to_mgr(
             arrays,
             columns,
             index,
             columns,
             dtype=dtype,
             verify_integrity=verify_integrity,
+            typ=manager,
         )
         return cls(mgr)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -138,6 +138,7 @@
     ArrayManager,
     BlockManager,
 )
+from pandas.core.internals.construction import mgr_to_mgr
 from pandas.core.missing import find_valid_index
 from pandas.core.ops import align_method_FRAME
 from pandas.core.reshape.concat import concat
@@ -5752,6 +5753,8 @@ def _to_dict_of_blocks(self, copy: bool_t = True):
         Internal ONLY - only works for BlockManager
         """
         mgr = self._mgr
+        # convert to BlockManager if needed, so that ArrayManager is supported as well
+        mgr = mgr_to_mgr(mgr, "block")
         mgr = cast(BlockManager, mgr)
         return {
             k: self._constructor(v).__finalize__(self)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -816,7 +816,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
 
         value = extract_array(value, extract_numpy=True)
         if value.ndim == 2:
-            value = value[0, :]
+            if value.shape[0] == 1:
+                value = value[0, :]
+            else:
+                raise ValueError(
+                    f"Expected a 1D array, got an array with shape {value.shape}"
+                )
+
         # TODO self.arrays can be empty
         # assert len(value) == len(self.arrays[0])
 

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -61,6 +61,7 @@
 )
 from pandas.core.arrays import Categorical
 from pandas.core.construction import (
+    ensure_wrapped_if_datetimelike,
     extract_array,
     sanitize_array,
 )
@@ -71,7 +72,9 @@
     get_objs_combined_axis,
     union_indexes,
 )
+from pandas.core.internals.array_manager import ArrayManager
 from pandas.core.internals.managers import (
+    BlockManager,
     create_block_manager_from_arrays,
     create_block_manager_from_blocks,
 )
@@ -90,6 +93,7 @@ def arrays_to_mgr(
     columns,
     dtype: Optional[DtypeObj] = None,
     verify_integrity: bool = True,
+    typ: Optional[str] = None,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -107,7 +111,8 @@ def arrays_to_mgr(
 
         # don't force copy because getting jammed in an ndarray anyway
         arrays = _homogenize(arrays, index, dtype)
-
+        if typ == "array":
+            arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays]
         columns = ensure_index(columns)
     else:
         columns = ensure_index(columns)
@@ -116,11 +121,16 @@ def arrays_to_mgr(
     # from BlockManager perspective
     axes = [columns, index]
 
-    return create_block_manager_from_arrays(arrays, arr_names, axes)
+    if typ == "block":
+        return create_block_manager_from_arrays(arrays, arr_names, axes)
+    elif typ == "array":
+        return ArrayManager(arrays, [index, columns])
+    else:
+        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'")
 
 
 def masked_rec_array_to_mgr(
-    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool
+    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
 ):
     """
     Extract from a masked rec array and create the manager.
@@ -154,7 +164,7 @@ def masked_rec_array_to_mgr(
     if columns is None:
         columns = arr_columns
 
-    mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype)
+    mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype, typ=typ)
 
     if copy:
         mgr = mgr.copy()
@@ -166,19 +176,14 @@ def mgr_to_mgr(mgr, typ: str):
     Convert to specific type of Manager. Does not copy if the type is already
     correct. Does not guarantee a copy otherwise.
     """
-    from pandas.core.internals import (
-        ArrayManager,
-        BlockManager,
-    )
-
     new_mgr: Manager
 
     if typ == "block":
         if isinstance(mgr, BlockManager):
             new_mgr = mgr
         else:
             new_mgr = arrays_to_mgr(
-                mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], dtype=None
+                mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], typ="block"
             )
     elif typ == "array":
         if isinstance(mgr, ArrayManager):
@@ -187,15 +192,17 @@ def mgr_to_mgr(mgr, typ: str):
             arrays = [mgr.iget_values(i).copy() for i in range(len(mgr.axes[0]))]
             new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]])
     else:
-        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{type}'")
+        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'")
     return new_mgr
 
 
 # ---------------------------------------------------------------------
 # DataFrame Constructor Interface
 
 
-def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
+def init_ndarray(
+    values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
+):
     # input must be a ndarray, list, Series, index
 
     if isinstance(values, ABCSeries):
@@ -224,7 +231,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
         if columns is None:
             columns = Index(range(len(values)))
 
-        return arrays_to_mgr(values, columns, index, columns, dtype=dtype)
+        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)
 
     # by definition an array here
     # the dtypes will be coerced to a single dtype
@@ -277,7 +284,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
-def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
+def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -321,7 +328,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         arrays = [
             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
         ]
-    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
+    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ)
 
 
 def nested_data_to_arrays(
@@ -415,6 +422,11 @@ def _homogenize(data, index: Index, dtype: Optional[DtypeObj]):
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 val = val.reindex(index, copy=False)
+            # TODO extract_array should be preferred, but that gives failures for
+            # `extension/test_numpy.py` (extract_array will convert numpy arrays
+            # to PandasArray), see https://github.com/pandas-dev/pandas/issues/40021
+            # val = extract_array(val, extract_numpy=True)
+            val = val._values
         else:
             if isinstance(val, dict):
                 if oindex is None:

diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
@@ -6,6 +6,7 @@
 import pytz
 
 from pandas.compat import is_platform_little_endian
+import pandas.util._test_decorators as td
 
 from pandas import (
     CategoricalIndex,
@@ -118,6 +119,8 @@ def test_from_records_sequencelike(self):
         tm.assert_series_equal(result["C"], df["C"])
         tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
+    def test_from_records_sequencelike_empty(self):
         # empty case
         result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
         assert len(result) == 0
@@ -184,7 +187,12 @@ def test_from_records_bad_index_column(self):
         tm.assert_index_equal(df1.index, Index(df.C))
 
         # should fail
-        msg = r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)"
+        msg = "|".join(
+            [
+                r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)",
+                "Passed arrays should have the same length as the rows Index: 10 vs 1",
+            ]
+        )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(df, index=[2])
         with pytest.raises(KeyError, match=r"^2$"):
@@ -208,6 +216,7 @@ def __iter__(self):
         expected = DataFrame.from_records(tups)
         tm.assert_frame_equal(result, expected)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_len0_with_columns(self):
         # GH#2633
         result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
@@ -259,7 +268,12 @@ def test_from_records_to_records(self):
         tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
 
         # wrong length
-        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
+        msg = "|".join(
+            [
+                r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)",
+                "Passed arrays should have the same length as the rows Index: 2 vs 1",
+            ]
+        )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(arr, index=index[:-1])
 
@@ -386,6 +400,7 @@ def create_dict(order_id):
         result = DataFrame.from_records(documents, index=["order_id", "quantity"])
         assert result.index.names == ("order_id", "quantity")
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_misc_brokenness(self):
         # GH#2179
 
@@ -424,6 +439,7 @@ def test_from_records_misc_brokenness(self):
         )
         tm.assert_series_equal(result, expected)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_empty(self):
         # GH#3562
         result = DataFrame.from_records([], columns=["a", "b", "c"])