From 6cd1c4dbbe1dfe742f65625b4a8ff7c75d034f06 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Feb 2021 12:11:41 +0100 Subject: [PATCH 01/21] [ArrayManager] Implement concat with reindexing --- .github/workflows/ci.yml | 1 + pandas/core/dtypes/concat.py | 160 +++++++++++++++++- pandas/core/internals/array_manager.py | 26 ++- pandas/core/internals/concat.py | 55 ++++-- pandas/tests/frame/methods/test_append.py | 21 ++- pandas/tests/frame/methods/test_drop.py | 2 +- pandas/tests/frame/methods/test_explode.py | 2 +- pandas/tests/frame/methods/test_join.py | 9 +- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/test_fsspec.py | 2 +- pandas/tests/reshape/concat/test_append.py | 6 + pandas/tests/reshape/concat/test_concat.py | 6 +- pandas/tests/reshape/concat/test_datetimes.py | 6 + 13 files changed, 256 insertions(+), 42 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b551e7ded0178..341ed8ef75cdd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,3 +157,4 @@ jobs: run: | source activate pandas-dev pytest pandas/tests/frame/methods --array-manager + pytest pandas/tests/reshape/concat/ --array-manager diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5b46bee96d4b3..1afc97b0adc78 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,14 +5,19 @@ import numpy as np +from pandas._libs import NaT, lib from pandas._typing import ArrayLike, DtypeObj from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( + is_bool_dtype, is_categorical_dtype, + is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, + is_integer_dtype, is_sparse, + is_timedelta64_ns_dtype, ) from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries @@ -21,11 +26,78 @@ from pandas.core.construction import array, ensure_wrapped_if_datetimelike +class NullArrayProxy: + """ + Proxy object for an all-NA array. 
+ + Only stores the length of the array, and not the dtype. The dtype + will only be known when actually concatenating (after determining the + common dtype, for which this proxy is ignored). + Using this object avoids that the internals/concat.py needs to determine + the proper dtype and array type. + """ + + ndim = 1 + + def __init__(self, n: int): + self.n = n + + @property + def shape(self): + return (self.n,) + + +def _array_from_proxy(arr, dtype: DtypeObj, fill_value=lib.no_default): + """ + Helper function to create the actual all-NA array from the NullArrayProxy object. + + Parameters + ---------- + arr : NullArrayProxy + dtype : the dtype for the resulting array + fill_value : scalar NA-like value + By default uses the ExtensionDtype's na_value or np.nan. For numpy + arrays, this can be overridden to be something else (eg None). + + Returns + ------- + np.ndarray or ExtensionArray + """ + if is_extension_array_dtype(dtype): + return dtype.construct_array_type()._from_sequence( + [dtype.na_value] * arr.n, dtype=dtype + ) + elif is_datetime64_ns_dtype(dtype): + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._from_sequence([NaT] * arr.n, dtype=dtype) + elif is_timedelta64_ns_dtype(dtype): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._from_sequence([NaT] * arr.n, dtype=dtype) + else: + if is_integer_dtype(dtype): + dtype = "float64" + fill_value = np.nan + elif is_bool_dtype(dtype): + dtype = object + + if fill_value is lib.no_default: + fill_value = np.nan + + arr = np.empty(arr.n, dtype=dtype) + arr.fill(fill_value) + return arr + + def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: """ Helper function for `arr.astype(common_dtype)` but handling all special cases. 
""" + if isinstance(arr, NullArrayProxy): + return _array_from_proxy(arr, dtype) + if ( is_categorical_dtype(arr.dtype) and isinstance(dtype, np.dtype) @@ -132,6 +204,75 @@ def is_nonempty(x) -> bool: return np.concatenate(to_concat, axis=axis) +def concat_arrays(to_concat): + """ + Alternative for concat_compat but specialized for use in the ArrayManager. + + Differences: only deals with 1D arrays (no axis keyword) and does not skip + empty arrays to determine the dtype. + In addition ensures that all NullArrayProxies get replaced with actual + arrays. + + Parameters + ---------- + to_concat : list of arrays + + Returns + ------- + np.ndarray or ExtensionArray + """ + # ignore the all-NA proxies to determine the resulting dtype + to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] + + kinds = {obj.dtype.kind for obj in to_concat_no_proxy} + single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 + any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat_no_proxy) + + if any_ea: + if not single_dtype: + target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) + to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] + else: + target_dtype = to_concat_no_proxy[0].dtype + to_concat = [ + _array_from_proxy(arr, target_dtype) + if isinstance(arr, NullArrayProxy) + else arr + for arr in to_concat + ] + + if isinstance(to_concat[0], ExtensionArray): + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + else: + return np.concatenate(to_concat) + + elif any(kind in ["m", "M"] for kind in kinds): + return _concat_datetime(to_concat) + + if not single_dtype: + target_dtype = np.find_common_type( + [arr.dtype for arr in to_concat_no_proxy], [] + ) + else: + target_dtype = to_concat_no_proxy[0].dtype + to_concat = [ + _array_from_proxy(arr, target_dtype) if isinstance(arr, NullArrayProxy) else arr + for arr in to_concat + ] + + result = np.concatenate(to_concat) + + # 
TODO(ArrayManager) this is currently inconsistent between Series and DataFrame + # so we should decide whether to keep the below special case or remove it + if len(result) == 0: + # all empties -> check for bool to not coerce to float + if len(kinds) != 1: + if "b" in kinds: + result = result.astype(object) + return result + + def union_categoricals( to_union, sort_categories: bool = False, ignore_order: bool = False ): @@ -322,20 +463,35 @@ def _concat_datetime(to_concat, axis=0): a single array, preserving the combined dtypes """ to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] + to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] - single_dtype = len({x.dtype for x in to_concat}) == 1 + single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 # multiple types, need to coerce to object if not single_dtype: # ensure_wrapped_if_datetimelike ensures that astype(object) wraps # in Timestamp/Timedelta + to_concat = [ + _array_from_proxy(arr, dtype=object, fill_value=None) + if isinstance(arr, NullArrayProxy) + else arr + for arr in to_concat + ] + return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) if axis == 1: # TODO(EA2D): kludge not necessary with 2D EAs to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] + else: + to_concat = [ + _array_from_proxy(arr, dtype=to_concat_no_proxy[0].dtype) + if isinstance(arr, NullArrayProxy) + else arr + for arr in to_concat + ] - result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) + result = type(to_concat_no_proxy[0])._concat_same_type(to_concat, axis=axis) if result.ndim == 2 and is_extension_array_dtype(result.dtype): # TODO(EA2D): kludge not necessary with 2D EAs diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0f677ff3180be..10648057b28a7 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -18,6 +18,7 @@ 
is_extension_array_dtype, is_numeric_dtype, ) +from pandas.core.dtypes.concat import NullArrayProxy from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import isna @@ -725,10 +726,20 @@ def reindex_indexer( # ignored keywords consolidate: bool = True, only_slice: bool = False, + # ArrayManager specific keywords + do_integrity_check=True, + use_na_proxy=False, ) -> T: axis = self._normalize_axis(axis) return self._reindex_indexer( - new_axis, indexer, axis, fill_value, allow_dups, copy + new_axis, + indexer, + axis, + fill_value, + allow_dups, + copy, + do_integrity_check, + use_na_proxy, ) def _reindex_indexer( @@ -739,6 +750,8 @@ def _reindex_indexer( fill_value=None, allow_dups: bool = False, copy: bool = True, + do_integrity_check=True, + use_na_proxy=False, ) -> T: """ Parameters @@ -773,7 +786,9 @@ def _reindex_indexer( new_arrays = [] for i in indexer: if i == -1: - arr = self._make_na_array(fill_value=fill_value) + arr = self._make_na_array( + fill_value=fill_value, use_na_proxy=use_na_proxy + ) else: arr = self.arrays[i] new_arrays.append(arr) @@ -793,7 +808,7 @@ def _reindex_indexer( new_axes = list(self._axes) new_axes[axis] = new_axis - return type(self)(new_arrays, new_axes) + return type(self)(new_arrays, new_axes, do_integrity_check=do_integrity_check) def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True): """ @@ -820,10 +835,11 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True ) - def _make_na_array(self, fill_value=None): + def _make_na_array(self, fill_value=None, use_na_proxy=False): + if use_na_proxy: + return NullArrayProxy(self.shape_proper[0]) if fill_value is None: fill_value = np.nan - dtype, fill_value = infer_dtype_from_scalar(fill_value) values = np.empty(self.shape_proper[0], dtype=dtype) 
values.fill(fill_value) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 3dcfa85ed5c08..51732505c1bc0 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -23,7 +23,7 @@ is_sparse, is_timedelta64_dtype, ) -from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.concat import concat_arrays, concat_compat from pandas.core.dtypes.missing import isna_all import pandas.core.algorithms as algos @@ -37,6 +37,45 @@ from pandas.core.arrays.sparse.dtype import SparseDtype +def concatenate_array_managers( + mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool +) -> Manager: + """ + Concatenate array managers into one. + + Parameters + ---------- + mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + ArrayManager + """ + # reindex all arrays + mgrs = [] + for mgr, indexers in mgrs_indexers: + for ax, indexer in indexers.items(): + mgr = mgr.reindex_indexer( + axes[ax], indexer, axis=ax, do_integrity_check=False, use_na_proxy=True + ) + mgrs.append(mgr) + + # concatting along the rows -> concat the reindexed arrays + if concat_axis == 1: + arrays = [ + concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))]) + for j in range(len(mgrs[0].arrays)) + ] + return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False) + # concatting along the columns -> combine reindexed arrays in a single manager + elif concat_axis == 0: + arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) + return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False) + + def concatenate_block_managers( mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool ) -> Manager: @@ -55,19 +94,7 @@ def concatenate_block_managers( BlockManager """ if isinstance(mgrs_indexers[0][0], ArrayManager): - - if concat_axis == 1: - # TODO for now only fastpath without indexers - mgrs 
= [t[0] for t in mgrs_indexers] - arrays = [ - concat_compat([mgrs[i].arrays[j] for i in range(len(mgrs))], axis=0) - for j in range(len(mgrs[0].arrays)) - ] - return ArrayManager(arrays, [axes[1], axes[0]]) - elif concat_axis == 0: - mgrs = [t[0] for t in mgrs_indexers] - arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) - return ArrayManager(arrays, [axes[1], axes[0]]) + return concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) concat_plans = [ _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 36c875b8abe6f..5d94bc63a5365 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -1,15 +1,10 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame, Series, Timestamp, date_range, timedelta_range import pandas._testing as tm -# TODO td.skip_array_manager_not_yet_implemented -# appending with reindexing not yet working - class TestDataFrameAppend: def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): @@ -37,7 +32,6 @@ def test_append_empty_list(self): tm.assert_frame_equal(result, expected) assert result is not df # .append() should return a new object - @td.skip_array_manager_not_yet_implemented def test_append_series_dict(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) @@ -78,7 +72,6 @@ def test_append_series_dict(self): expected = df.append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented def test_append_list_of_series_dicts(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) @@ -97,7 +90,6 @@ def test_append_list_of_series_dicts(self): expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) 
tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented def test_append_missing_cols(self): # GH22252 # exercise the conditional branch in append method where the data @@ -142,8 +134,7 @@ def test_append_empty_dataframe(self): expected = df1.copy() tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented - def test_append_dtypes(self): + def test_append_dtypes(self, using_array_manager): # GH 5754 # row appends of different dtypes (so need to do by-item) @@ -167,6 +158,9 @@ def test_append_dtypes(self): expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) + if using_array_manager: + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) @@ -175,6 +169,9 @@ def test_append_dtypes(self): expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) + if using_array_manager: + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": np.nan}, index=range(1)) @@ -183,6 +180,9 @@ def test_append_dtypes(self): expected = DataFrame( {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} ) + if using_array_manager: + # With ArrayManager, all-NaN float is not ignored + expected = expected.astype(object) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) @@ -202,7 +202,6 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): expected = Series(Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize( "data, dtype", [ diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 4568cda24d5cf..2af5c8dd29842 100644 --- 
a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -155,7 +155,7 @@ def test_drop(self): assert return_value is None tm.assert_frame_equal(df, expected) - @td.skip_array_manager_not_yet_implemented + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_drop_multiindex_not_lexsorted(self): # GH#11640 diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index be80dd49ff1fb..c5a4fcc3cc3c2 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -6,7 +6,7 @@ import pandas as pd import pandas._testing as tm -# TODO(ArrayManager) concat with reindexing +# TODO(ArrayManager) concat reindex with duplicates pytestmark = td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 42694dc3ff37c..20658a3dcf0b2 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -9,9 +9,6 @@ from pandas import DataFrame, Index, MultiIndex, date_range, period_range import pandas._testing as tm -# TODO(ArrayManager) concat with reindexing -pytestmark = td.skip_array_manager_not_yet_implemented - @pytest.fixture def frame_with_period_index(): @@ -183,6 +180,7 @@ def test_join_period_index(frame_with_period_index): tm.assert_frame_equal(joined, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat with duplicates def test_join_left_sequence_non_unique_index(): # https://github.com/pandas-dev/pandas/issues/19607 df1 = DataFrame({"a": [0, 10, 20]}, index=[1, 2, 3]) @@ -234,8 +232,9 @@ def test_join(self, multiindex_dataframe_random_data): b = frame.loc[frame.index[2:], ["B", "C"]] joined = a.join(b, how="outer").reindex(frame.index) - expected = frame.copy() - expected.values[np.isnan(joined.values)] = np.nan + expected = frame.copy().values + expected[np.isnan(joined.values)] = 
np.nan + expected = DataFrame(expected, index=frame.index, columns=frame.columns) assert not np.isnan(joined.values).all() diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 2339e21288bb5..24d1973eeda6d 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -121,7 +121,7 @@ def test_ambiguous_width(self): assert adjoined == expected -@td.skip_array_manager_not_yet_implemented +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) JSON class TestTableSchemaRepr: @classmethod def setup_class(cls): diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index d9575a6ad81e5..3131131682ccd 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -247,7 +247,7 @@ def test_pickle_options(fsspectest): tm.assert_frame_equal(df, out) -@td.skip_array_manager_not_yet_implemented +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) JSON def test_json_options(fsspectest): df = DataFrame({"a": [0]}) df.to_json("testmem://afile", storage_options={"test": "json_write"}) diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index dd6dbd79113e5..d4e16b0a9196c 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -6,6 +6,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, concat, isna import pandas._testing as tm @@ -331,6 +333,10 @@ def test_append_missing_column_proper_upcast(self, sort): assert appended["A"].dtype == "f8" assert appended["B"].dtype == "O" + # TODO(ArrayManager) DataFrame.append reindexes a Series itself (giving + # float dtype) -> delay reindexing until concat_array_managers which properly + # takes care of all-null dtype inference + @td.skip_array_manager_not_yet_implemented def 
test_append_empty_frame_to_series_with_dateutil_tz(self): # GH 23682 date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 575903de8f946..f755ec3f76f39 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range import pandas._testing as tm @@ -14,7 +16,9 @@ class TestConcatenate: - def test_concat_copy(self): + # TODO(ArrayManager) using block internals to verify, needs rewrite + @td.skip_array_manager_invalid_test + def test_concat_copy(self, using_array_manager): df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 92181e7dffc50..7e2522b6dcf36 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -32,6 +34,9 @@ def test_concat_datetime64_block(self): assert (result.iloc[:10]["time"] == rng).all() assert (result.iloc[10:]["time"] == rng).all() + # TODO(ArrayManager) concat with mixed managers + # (or, fix DataFrame.from_records to honor option) + @td.skip_array_manager_not_yet_implemented def test_concat_datetime_datetime64_frame(self): # GH#2624 rows = [] @@ -46,6 +51,7 @@ def test_concat_datetime_datetime64_frame(self): # it works! 
pd.concat([df1, df2_obj]) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_concat_datetime_timezone(self): # GH 18523 idx1 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris") From 73d9de2b1795bf2a89b76a07acda8be6e5878e37 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Feb 2021 13:57:14 +0100 Subject: [PATCH 02/21] fix mypy --- pandas/core/internals/concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 51732505c1bc0..efdbd0a739c4a 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -71,7 +71,8 @@ def concatenate_array_managers( ] return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False) # concatting along the columns -> combine reindexed arrays in a single manager - elif concat_axis == 0: + else: + assert concat_axis == 0 arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False) From 272d67409a8e3126e0a1df9de928a1a733f0b807 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Feb 2021 14:27:45 +0100 Subject: [PATCH 03/21] pass through allow dups --- .github/workflows/ci.yml | 2 +- pandas/core/internals/concat.py | 7 ++++++- pandas/tests/frame/methods/test_explode.py | 5 ----- pandas/tests/frame/methods/test_join.py | 3 --- pandas/tests/reshape/merge/test_join.py | 3 +++ pandas/tests/reshape/merge/test_merge.py | 8 +++++++- pandas/tests/reshape/test_crosstab.py | 4 ++++ pandas/tests/reshape/test_pivot.py | 4 ++++ pandas/tests/reshape/test_pivot_multilevel.py | 7 +++++-- 9 files changed, 30 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 341ed8ef75cdd..a4a0d3b779565 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,4 +157,4 @@ jobs: run: | source activate pandas-dev pytest 
pandas/tests/frame/methods --array-manager - pytest pandas/tests/reshape/concat/ --array-manager + pytest pandas/tests/reshape --array-manager diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index efdbd0a739c4a..793bc9bda30fd 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -59,7 +59,12 @@ def concatenate_array_managers( for mgr, indexers in mgrs_indexers: for ax, indexer in indexers.items(): mgr = mgr.reindex_indexer( - axes[ax], indexer, axis=ax, do_integrity_check=False, use_na_proxy=True + axes[ax], + indexer, + axis=ax, + allow_dups=True, + do_integrity_check=False, + use_na_proxy=True, ) mgrs.append(mgr) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index c5a4fcc3cc3c2..bd0901387eeed 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -1,14 +1,9 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm -# TODO(ArrayManager) concat reindex with duplicates -pytestmark = td.skip_array_manager_not_yet_implemented - def test_error(): df = pd.DataFrame( diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 20658a3dcf0b2..2bbc73da24033 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame, Index, MultiIndex, date_range, period_range import pandas._testing as tm @@ -180,7 +178,6 @@ def test_join_period_index(frame_with_period_index): tm.assert_frame_equal(joined, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat with duplicates def test_join_left_sequence_non_unique_index(): # https://github.com/pandas-dev/pandas/issues/19607 df1 = DataFrame({"a": [0, 10, 
20]}, index=[1, 2, 3]) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 500d7000817af..242fc8cade9e6 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -547,6 +549,7 @@ def test_join_non_unique_period_index(self): ) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"]) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index da3ac81c4aa17..5d3f5046fdbf6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas.core.dtypes.dtypes import CategoricalDtype @@ -277,6 +279,7 @@ def test_merge_copy(self): merged["d"] = "peekaboo" assert (right["d"] == "bar").all() + @td.skip_array_manager_invalid_test # TODO(ArrayManager) join copy behaviour def test_merge_nocopy(self): left = DataFrame({"a": 0, "b": 1}, index=range(10)) right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) @@ -656,7 +659,7 @@ def _constructor(self): assert isinstance(result, NotADataFrame) - def test_join_append_timedeltas(self): + def test_join_append_timedeltas(self, using_array_manager): # timedelta64 issues with join/merge # GH 5695 @@ -670,6 +673,8 @@ def test_join_append_timedeltas(self): "t": [timedelta(0, 22500), timedelta(0, 22500)], } ) + if using_array_manager: + expected = expected.astype(object) tm.assert_frame_equal(result, expected) td = np.timedelta64(300000000) @@ -1362,6 +1367,7 
@@ def test_merge_take_missing_values_from_index_of_other_dtype(self): expected = expected.reindex(columns=["a", "key", "b"]) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite test def test_merge_readonly(self): # https://github.com/pandas-dev/pandas/issues/27943 data1 = DataFrame( diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 6faf64789c687..10aebdccc04a5 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -1,11 +1,15 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_categorical_dtype from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, Series, crosstab import pandas._testing as tm +pytestmark = td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby + class TestCrosstab: def setup_method(self, method): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9b2a02920841..4d67dd7813ce7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -4,6 +4,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -19,6 +21,8 @@ from pandas.api.types import CategoricalDtype as CDT from pandas.core.reshape.pivot import pivot_table +pytestmark = td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby + @pytest.fixture(params=[True, False]) def dropna(request): diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index f59a469c05d15..a2e59e04a6eb8 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -192,7 +192,7 @@ def test_pivot_list_like_columns( tm.assert_frame_equal(result, expected) -def test_pivot_multiindexed_rows_and_cols(): +def 
test_pivot_multiindexed_rows_and_cols(using_array_manager): # GH 36360 df = pd.DataFrame( @@ -214,11 +214,14 @@ def test_pivot_multiindexed_rows_and_cols(): ) expected = pd.DataFrame( - data=[[5.0, np.nan], [10.0, 7.0]], + data=[[5, np.nan], [10, 7.0]], columns=MultiIndex.from_tuples( [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"] ), index=Int64Index([0, 1], dtype="int64", name="idx_L0"), ) + if not using_array_manager: + # BlockManager does not preserve the dtypes + expected = expected.astype("float64") tm.assert_frame_equal(res, expected) From 555d7ac16296966f6ea0d5425e42df737f76cd26 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Feb 2021 16:43:12 +0100 Subject: [PATCH 04/21] simplify _array_from_proxy --- pandas/core/dtypes/concat.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 1afc97b0adc78..ed7763e844c9e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,21 +5,20 @@ import numpy as np -from pandas._libs import NaT, lib +from pandas._libs import lib from pandas._typing import ArrayLike, DtypeObj from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, - is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, is_integer_dtype, is_sparse, - is_timedelta64_ns_dtype, ) from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries +from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray @@ -67,27 +66,18 @@ def _array_from_proxy(arr, dtype: DtypeObj, fill_value=lib.no_default): return dtype.construct_array_type()._from_sequence( [dtype.na_value] * arr.n, dtype=dtype ) - elif is_datetime64_ns_dtype(dtype): - from pandas.core.arrays import DatetimeArray - - return DatetimeArray._from_sequence([NaT] * arr.n, 
dtype=dtype) - elif is_timedelta64_ns_dtype(dtype): - from pandas.core.arrays import TimedeltaArray - - return TimedeltaArray._from_sequence([NaT] * arr.n, dtype=dtype) else: if is_integer_dtype(dtype): - dtype = "float64" - fill_value = np.nan + dtype = np.dtype("float64") elif is_bool_dtype(dtype): - dtype = object + dtype = np.dtype(object) if fill_value is lib.no_default: - fill_value = np.nan + fill_value = na_value_for_dtype(dtype) arr = np.empty(arr.n, dtype=dtype) arr.fill(fill_value) - return arr + return ensure_wrapped_if_datetimelike(arr) def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: From 19c7f751b0c4d93d81f07752698c049470cc0b64 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 9 Feb 2021 15:02:54 +0100 Subject: [PATCH 05/21] fix creation empty + turn into method --- pandas/core/dtypes/concat.py | 77 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ed7763e844c9e..632bc0c34d78b 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -45,39 +45,40 @@ def __init__(self, n: int): def shape(self): return (self.n,) + def to_array(self, dtype: DtypeObj, fill_value=lib.no_default) -> ArrayLike: + """ + Helper function to create the actual all-NA array from the NullArrayProxy + object. + + Parameters + ---------- + arr : NullArrayProxy + dtype : the dtype for the resulting array + fill_value : scalar NA-like value + By default uses the ExtensionDtype's na_value or np.nan. For numpy + arrays, this can be overridden to be something else (eg None). 
+ + Returns + ------- + np.ndarray or ExtensionArray + """ + if is_extension_array_dtype(dtype): + empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) + indexer = -np.ones(self.n, dtype=np.intp) + return empty.take(indexer, allow_fill=True) + else: + # when introducing missing values, int becomes float, bool becomes object + if is_integer_dtype(dtype): + dtype = np.dtype("float64") + elif is_bool_dtype(dtype): + dtype = np.dtype(object) -def _array_from_proxy(arr, dtype: DtypeObj, fill_value=lib.no_default): - """ - Helper function to create the actual all-NA array from the NullArrayProxy object. - - Parameters - ---------- - arr : NullArrayProxy - dtype : the dtype for the resulting array - fill_value : scalar NA-like value - By default uses the ExtensionDtype's na_value or np.nan. For numpy - arrays, this can be overridden to be something else (eg None). - - Returns - ------- - np.ndarray or ExtensionArray - """ - if is_extension_array_dtype(dtype): - return dtype.construct_array_type()._from_sequence( - [dtype.na_value] * arr.n, dtype=dtype - ) - else: - if is_integer_dtype(dtype): - dtype = np.dtype("float64") - elif is_bool_dtype(dtype): - dtype = np.dtype(object) - - if fill_value is lib.no_default: - fill_value = na_value_for_dtype(dtype) + if fill_value is lib.no_default: + fill_value = na_value_for_dtype(dtype) - arr = np.empty(arr.n, dtype=dtype) - arr.fill(fill_value) - return ensure_wrapped_if_datetimelike(arr) + arr = np.empty(self.n, dtype=dtype) + arr.fill(fill_value) + return ensure_wrapped_if_datetimelike(arr) def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: @@ -86,7 +87,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: cases. 
""" if isinstance(arr, NullArrayProxy): - return _array_from_proxy(arr, dtype) + return arr.to_array(dtype) if ( is_categorical_dtype(arr.dtype) @@ -194,7 +195,7 @@ def is_nonempty(x) -> bool: return np.concatenate(to_concat, axis=axis) -def concat_arrays(to_concat): +def concat_arrays(to_concat) -> ArrayLike: """ Alternative for concat_compat but specialized for use in the ArrayManager. @@ -225,9 +226,7 @@ def concat_arrays(to_concat): else: target_dtype = to_concat_no_proxy[0].dtype to_concat = [ - _array_from_proxy(arr, target_dtype) - if isinstance(arr, NullArrayProxy) - else arr + arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr for arr in to_concat ] @@ -247,7 +246,7 @@ def concat_arrays(to_concat): else: target_dtype = to_concat_no_proxy[0].dtype to_concat = [ - _array_from_proxy(arr, target_dtype) if isinstance(arr, NullArrayProxy) else arr + arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr for arr in to_concat ] @@ -462,7 +461,7 @@ def _concat_datetime(to_concat, axis=0): # ensure_wrapped_if_datetimelike ensures that astype(object) wraps # in Timestamp/Timedelta to_concat = [ - _array_from_proxy(arr, dtype=object, fill_value=None) + arr.to_array(object, fill_value=None) if isinstance(arr, NullArrayProxy) else arr for arr in to_concat @@ -475,7 +474,7 @@ def _concat_datetime(to_concat, axis=0): to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] else: to_concat = [ - _array_from_proxy(arr, dtype=to_concat_no_proxy[0].dtype) + arr.to_array(to_concat_no_proxy[0].dtype) if isinstance(arr, NullArrayProxy) else arr for arr in to_concat From 42e1b059da543737815b21ef8a2c99b755ea0b1f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Feb 2021 08:30:46 +0100 Subject: [PATCH 06/21] remove overriding of fill_value --- pandas/core/dtypes/concat.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py 
index 632bc0c34d78b..ed0fb67380eca 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,7 +5,6 @@ import numpy as np -from pandas._libs import lib from pandas._typing import ArrayLike, DtypeObj from pandas.core.dtypes.cast import find_common_type @@ -45,7 +44,7 @@ def __init__(self, n: int): def shape(self): return (self.n,) - def to_array(self, dtype: DtypeObj, fill_value=lib.no_default) -> ArrayLike: + def to_array(self, dtype: DtypeObj) -> ArrayLike: """ Helper function to create the actual all-NA array from the NullArrayProxy object. @@ -54,9 +53,6 @@ def to_array(self, dtype: DtypeObj, fill_value=lib.no_default) -> ArrayLike: ---------- arr : NullArrayProxy dtype : the dtype for the resulting array - fill_value : scalar NA-like value - By default uses the ExtensionDtype's na_value or np.nan. For numpy - arrays, this can be overridden to be something else (eg None). Returns ------- @@ -73,9 +69,7 @@ def to_array(self, dtype: DtypeObj, fill_value=lib.no_default) -> ArrayLike: elif is_bool_dtype(dtype): dtype = np.dtype(object) - if fill_value is lib.no_default: - fill_value = na_value_for_dtype(dtype) - + fill_value = na_value_for_dtype(dtype) arr = np.empty(self.n, dtype=dtype) arr.fill(fill_value) return ensure_wrapped_if_datetimelike(arr) @@ -461,9 +455,7 @@ def _concat_datetime(to_concat, axis=0): # ensure_wrapped_if_datetimelike ensures that astype(object) wraps # in Timestamp/Timedelta to_concat = [ - arr.to_array(object, fill_value=None) - if isinstance(arr, NullArrayProxy) - else arr + arr.to_array(object) if isinstance(arr, NullArrayProxy) else arr for arr in to_concat ] From a2aa388c7bab7b6925fffa145fc1cfd6cb55db17 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Feb 2021 14:10:17 +0100 Subject: [PATCH 07/21] use ensure_dtype_can_hold_na --- pandas/core/dtypes/concat.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py 
index d191d905c3071..35587c5496f21 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -7,13 +7,11 @@ from pandas._typing import ArrayLike, DtypeObj -from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.cast import ensure_dtype_can_hold_na, find_common_type from pandas.core.dtypes.common import ( - is_bool_dtype, is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, - is_integer_dtype, is_sparse, ) from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries @@ -64,11 +62,7 @@ def to_array(self, dtype: DtypeObj) -> ArrayLike: return empty.take(indexer, allow_fill=True) else: # when introducing missing values, int becomes float, bool becomes object - if is_integer_dtype(dtype): - dtype = np.dtype("float64") - elif is_bool_dtype(dtype): - dtype = np.dtype(object) - + dtype = ensure_dtype_can_hold_na(dtype) fill_value = na_value_for_dtype(dtype) arr = np.empty(self.n, dtype=dtype) arr.fill(fill_value) From 6bdd1754ac9608305fb330d54556a118ba746f0c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Feb 2021 15:27:12 +0100 Subject: [PATCH 08/21] add type annotation --- pandas/core/dtypes/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 35587c5496f21..cbc7db4aa6574 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,7 +1,7 @@ """ Utility functions related to concat. """ -from typing import cast +from typing import Any, List, cast import numpy as np @@ -183,7 +183,7 @@ def is_nonempty(x) -> bool: return np.concatenate(to_concat, axis=axis) -def concat_arrays(to_concat) -> ArrayLike: +def concat_arrays(to_concat: List[Any]) -> ArrayLike: """ Alternative for concat_compat but specialized for use in the ArrayManager. 
From cab90f6b8fa3bc905fcb02157a5e92e40429fcd4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 11:10:54 +0100 Subject: [PATCH 09/21] address review --- pandas/core/dtypes/concat.py | 2 -- pandas/core/internals/array_manager.py | 11 +++++++---- pandas/tests/frame/methods/test_append.py | 1 + pandas/tests/reshape/merge/test_merge.py | 23 +++++++++++++++++------ 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 767cb5fcf78fc..89637f9da112e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -244,8 +244,6 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: result = np.concatenate(to_concat) - # TODO(ArrayManager) this is currently inconsistent between Series and DataFrame - # so we should decide whether to keep the below special case or remove it if len(result) == 0: # all empties -> check for bool to not coerce to float if len(kinds) != 1: diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 06b752726ef12..ac391f8750e0e 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -727,8 +727,8 @@ def reindex_indexer( consolidate: bool = True, only_slice: bool = False, # ArrayManager specific keywords - do_integrity_check=True, - use_na_proxy=False, + do_integrity_check: bool = True, + use_na_proxy: bool = False, ) -> T: axis = self._normalize_axis(axis) return self._reindex_indexer( @@ -750,8 +750,8 @@ def _reindex_indexer( fill_value=None, allow_dups: bool = False, copy: bool = True, - do_integrity_check=True, - use_na_proxy=False, + do_integrity_check: bool = True, + use_na_proxy: bool = False, ) -> T: """ Parameters @@ -837,9 +837,12 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True def _make_na_array(self, fill_value=None, use_na_proxy=False): if use_na_proxy: + assert fill_value is None return 
NullArrayProxy(self.shape_proper[0]) + if fill_value is None: fill_value = np.nan + dtype, fill_value = infer_dtype_from_scalar(fill_value) values = np.empty(self.shape_proper[0], dtype=dtype) values.fill(fill_value) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 5d94bc63a5365..598366923aa80 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -159,6 +159,7 @@ def test_append_dtypes(self, using_array_manager): {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) if using_array_manager: + # TODO(ArrayManager) decide on exact casting rules in concat # With ArrayManager, all-NaN float is not ignored expected = expected.astype(object) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5d3f5046fdbf6..b3e6b64e3b4a2 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -279,18 +279,28 @@ def test_merge_copy(self): merged["d"] = "peekaboo" assert (right["d"] == "bar").all() - @td.skip_array_manager_invalid_test # TODO(ArrayManager) join copy behaviour - def test_merge_nocopy(self): + def test_merge_nocopy(self, using_array_manager): left = DataFrame({"a": 0, "b": 1}, index=range(10)) right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) merged = merge(left, right, left_index=True, right_index=True, copy=False) - merged["a"] = 6 - assert (left["a"] == 6).all() + if using_array_manager: + # With ArrayManager, setting a column doesn't change the values inplace + # and thus does not propagate the changes to the original left/right + # dataframes -> need to check that no copy was made in a different way + # TODO(ArrayManager) we should be able to simplify this with a .loc + # setitem test: merged.loc[0, "a"] = 10; assert left.loc[0, "a"] == 10 + # but this currently replaces the array 
(_setitem_with_indexer_split_path) + merged.loc[0, "a"] = 10 + assert merged._mgr.arrays[0] is left._mgr.arrays[0] + assert merged._mgr.arrays[2] is right._mgr.arrays[0] + else: + merged["a"] = 6 + assert (left["a"] == 6).all() - merged["d"] = "peekaboo" - assert (right["d"] == "peekaboo").all() + merged["d"] = "peekaboo" + assert (right["d"] == "peekaboo").all() def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame @@ -674,6 +684,7 @@ def test_join_append_timedeltas(self, using_array_manager): } ) if using_array_manager: + # TODO(ArrayManager) decide on exact casting rules in concat expected = expected.astype(object) tm.assert_frame_equal(result, expected) From c22a0102ab2d7e1fd333d7f00b77b5e916bec345 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 11:40:58 +0100 Subject: [PATCH 10/21] update comment --- pandas/core/dtypes/concat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 89637f9da112e..1f334293ac6a4 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -244,6 +244,8 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: result = np.concatenate(to_concat) + # TODO decide on exact behaviour (we shouldn't do this only for empty result) + # see https://github.com/pandas-dev/pandas/issues/39817 if len(result) == 0: # all empties -> check for bool to not coerce to float if len(kinds) != 1: From eec01615a773c7390771f966635dd88b1eb22cdc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 11:41:51 +0100 Subject: [PATCH 11/21] fixup test --- pandas/tests/reshape/merge/test_merge.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b3e6b64e3b4a2..c7a3aed7edce5 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -292,7 
+292,6 @@ def test_merge_nocopy(self, using_array_manager): # TODO(ArrayManager) we should be able to simplify this with a .loc # setitem test: merged.loc[0, "a"] = 10; assert left.loc[0, "a"] == 10 # but this currently replaces the array (_setitem_with_indexer_split_path) - merged.loc[0, "a"] = 10 assert merged._mgr.arrays[0] is left._mgr.arrays[0] assert merged._mgr.arrays[2] is right._mgr.arrays[0] else: From 04ead63e832b501978ee0a7a8cf27b961cb1c0de Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 18 Mar 2021 10:24:29 +0100 Subject: [PATCH 12/21] update/remove skips --- pandas/tests/reshape/concat/test_datetimes.py | 6 ------ pandas/tests/reshape/merge/test_join.py | 3 --- pandas/tests/reshape/merge/test_merge.py | 3 --- pandas/tests/reshape/test_crosstab.py | 9 ++++----- pandas/tests/reshape/test_pivot.py | 9 ++++----- 5 files changed, 8 insertions(+), 22 deletions(-) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index d0653de964556..2b8233388d328 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -5,8 +5,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -34,9 +32,6 @@ def test_concat_datetime64_block(self): assert (result.iloc[:10]["time"] == rng).all() assert (result.iloc[10:]["time"] == rng).all() - # TODO(ArrayManager) concat with mixed managers - # (or, fix DataFrame.from_records to honor option) - @td.skip_array_manager_not_yet_implemented def test_concat_datetime_datetime64_frame(self): # GH#2624 rows = [] @@ -51,7 +46,6 @@ def test_concat_datetime_datetime64_frame(self): # it works! 
concat([df1, df2_obj]) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_concat_datetime_timezone(self): # GH 18523 idx1 = date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris") diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d31930aa233cd..fb161e38c7155 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -553,7 +551,6 @@ def test_join_non_unique_period_index(self): ) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"]) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4ea5fc6af1c91..9699a0dec4891 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,8 +9,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.common import ( is_categorical_dtype, is_object_dtype, @@ -1387,7 +1385,6 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self): expected = expected.reindex(columns=["a", "key", "b"]) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite test def test_merge_readonly(self): # https://github.com/pandas-dev/pandas/issues/27943 data1 = DataFrame( diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index dfc336ffb907e..44299d51a878f 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from 
pandas.core.dtypes.common import is_categorical_dtype from pandas import ( @@ -15,8 +13,6 @@ ) import pandas._testing as tm -pytestmark = td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby - class TestCrosstab: def setup_method(self, method): @@ -442,7 +438,7 @@ def test_crosstab_normalize_arrays(self): ) tm.assert_frame_equal(test_case, norm_sum) - def test_crosstab_with_empties(self): + def test_crosstab_with_empties(self, using_array_manager): # Check handling of empties df = DataFrame( { @@ -467,6 +463,9 @@ def test_crosstab_with_empties(self): index=Index([1, 2], name="a", dtype="int64"), columns=Index([3, 4], name="b"), ) + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve int dtype + nans[3] = nans[3].astype("int64") calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False) tm.assert_frame_equal(nans, calculated) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 04e7db127307d..20aa0c9e2ee9a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -8,8 +8,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -25,8 +23,6 @@ from pandas.api.types import CategoricalDtype as CDT from pandas.core.reshape.pivot import pivot_table -pytestmark = td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby - @pytest.fixture(params=[True, False]) def dropna(request): @@ -1201,7 +1197,7 @@ def test_pivot_table_with_margins_set_margin_name(self, margin_name): margins_name=margin_name, ) - def test_pivot_timegrouper(self): + def test_pivot_timegrouper(self, using_array_manager): df = DataFrame( { "Branch": "A A A A A A A B".split(), @@ -1255,6 +1251,9 @@ def test_pivot_timegrouper(self): ) expected.index.name = "Date" expected.columns.name = "Buyer" + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve 
int dtype + expected["Carl"] = expected["Carl"].astype("int64") result = pivot_table( df, From 427b6f4cbea8568f1c7b955102df3c5f7208a78a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 18 Mar 2021 11:07:56 +0100 Subject: [PATCH 13/21] move logic into internals --- pandas/core/dtypes/concat.py | 151 +------------------------ pandas/core/internals/array_manager.py | 51 ++++++++- pandas/core/internals/concat.py | 95 +++++++++++++++- 3 files changed, 149 insertions(+), 148 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 8a9ce71192046..f51f8faae5604 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,11 +1,7 @@ """ Utility functions related to concat. """ -from typing import ( - Any, - List, - cast, -) +from typing import cast import numpy as np @@ -14,14 +10,10 @@ DtypeObj, ) -from pandas.core.dtypes.cast import ( - ensure_dtype_can_hold_na, - find_common_type, -) +from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_categorical_dtype, is_dtype_equal, - is_extension_array_dtype, is_sparse, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -29,7 +21,6 @@ ABCCategoricalIndex, ABCSeries, ) -from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray @@ -39,61 +30,11 @@ ) -class NullArrayProxy: - """ - Proxy object for an all-NA array. - - Only stores the length of the array, and not the dtype. The dtype - will only be known when actually concatenating (after determining the - common dtype, for which this proxy is ignored). - Using this object avoids that the internals/concat.py needs to determine - the proper dtype and array type. 
- """ - - ndim = 1 - - def __init__(self, n: int): - self.n = n - - @property - def shape(self): - return (self.n,) - - def to_array(self, dtype: DtypeObj) -> ArrayLike: - """ - Helper function to create the actual all-NA array from the NullArrayProxy - object. - - Parameters - ---------- - arr : NullArrayProxy - dtype : the dtype for the resulting array - - Returns - ------- - np.ndarray or ExtensionArray - """ - if is_extension_array_dtype(dtype): - empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) - indexer = -np.ones(self.n, dtype=np.intp) - return empty.take(indexer, allow_fill=True) - else: - # when introducing missing values, int becomes float, bool becomes object - dtype = ensure_dtype_can_hold_na(dtype) - fill_value = na_value_for_dtype(dtype) - arr = np.empty(self.n, dtype=dtype) - arr.fill(fill_value) - return ensure_wrapped_if_datetimelike(arr) - - -def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: +def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: """ Helper function for `arr.astype(common_dtype)` but handling all special cases. """ - if isinstance(arr, NullArrayProxy): - return arr.to_array(dtype) - if ( is_categorical_dtype(arr.dtype) and isinstance(dtype, np.dtype) @@ -180,7 +121,7 @@ def is_nonempty(x) -> bool: # for axis=0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) - to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] + to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat] if isinstance(to_concat[0], ExtensionArray): cls = type(to_concat[0]) @@ -207,73 +148,6 @@ def is_nonempty(x) -> bool: return np.concatenate(to_concat, axis=axis) -def concat_arrays(to_concat: List[Any]) -> ArrayLike: - """ - Alternative for concat_compat but specialized for use in the ArrayManager. - - Differences: only deals with 1D arrays (no axis keyword) and does not skip - empty arrays to determine the dtype. 
- In addition ensures that all NullArrayProxies get replaced with actual - arrays. - - Parameters - ---------- - to_concat : list of arrays - - Returns - ------- - np.ndarray or ExtensionArray - """ - # ignore the all-NA proxies to determine the resulting dtype - to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] - - kinds = {obj.dtype.kind for obj in to_concat_no_proxy} - single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 - any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat_no_proxy) - - if any_ea: - if not single_dtype: - target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) - to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - else: - target_dtype = to_concat_no_proxy[0].dtype - to_concat = [ - arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr - for arr in to_concat - ] - - if isinstance(to_concat[0], ExtensionArray): - cls = type(to_concat[0]) - return cls._concat_same_type(to_concat) - else: - return np.concatenate(to_concat) - - elif any(kind in ["m", "M"] for kind in kinds): - return _concat_datetime(to_concat) - - if not single_dtype: - target_dtype = np.find_common_type( - [arr.dtype for arr in to_concat_no_proxy], [] - ) - else: - target_dtype = to_concat_no_proxy[0].dtype - to_concat = [ - arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr - for arr in to_concat - ] - - result = np.concatenate(to_concat) - - # TODO decide on exact behaviour (we shouldn't do this only for empty result) - # see https://github.com/pandas-dev/pandas/issues/39817 - if len(result) == 0: - # all empties -> check for bool to not coerce to float - if len(kinds) != 1: - if "b" in kinds: - result = result.astype(object) - return result - - def union_categoricals( to_union, sort_categories: bool = False, ignore_order: bool = False ): @@ -464,33 +338,20 @@ def _concat_datetime(to_concat, axis=0): a single array, preserving the combined dtypes """ 
to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] - to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] - single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 + single_dtype = len({x.dtype for x in to_concat}) == 1 # multiple types, need to coerce to object if not single_dtype: # ensure_wrapped_if_datetimelike ensures that astype(object) wraps # in Timestamp/Timedelta - to_concat = [ - arr.to_array(object) if isinstance(arr, NullArrayProxy) else arr - for arr in to_concat - ] - return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) if axis == 1: # TODO(EA2D): kludge not necessary with 2D EAs to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] - else: - to_concat = [ - arr.to_array(to_concat_no_proxy[0].dtype) - if isinstance(arr, NullArrayProxy) - else arr - for arr in to_concat - ] - result = type(to_concat_no_proxy[0])._concat_same_type(to_concat, axis=axis) + result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) if result.ndim == 2 and isinstance(result.dtype, ExtensionDtype): # TODO(EA2D): kludge not necessary with 2D EAs diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 9dc549eb25ed1..cc1526b78ec9f 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -22,12 +22,14 @@ ) from pandas._typing import ( ArrayLike, + DtypeObj, Hashable, ) from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( astype_array_safe, + ensure_dtype_can_hold_na, infer_dtype_from_scalar, soft_convert_objects, ) @@ -41,7 +43,6 @@ is_object_dtype, is_timedelta64_ns_dtype, ) -from pandas.core.dtypes.concat import NullArrayProxy from pandas.core.dtypes.dtypes import ( ExtensionDtype, PandasDtype, @@ -54,6 +55,7 @@ from pandas.core.dtypes.missing import ( array_equals, isna, + na_value_for_dtype, ) import pandas.core.algorithms as algos @@ -1299,3 +1301,50 
@@ def set_values(self, values: ArrayLike): valid for the current SingleArrayManager (length, dtype, etc). """ self.arrays[0] = values + + +class NullArrayProxy: + """ + Proxy object for an all-NA array. + + Only stores the length of the array, and not the dtype. The dtype + will only be known when actually concatenating (after determining the + common dtype, for which this proxy is ignored). + Using this object avoids that the internals/concat.py needs to determine + the proper dtype and array type. + """ + + ndim = 1 + + def __init__(self, n: int): + self.n = n + + @property + def shape(self): + return (self.n,) + + def to_array(self, dtype: DtypeObj) -> ArrayLike: + """ + Helper function to create the actual all-NA array from the NullArrayProxy + object. + + Parameters + ---------- + arr : NullArrayProxy + dtype : the dtype for the resulting array + + Returns + ------- + np.ndarray or ExtensionArray + """ + if is_extension_array_dtype(dtype): + empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) + indexer = -np.ones(self.n, dtype=np.intp) + return empty.take(indexer, allow_fill=True) + else: + # when introducing missing values, int becomes float, bool becomes object + dtype = ensure_dtype_can_hold_na(dtype) + fill_value = na_value_for_dtype(dtype) + arr = np.empty(self.n, dtype=dtype) + arr.fill(fill_value) + return ensure_wrapped_if_datetimelike(arr) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 11446264ce747..ecec4d04942fb 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -4,6 +4,7 @@ import itertools from typing import ( TYPE_CHECKING, + Any, Dict, List, Sequence, @@ -31,7 +32,7 @@ is_sparse, ) from pandas.core.dtypes.concat import ( - concat_arrays, + cast_to_common_type, concat_compat, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -45,7 +46,10 @@ DatetimeArray, ExtensionArray, ) -from pandas.core.internals.array_manager import ArrayManager +from 
pandas.core.internals.array_manager import ( + ArrayManager, + NullArrayProxy, +) from pandas.core.internals.blocks import ( ensure_block_shape, new_block, @@ -97,6 +101,93 @@ def _concatenate_array_managers( return ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False) +def concat_arrays(to_concat: List[Any]) -> ArrayLike: + """ + Alternative for concat_compat but specialized for use in the ArrayManager. + + Differences: only deals with 1D arrays (no axis keyword), assumes + ensure_wrapped_if_datetimelike and does not skip empty arrays to determine + the dtype. + In addition ensures that all NullArrayProxies get replaced with actual + arrays. + + Parameters + ---------- + to_concat : list of arrays + + Returns + ------- + np.ndarray or ExtensionArray + """ + # ignore the all-NA proxies to determine the resulting dtype + to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] + + kinds = {obj.dtype.kind for obj in to_concat_no_proxy} + single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 + any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat_no_proxy) + + if any_ea: + if not single_dtype: + target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) + to_concat = [ + arr.to_array(target_dtype) + if isinstance(arr, NullArrayProxy) + else cast_to_common_type(arr, target_dtype) + for arr in to_concat + ] + else: + target_dtype = to_concat_no_proxy[0].dtype + to_concat = [ + arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr + for arr in to_concat + ] + + if isinstance(to_concat[0], ExtensionArray): + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + else: + return np.concatenate(to_concat) + + if not single_dtype: + if any(kind in ["m", "M"] for kind in kinds): + # multiple types, need to coerce to object + target_dtype = np.dtype(object) + else: + target_dtype = np.find_common_type( + [arr.dtype for arr in to_concat_no_proxy], [] + ) + else: + target_dtype = 
to_concat_no_proxy[0].dtype + + if target_dtype.kind in ["m", "M"]: + # for datetimelike use DatetimeArray/TimedeltaArray concatenation + # don't use arr.astype(target_dtype, copy=False), because that doesn't + # work for DatetimeArray/TimedeltaArray (returns ndarray) + to_concat = [ + arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr + for arr in to_concat + ] + return type(to_concat_no_proxy[0])._concat_same_type(to_concat, axis=0) + + to_concat = [ + arr.to_array(target_dtype) + if isinstance(arr, NullArrayProxy) + else arr.astype(target_dtype, copy=False) + for arr in to_concat + ] + + result = np.concatenate(to_concat) + + # TODO decide on exact behaviour (we shouldn't do this only for empty result) + # see https://github.com/pandas-dev/pandas/issues/39817 + if len(result) == 0: + # all empties -> check for bool to not coerce to float + if len(kinds) != 1: + if "b" in kinds: + result = result.astype(object) + return result + + def concatenate_managers( mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool ) -> Manager: From 8c10a53ebc0c2f1ca8ca9c0c2eeb944e1c6df963 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 18 Mar 2021 15:39:12 +0100 Subject: [PATCH 14/21] fix typing --- pandas/core/internals/array_manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index cc1526b78ec9f..581a7cfc861c1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -12,6 +12,7 @@ Tuple, TypeVar, Union, + cast, ) import numpy as np @@ -1338,10 +1339,12 @@ def to_array(self, dtype: DtypeObj) -> ArrayLike: np.ndarray or ExtensionArray """ if is_extension_array_dtype(dtype): + dtype = cast(ExtensionDtype, dtype) empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) indexer = -np.ones(self.n, dtype=np.intp) return empty.take(indexer, allow_fill=True) else: + dtype = cast(np.dtype, dtype) # when 
introducing missing values, int becomes float, bool becomes object dtype = ensure_dtype_can_hold_na(dtype) fill_value = na_value_for_dtype(dtype) From f0061f79a49710030e3c75944e1b6a65faff7aec Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 11:46:37 +0100 Subject: [PATCH 15/21] update type check --- pandas/core/internals/array_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 5da319f4c28f2..0081ea061e128 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1334,8 +1334,7 @@ def to_array(self, dtype: DtypeObj) -> ArrayLike: ------- np.ndarray or ExtensionArray """ - if is_extension_array_dtype(dtype): - dtype = cast(ExtensionDtype, dtype) + if isinstance(dtype, ExtensionDtype): empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) indexer = -np.ones(self.n, dtype=np.intp) return empty.take(indexer, allow_fill=True) From 9ba88540e788791c205c73d38a1607c0f7eee9c1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 11:58:58 +0100 Subject: [PATCH 16/21] simplify casting to_concat + fix skips --- pandas/core/dtypes/concat.py | 2 ++ pandas/core/internals/concat.py | 17 +++++++---------- pandas/tests/extension/base/reshaping.py | 3 --- pandas/tests/reshape/concat/__init__.py | 4 ---- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f51f8faae5604..cfadb3e9f45c5 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -35,6 +35,8 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: Helper function for `arr.astype(common_dtype)` but handling all special cases.
""" + if is_dtype_equal(arr.dtype, dtype): + return arr if ( is_categorical_dtype(arr.dtype) and isinstance(dtype, np.dtype) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index ecec4d04942fb..bb590dec84c5c 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -129,18 +129,15 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: if any_ea: if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) - to_concat = [ - arr.to_array(target_dtype) - if isinstance(arr, NullArrayProxy) - else cast_to_common_type(arr, target_dtype) - for arr in to_concat - ] else: target_dtype = to_concat_no_proxy[0].dtype - to_concat = [ - arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr - for arr in to_concat - ] + + to_concat = [ + arr.to_array(target_dtype) + if isinstance(arr, NullArrayProxy) + else cast_to_common_type(arr, target_dtype) + for arr in to_concat + ] if isinstance(to_concat[0], ExtensionArray): cls = type(to_concat[0]) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 5a2d928eea744..de3af31ece7b0 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas.api.extensions import ExtensionArray from pandas.core.internals import ExtensionBlock @@ -111,7 +109,6 @@ def test_concat_extension_arrays_copy_false(self, data, na_value): result = pd.concat([df1, df2], axis=1, copy=False) self.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat reindex def test_concat_with_reindex(self, data): # GH-33027 a = pd.DataFrame({"a": data[:5]}) diff --git a/pandas/tests/reshape/concat/__init__.py b/pandas/tests/reshape/concat/__init__.py index 777923be02398..e69de29bb2d1d 100644 --- 
a/pandas/tests/reshape/concat/__init__.py +++ b/pandas/tests/reshape/concat/__init__.py @@ -1,4 +0,0 @@ -import pandas.util._test_decorators as td - -# TODO(ArrayManager) concat axis=0 -pytestmark = td.skip_array_manager_not_yet_implemented From ad61f2f84abc66b161879b2e11677c7048dd949b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 12:06:45 +0100 Subject: [PATCH 17/21] further simplify concat_arrays --- pandas/core/internals/concat.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index bb590dec84c5c..b7e86245c655b 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -131,21 +131,7 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) else: target_dtype = to_concat_no_proxy[0].dtype - - to_concat = [ - arr.to_array(target_dtype) - if isinstance(arr, NullArrayProxy) - else cast_to_common_type(arr, target_dtype) - for arr in to_concat - ] - - if isinstance(to_concat[0], ExtensionArray): - cls = type(to_concat[0]) - return cls._concat_same_type(to_concat) - else: - return np.concatenate(to_concat) - - if not single_dtype: + elif not single_dtype: if any(kind in ["m", "M"] for kind in kinds): # multiple types, need to coerce to object target_dtype = np.dtype(object) @@ -169,15 +155,19 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: to_concat = [ arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) - else arr.astype(target_dtype, copy=False) + else cast_to_common_type(arr, target_dtype) for arr in to_concat ] + if isinstance(to_concat[0], ExtensionArray): + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + result = np.concatenate(to_concat) # TODO decide on exact behaviour (we shouldn't do this only for empty result) # see https://github.com/pandas-dev/pandas/issues/39817 - if len(result) == 
0: + if len(result) == 0 and not any_ea: # all empties -> check for bool to not coerce to float if len(kinds) != 1: if "b" in kinds: From a3c2662a21097e77c59ebed3a268f9ff27644d43 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Mar 2021 10:25:06 +0100 Subject: [PATCH 18/21] remove redundant cast --- pandas/core/internals/array_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index b9dadb392c533..d01028a9d3b8a 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -12,7 +12,6 @@ Tuple, TypeVar, Union, - cast, ) import numpy as np @@ -1336,7 +1335,6 @@ def to_array(self, dtype: DtypeObj) -> ArrayLike: indexer = -np.ones(self.n, dtype=np.intp) return empty.take(indexer, allow_fill=True) else: - dtype = cast(np.dtype, dtype) # when introducing missing values, int becomes float, bool becomes object dtype = ensure_dtype_can_hold_na(dtype) fill_value = na_value_for_dtype(dtype) From f67e9e24ec096a9fcedad60c28d04ec9381ad5fc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 31 Mar 2021 11:22:09 +0200 Subject: [PATCH 19/21] simplify usage of find_common_type --- pandas/core/internals/concat.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 6b9f20d2a5716..5e78990a73a6a 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -123,23 +123,10 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: # ignore the all-NA proxies to determine the resulting dtype to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] - kinds = {obj.dtype.kind for obj in to_concat_no_proxy} single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1 - any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat_no_proxy) - if any_ea: - if not single_dtype: - 
target_dtype = find_common_type([x.dtype for x in to_concat_no_proxy]) - else: - target_dtype = to_concat_no_proxy[0].dtype - elif not single_dtype: - if any(kind in ["m", "M"] for kind in kinds): - # multiple types, need to coerce to object - target_dtype = np.dtype(object) - else: - target_dtype = np.find_common_type( - [arr.dtype for arr in to_concat_no_proxy], [] - ) + if not single_dtype: + target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) else: target_dtype = to_concat_no_proxy[0].dtype @@ -168,8 +155,9 @@ def concat_arrays(to_concat: List[Any]) -> ArrayLike: # TODO decide on exact behaviour (we shouldn't do this only for empty result) # see https://github.com/pandas-dev/pandas/issues/39817 - if len(result) == 0 and not any_ea: + if len(result) == 0: # all empties -> check for bool to not coerce to float + kinds = {obj.dtype.kind for obj in to_concat_no_proxy} if len(kinds) != 1: if "b" in kinds: result = result.astype(object) From d21bd3aaccbecd29fa2b794c609cd005c645f44f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 31 Mar 2021 20:29:52 +0200 Subject: [PATCH 20/21] update annotation --- pandas/core/internals/concat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 5e78990a73a6a..ff6332f6c2572 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -4,7 +4,6 @@ import itertools from typing import ( TYPE_CHECKING, - Any, Dict, List, Sequence, @@ -102,7 +101,7 @@ def _concatenate_array_managers( return ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False) -def concat_arrays(to_concat: List[Any]) -> ArrayLike: +def concat_arrays(to_concat: List) -> ArrayLike: """ Alternative for concat_compat but specialized for use in the ArrayManager. 
From 77b05f46f26b4dc1907dcf7273bfbbbacf7cb4a0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Apr 2021 15:49:03 +0200 Subject: [PATCH 21/21] fixup typing --- pandas/core/internals/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e2cc074d0052a..687c8768fb251 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -100,7 +100,7 @@ def _concatenate_array_managers( return ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False) -def concat_arrays(to_concat: List) -> ArrayLike: +def concat_arrays(to_concat: list) -> ArrayLike: """ Alternative for concat_compat but specialized for use in the ArrayManager.