From e130060b381ee836f47b06d21bcc6101a86814fc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 23 May 2020 09:00:32 +0200
Subject: [PATCH 1/8] BUG: Fix concat of frames with extension types (no reindexed columns)

---
 pandas/core/arrays/integer.py       |  8 ++++++--
 pandas/core/dtypes/concat.py        |  2 +-
 pandas/core/internals/concat.py     |  9 +++++++++
 pandas/tests/reshape/test_concat.py | 14 ++++++++++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 59954f548fd33..4eec3bda11b39 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -94,10 +94,14 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
 
     def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         # for now only handle other integer types
-        if not all(isinstance(t, _IntegerDtype) for t in dtypes):
+        if not all(
+            isinstance(t, _IntegerDtype)
+            or (isinstance(t, np.dtype) and np.issubdtype(t, np.integer))
+            for t in dtypes
+        ):
             return None
         np_dtype = np.find_common_type(
-            [t.numpy_dtype for t in dtypes], []  # type: ignore
+            [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], []  # type: ignore
         )
         if np.issubdtype(np_dtype, np.integer):
             return _dtypes[str(np_dtype)]
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index ca3a41813f3d3..eb4ae832a07de 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -138,7 +138,7 @@ def is_nonempty(x) -> bool:
     single_dtype = len({x.dtype for x in to_concat}) == 1
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
 
-    if any_ea and axis == 0:
+    if any_ea:
         if not single_dtype:
             target_dtype = find_common_type([x.dtype for x in to_concat])
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index e25c4c2341217..4138251b39344 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -319,6 +319,15 @@ def _concatenate_join_units(join_units, concat_axis, copy):
                     concat_values = concat_values.copy()
             else:
                 concat_values = concat_values.copy()
+    elif any(isinstance(t, ExtensionArray) for t in to_concat):
+        # concatting with at least one EA means we are concatting a single column
+        # the non-EA values are 2D arrays with shape (1, n)
+        to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
+        concat_values = concat_compat(to_concat, axis=concat_axis)
+        if not isinstance(concat_values, ExtensionArray):
+            # if the result of concat is not an EA but an ndarray, reshape to
+            # 2D to put it a non-EA Block
+            concat_values = np.atleast_2d(concat_values)
     else:
         concat_values = concat_compat(to_concat, axis=concat_axis)
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 8d6d93df6f776..9ba85cb48b476 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2832,3 +2832,17 @@ def test_concat_preserves_subclass(obj):
 
     result = concat([obj, obj])
     assert isinstance(result, type(obj))
+
+
+def test_concat_frame_axis0_extension_dtypes():
+    # preserve extension dtype (through common_dtype mechanism)
+    df1 = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
+    df2 = pd.DataFrame({"a": np.array([4, 5, 6])})
+
+    result = pd.concat([df1, df2], ignore_index=True)
+    expected = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64")
+    tm.assert_frame_equal(result, expected)
+
+    result = pd.concat([df2, df1], ignore_index=True)
+    expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
+    tm.assert_frame_equal(result, expected)
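A quick, illustrative sketch of the user-facing behavior this first patch targets (it mirrors the new test above and uses only public pandas API):

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})  # nullable extension dtype
    df2 = pd.DataFrame({"a": np.array([4, 5, 6])})                 # plain numpy int64 column

    result = pd.concat([df1, df2], ignore_index=True)
    print(result["a"].dtype)  # Int64 with this fix; object dtype before it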
From f83338a2b2204a47cebe7972c37e5ad63069adec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 23 May 2020 09:25:20 +0200
Subject: [PATCH 2/8] clean-up

---
 pandas/core/dtypes/concat.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index eb4ae832a07de..6274905afaadc 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -152,10 +152,6 @@ def is_nonempty(x) -> bool:
     elif _contains_datetime or "timedelta" in typs:
         return concat_datetime(to_concat, axis=axis, typs=typs)
 
-    elif any_ea and axis == 1:
-        to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]
-        return np.concatenate(to_concat, axis=axis)
-
     elif all_empty:
         # we have all empties, but may need to coerce the result dtype to
        # object if we have non-numeric type operands (numpy would otherwise

From 58226948ec6550dc021c1b06a81f8157e7c8d696 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 23 May 2020 13:48:21 +0200
Subject: [PATCH 3/8] fix concat of DatetimeBlock / DatetimeTZBlock to not take 'uniform' path

---
 pandas/core/internals/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 4138251b39344..fd8c5f5e27c02 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -452,7 +452,7 @@ def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
     # cannot necessarily join
     return (
         # all blocks need to have the same type
-        all(isinstance(ju.block, type(join_units[0].block)) for ju in join_units)
+        all(type(ju.block) is type(join_units[0].block) for ju in join_units)
         and  # noqa
         # no blocks that would get missing values (can lead to type upcasts)
         # unless we're an extension dtype.
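The one-line change in patch 3 is about subclass semantics: isinstance() also accepts subclasses, so a DatetimeTZBlock (which subclasses DatetimeBlock in this era of the internals) could wrongly take the "uniform join units" fast path. A minimal sketch with stand-in classes (not the real pandas block types):

    class Block: ...
    class DatetimeBlock(Block): ...
    class DatetimeTZBlock(DatetimeBlock): ...

    blocks = [DatetimeBlock(), DatetimeTZBlock()]
    print(all(isinstance(b, type(blocks[0])) for b in blocks))  # True  -- too permissive
    print(all(type(b) is type(blocks[0]) for b in blocks))      # False -- the stricter check used above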
From 3d32bc813d498349f6c7490281a86d175f36f33e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 23 May 2020 13:48:50 +0200
Subject: [PATCH 4/8] fix base extension tests

---
 pandas/tests/extension/base/reshaping.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index c9445ceec2c77..cd932e842e00c 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -62,11 +62,11 @@ def test_concat_mixed_dtypes(self, data):
         self.assert_series_equal(result, expected)
 
         # simple test for just EA and one other
-        result = pd.concat([df1, df2])
+        result = pd.concat([df1, df2.astype(object)])
         expected = pd.concat([df1.astype("object"), df2.astype("object")])
         self.assert_frame_equal(result, expected)
 
-        result = pd.concat([df1["A"], df2["A"]])
+        result = pd.concat([df1["A"], df2["A"].astype(object)])
         expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")])
         self.assert_series_equal(result, expected)
 

From d269229486bd6748b5c4e2f6e8dd67de39ee3e4d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 6 Jun 2020 09:55:46 +0200
Subject: [PATCH 5/8] fix test

---
 pandas/core/arrays/integer.py          |  2 +-
 pandas/tests/indexing/test_indexing.py | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 3dc1849fc586a..df43b5d6115ba 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -100,7 +100,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         ):
             return None
         np_dtype = np.find_common_type(
-            [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], []  # type: ignore
+            [t.numpy_dtype if isinstance(t, BaseMaskedDtype) else t for t in dtypes], []
         )
         if np.issubdtype(np_dtype, np.integer):
             return _dtypes[str(np_dtype)]
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 51a7aa9bb586b..66b61bda74ef0 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -1006,12 +1006,24 @@ def test_extension_array_cross_section():
 
 def test_extension_array_cross_section_converts():
+    # all numeric columns -> numeric series
     df = pd.DataFrame(
-        {"A": pd.core.arrays.integer_array([1, 2]), "B": np.array([1, 2])},
+        {"A": pd.array([1, 2], dtype="Int64"), "B": np.array([1, 2])},
         index=["a", "b"],
+    )
+    result = df.loc["a"]
+    expected = pd.Series([1, 1], dtype="Int64", index=["A", "B"], name="a")
+    tm.assert_series_equal(result, expected)
+
+    result = df.iloc[0]
+    tm.assert_series_equal(result, expected)
+
+    # mixed columns -> object series
+    df = pd.DataFrame(
+        {"A": pd.array([1, 2], dtype="Int64"), "B": np.array(["a", "b"])},
         index=["a", "b"],
     )
     result = df.loc["a"]
-    expected = pd.Series([1, 1], dtype=object, index=["A", "B"], name="a")
+    expected = pd.Series([1, "a"], dtype=object, index=["A", "B"], name="a")
     tm.assert_series_equal(result, expected)
 
     result = df.iloc[0]
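Patch 5 reworks the cross-section test because row selection now goes through the same common-dtype machinery; roughly, under this branch (illustrative session, not part of the patch):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {"A": pd.array([1, 2], dtype="Int64"), "B": np.array([1, 2])},
        index=["a", "b"],
    )
    print(df.loc["a"].dtype)  # Int64: an all-integer row keeps the nullable dtype

    df["B"] = np.array(["a", "b"])
    print(df.loc["a"].dtype)  # object: mixing Int64 with strings still falls back to object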
From 63a6570462c1826763e6dc13ab6f21e30c426d2e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 6 Jun 2020 10:56:11 +0200
Subject: [PATCH 6/8] add whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 2243790a663df..954ccd4752697 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -291,6 +291,7 @@ Other enhancements
 - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
 - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
 - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`).
+- :func:`concat` and `~DataFrame.append` now preserve extension dtypes (:issue:`33607`, :issue:`34339`).
 
 .. ---------------------------------------------------------------------------
 

From d8faef53dbd6320d75d5db217d1481862a25119b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 15 Jun 2020 09:02:45 +0200
Subject: [PATCH 7/8] update whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b486c6b242dcf..108b8c01e8e36 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -293,7 +293,9 @@ Other enhancements
 - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
 - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
 - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`).
-- :func:`concat` and `~DataFrame.append` now preserve extension dtypes (:issue:`33607`, :issue:`34339`).
+- :func:`concat` and `~DataFrame.append` now preserve extension dtypes, for example
+  combining a nullable integer column with a numpy integer column will no longer
+  result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
 - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
 
 .. ---------------------------------------------------------------------------

From 2e1f1a6bb7c0d265843dc8034fa56b7838c09f52 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 18 Jun 2020 08:51:53 +0200
Subject: [PATCH 8/8] Update doc/source/whatsnew/v1.1.0.rst

Co-authored-by: Tom Augspurger
---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 108b8c01e8e36..d8d6ef6fb33e8 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -293,7 +293,7 @@ Other enhancements
 - :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
 - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
 - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`).
-- :func:`concat` and `~DataFrame.append` now preserve extension dtypes, for example
+- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
   combining a nullable integer column with a numpy integer column will no longer
   result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
 - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
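Since DataFrame.append is implemented on top of concat, the whatsnew wording covers it as well; a sketch of the documented behavior (df1/df2 are illustrative, and append is assumed to still be available in the 1.1 line):

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    df2 = pd.DataFrame({"a": np.array([3, 4], dtype="int64")})

    print(df1.append(df2, ignore_index=True)["a"].dtype)  # Int64, no longer object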