From ac23879dcf5853a595a8017c4edf8628d41e6585 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 14 Dec 2021 15:07:24 -0800
Subject: [PATCH 1/3] BUG: ArrayManager not respecting copy keyword

---
 pandas/core/internals/construction.py            | 16 +---------------
 pandas/tests/frame/indexing/test_setitem.py      |  4 ----
 pandas/tests/frame/test_constructors.py          | 13 ++++---------
 pandas/tests/frame/test_reductions.py            | 12 ++----------
 pandas/tests/indexing/multiindex/test_setitem.py |  8 +++-----
 5 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 532309dfc40b3..6a76aa11b9c81 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -35,7 +35,6 @@
 )
 from pandas.core.dtypes.common import (
     is_1d_only_ea_dtype,
-    is_datetime64tz_dtype,
     is_datetime_or_timedelta_dtype,
     is_dtype_equal,
     is_extension_array_dtype,
@@ -44,7 +43,6 @@
     is_named_tuple,
     is_object_dtype,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCSeries,
@@ -475,23 +473,11 @@ def dict_to_mgr(
         keys = list(data.keys())
         columns = Index(keys)
         arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
-        # GH#24096 need copy to be deep for datetime64tz case
-        # TODO: See if we can avoid these copies
         arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
-        arrays = [
-            arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
-        ]
 
     if copy:
-        # arrays_to_mgr (via form_blocks) won't make copies for EAs
         # dtype attr check to exclude EADtype-castable strs
-        arrays = [
-            x
-            if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype)
-            else x.copy()
-            for x in arrays
-        ]
-        # TODO: can we get rid of the dt64tz special case above?
+        arrays = [x if not hasattr(x, "dtype") else x.copy() for x in arrays]
 
     return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
 
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index cd0a0a0467742..c8a22e48d16c1 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -421,10 +421,6 @@ def test_setitem_frame_duplicate_columns(self, using_array_manager, request):
             # TODO(ArrayManager) .loc still overwrites
             expected["B"] = expected["B"].astype("int64")
 
-            mark = pytest.mark.xfail(
-                reason="Both 'A' columns get set with 3 instead of 0 and 3"
-            )
-            request.node.add_marker(mark)
         else:
             # set these with unique columns to be extra-unambiguous
             expected[2] = expected[2].astype(np.int64)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index e7c2e17bc9ac1..f2d1bf59c82d1 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2143,8 +2143,6 @@ def test_constructor_ndarray_copy(self, float_frame, using_array_manager):
             arr[0, 0] = 1000
             assert df.iloc[0, 0] == 1000
 
-    # TODO(ArrayManager) keep view on Series?
-    @td.skip_array_manager_not_yet_implemented
     def test_constructor_series_copy(self, float_frame):
         series = float_frame._series
 
@@ -2499,13 +2497,10 @@ def test_constructor_list_str_na(self, string_dtype):
     def test_dict_nocopy(
         self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager
     ):
-        if using_array_manager and not (
-            (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES))
-            or (
-                any_numpy_dtype
-                in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES)
-                and copy
-            )
+        if (
+            using_array_manager
+            and not copy
+            and not (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES))
         ):
             # TODO(ArrayManager) properly honor copy keyword for dict input
             td.mark_array_manager_not_yet_implemented(request)
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 6429544869ac2..96e3d2b860a3d 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -784,11 +784,7 @@ def test_operators_timedelta64(self):
     def test_std_timedelta64_skipna_false(self):
         # GH#37392
         tdi = pd.timedelta_range("1 Day", periods=10)
-        df = DataFrame({"A": tdi, "B": tdi})
-        # Copy is needed for ArrayManager case, otherwise setting df.iloc
-        #  below edits tdi, alterting both df['A'] and df['B']
-        #  FIXME: passing copy=True to constructor does not fix this
-        df = df.copy()
+        df = DataFrame({"A": tdi, "B": tdi}, copy=True)
         df.iloc[-2, -1] = pd.NaT
 
         result = df.std(skipna=False)
@@ -1075,11 +1071,7 @@ def test_idxmax_idxmin_convert_dtypes(self, op, expected_value):
 
     def test_idxmax_dt64_multicolumn_axis1(self):
         dti = date_range("2016-01-01", periods=3)
-        df = DataFrame({3: dti, 4: dti[::-1]})
-        # FIXME: copy needed for ArrayManager, otherwise setting with iloc
-        #  below also sets df.iloc[-1, 1]; passing copy=True to DataFrame
-        #  does not solve this.
-        df = df.copy()
+        df = DataFrame({3: dti, 4: dti[::-1]}, copy=True)
         df.iloc[0, 0] = pd.NaT
 
         df._consolidate_inplace()
diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py
index 2a12d690ff0bd..af23d2f2ac51c 100644
--- a/pandas/tests/indexing/multiindex/test_setitem.py
+++ b/pandas/tests/indexing/multiindex/test_setitem.py
@@ -214,11 +214,9 @@ def test_multiindex_assignment_single_dtype(self, using_array_manager):
         exp = Series(arr, index=[8, 10], name="c", dtype="int64")
         result = df.loc[4, "c"]
         tm.assert_series_equal(result, exp)
-        if not using_array_manager:
-            # FIXME(ArrayManager): this correctly preserves dtype,
-            #  but incorrectly is not inplace.
-            # extra check for inplace-ness
-            tm.assert_numpy_array_equal(view, exp.values)
+
+        # extra check for inplace-ness
+        tm.assert_numpy_array_equal(view, exp.values)
 
         # arr + 0.5 cannot be cast losslessly to int, so we upcast
         df.loc[4, "c"] = arr + 0.5

From 01c6ad2caf414ab0611771fea3708417dd747eb8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 27 Dec 2021 17:28:18 -0800
Subject: [PATCH 2/3] avoid double-copy

---
 pandas/core/internals/construction.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 6a76aa11b9c81..fbe498b4e08c2 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -476,8 +476,12 @@ def dict_to_mgr(
         arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
 
     if copy:
-        # dtype attr check to exclude EADtype-castable strs
-        arrays = [x if not hasattr(x, "dtype") else x.copy() for x in arrays]
+        if typ == "block":
+            # We only need to copy arrays that will not get consolidated, i.e.
+            #  only EA arrays
+            arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays]
+        else:
+            arrays = [x.copy() for x in arrays]
 
     return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
 

From 1bfdcc6cc25c959fd5095e99008600b796d7f7e0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 29 Dec 2021 12:58:46 -0800
Subject: [PATCH 3/3] copy conditionally

---
 pandas/core/internals/construction.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index fbe498b4e08c2..4bec30cad22cc 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -481,7 +481,8 @@ def dict_to_mgr(
             #  only EA arrays
             arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays]
         else:
-            arrays = [x.copy() for x in arrays]
+            # dtype check to exclude e.g. range objects, scalars
+            arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]
 
     return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)