From 2bf9fb510082bda7c460e51dd8d9490f5ed0d172 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 8 Dec 2015 07:02:44 -0500
Subject: [PATCH 1/2] BUG: bug in .copy of datetime tz-aware objects, #11794

Not always deep-copying the underlying impl, which is a DatetimeIndex where
shallow copies are views
---
 doc/source/whatsnew/v0.18.0.txt |  2 +-
 pandas/core/dtypes.py           |  3 +++
 pandas/core/internals.py        | 36 ++++++++++++++++++---------------
 pandas/tests/test_internals.py  | 36 +++++++++++++++++++++++----------
 pandas/tests/test_series.py     | 23 +++++++++++++++++----
 5 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index c1b7ff82f4c76..94ca376df8436 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -170,7 +170,7 @@ Bug Fixes
 - Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
 - Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
 - Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
-
+- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)
 
 
 
diff --git a/pandas/core/dtypes.py b/pandas/core/dtypes.py
index 0b13471aadcfb..69957299aa9bb 100644
--- a/pandas/core/dtypes.py
+++ b/pandas/core/dtypes.py
@@ -65,6 +65,9 @@ def __hash__(self):
     def __eq__(self, other):
         raise NotImplementedError("sub-classes should implement an __eq__ method")
 
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
     @classmethod
     def is_dtype(cls, dtype):
         """ Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 28c845f63f9d4..cb8d0986489cc 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -168,17 +168,11 @@ def make_block(self, values, placement=None, ndim=None, **kwargs):
 
         return make_block(values, placement=placement, ndim=ndim, **kwargs)
 
-    def make_block_same_class(self, values, placement, copy=False, fastpath=True,
-                              **kwargs):
-        """
-        Wrap given values in a block of same type as self.
-
-        `kwargs` are used in SparseBlock override.
-
-        """
-        if copy:
-            values = values.copy()
-        return make_block(values, placement, klass=self.__class__,
+    def make_block_same_class(self, values, placement=None, fastpath=True, **kwargs):
+        """ Wrap given values in a block of same type as self. """
+        if placement is None:
+            placement = self.mgr_locs
+        return make_block(values, placement=placement, klass=self.__class__,
                           fastpath=fastpath, **kwargs)
 
     @mgr_locs.setter
@@ -573,12 +567,11 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs):
 
     # block actions ####
     def copy(self, deep=True, mgr=None):
+        """ copy constructor """
         values = self.values
         if deep:
             values = values.copy()
-        return self.make_block(values,
-                               klass=self.__class__,
-                               fastpath=True)
+        return self.make_block_same_class(values)
 
     def replace(self, to_replace, value, inplace=False, filter=None,
                 regex=False, convert=True, mgr=None):
@@ -2140,6 +2133,13 @@ def __init__(self, values, placement, ndim=2,
                                               placement=placement,
                                               ndim=ndim,
                                               **kwargs)
+    def copy(self, deep=True, mgr=None):
+        """ copy constructor """
+        values = self.values
+        if deep:
+            values = values.copy(deep=True)
+        return self.make_block_same_class(values)
+
     def external_values(self):
         """ we internally represent the data as a DatetimeIndex, but for external
         compat with ndarray, export as a ndarray of Timestamps """
@@ -3257,10 +3257,14 @@ def get_scalar(self, tup):
         full_loc = list(ax.get_loc(x)
                         for ax, x in zip(self.axes, tup))
         blk = self.blocks[self._blknos[full_loc[0]]]
-        full_loc[0] = self._blklocs[full_loc[0]]
+        values = blk.values
 
         # FIXME: this may return non-upcasted types?
-        return blk.values[tuple(full_loc)]
+        if values.ndim == 1:
+            return values[full_loc[1]]
+
+        full_loc[0] = self._blklocs[full_loc[0]]
+        return values[tuple(full_loc)]
 
     def delete(self, item):
         """
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
index fbab0d2a92203..0f46c1106ed08 100644
--- a/pandas/tests/test_internals.py
+++ b/pandas/tests/test_internals.py
@@ -147,6 +147,8 @@ def create_mgr(descr, item_shape=None):
     block_placements = OrderedDict()
     for d in descr.split(';'):
         d = d.strip()
+        if not len(d):
+            continue
         names, blockstr = d.partition(':')[::2]
         blockstr = blockstr.strip()
         names = names.strip().split(',')
@@ -324,7 +326,8 @@ class TestBlockManager(tm.TestCase):
 
     def setUp(self):
         self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;'
-                              'f: bool; g: i8; h: complex')
+                              'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
+                              'k: M8[ns, US/Eastern]; l: M8[ns, CET];')
 
     def test_constructor_corner(self):
         pass
@@ -476,16 +479,24 @@ def test_set_change_dtype_slice(self): # GH8850
                 DataFrame([[3], [6]], columns=cols[2:]))
 
     def test_copy(self):
-        shallow = self.mgr.copy(deep=False)
-
-        # we don't guaranteee block ordering
-        for blk in self.mgr.blocks:
-            found = False
-            for cp_blk in shallow.blocks:
-                if cp_blk.values is blk.values:
-                    found = True
-                    break
-            self.assertTrue(found)
+        cp = self.mgr.copy(deep=False)
+        for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
+
+            # view assertion
+            self.assertTrue(cp_blk.equals(blk))
+            self.assertTrue(cp_blk.values.base is blk.values.base)
+
+        cp = self.mgr.copy(deep=True)
+        for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
+
+            # copy assertion
+            # we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz),
+            # but was copied
+            self.assertTrue(cp_blk.equals(blk))
+            if cp_blk.values.base is not None and blk.values.base is not None:
+                self.assertFalse(cp_blk.values.base is blk.values.base)
+            else:
+                self.assertTrue(cp_blk.values.base is None and blk.values.base is None)
 
     def test_sparse(self):
         mgr = create_mgr('a: sparse-1; b: sparse-2')
@@ -688,7 +699,10 @@ def test_consolidate_ordering_issues(self):
         self.mgr.set('g', randn(N))
         self.mgr.set('h', randn(N))
 
+        # we have datetime/tz blocks in self.mgr
         cons = self.mgr.consolidate()
+        self.assertEqual(cons.nblocks, 4)
+        cons = self.mgr.consolidate().get_numeric_data()
         self.assertEqual(cons.nblocks, 1)
         assert_almost_equal(cons.blocks[0].mgr_locs,
                             np.arange(len(cons.items)))
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 0fb66ee2dfa7c..e49aef2778c1e 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -5111,12 +5111,27 @@ def test_cov(self):
         self.assertTrue(isnull(ts1.cov(ts2, min_periods=12)))
 
     def test_copy(self):
-        ts = self.ts.copy()
 
-        ts[::2] = np.NaN
+        for deep in [False, True]:
+            s = Series(np.arange(10),dtype='float64')
+            s2 = s.copy(deep=deep)
+            s2[::2] = np.NaN
+
+            # Did not modify original Series
+            self.assertTrue(np.isnan(s2[0]))
+            self.assertFalse(np.isnan(s[0]))
 
-        # Did not modify original Series
-        self.assertFalse(np.isnan(self.ts[0]))
+        # GH 11794
+        # copy of tz-aware
+        expected = Series([Timestamp('2012/01/01', tz='UTC')])
+        expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
+
+        for deep in [False, True]:
+            s = Series([Timestamp('2012/01/01', tz='UTC')])
+            s2 = s.copy()
+            s2[0] = pd.Timestamp('1999/01/01', tz='UTC')
+            assert_series_equal(s, expected)
+            assert_series_equal(s2, expected2)
 
     def test_count(self):
         self.assertEqual(self.ts.count(), len(self.ts))

From d526a4f7d9e87190c7ad4a84ff0cf34861c7ca58 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 8 Dec 2015 08:30:34 -0500
Subject: [PATCH 2/2] COMPAT: avoid warnings from numeric/string-like
 comparisons

---
 pandas/core/common.py       | 23 ++++++++++++++++-----
 pandas/core/internals.py    | 13 +++++++-----
 pandas/tests/test_series.py | 41 +++++++++++++++++++++++++++++--------
 3 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index b3a42335e14da..e81b58a3f7eef 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -232,7 +232,7 @@ def _isnull_ndarraylike(obj):
     values = getattr(obj, 'values', obj)
     dtype = values.dtype
 
-    if dtype.kind in ('O', 'S', 'U'):
+    if is_string_dtype(dtype):
         if is_categorical_dtype(values):
             from pandas import Categorical
             if not isinstance(values, Categorical):
@@ -243,7 +243,7 @@ def _isnull_ndarraylike(obj):
             # Working around NumPy ticket 1542
             shape = values.shape
 
-            if dtype.kind in ('S', 'U'):
+            if is_string_like_dtype(dtype):
                 result = np.zeros(values.shape, dtype=bool)
             else:
                 result = np.empty(shape, dtype=bool)
@@ -267,11 +267,11 @@ def _isnull_ndarraylike_old(obj):
     values = getattr(obj, 'values', obj)
     dtype = values.dtype
 
-    if dtype.kind in ('O', 'S', 'U'):
+    if is_string_dtype(dtype):
         # Working around NumPy ticket 1542
         shape = values.shape
 
-        if values.dtype.kind in ('S', 'U'):
+        if is_string_like_dtype(dtype):
             result = np.zeros(values.shape, dtype=bool)
         else:
             result = np.empty(shape, dtype=bool)
@@ -2208,13 +2208,17 @@ def is_numeric_v_string_like(a, b):
 
     is_a_numeric_array = is_a_array and is_numeric_dtype(a)
     is_b_numeric_array = is_b_array and is_numeric_dtype(b)
+    is_a_string_array = is_a_array and is_string_like_dtype(a)
+    is_b_string_array = is_b_array and is_string_like_dtype(b)
 
     is_a_scalar_string_like = not is_a_array and is_string_like(a)
     is_b_scalar_string_like = not is_b_array and is_string_like(b)
 
     return (
         is_a_numeric_array and is_b_scalar_string_like) or (
-        is_b_numeric_array and is_a_scalar_string_like
+        is_b_numeric_array and is_a_scalar_string_like) or (
+        is_a_numeric_array and is_b_string_array) or (
+        is_b_numeric_array and is_a_string_array
         )
 
 def is_datetimelike_v_numeric(a, b):
@@ -2257,6 +2261,15 @@ def is_numeric_dtype(arr_or_dtype):
             and not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
+def is_string_dtype(arr_or_dtype):
+    dtype = _get_dtype(arr_or_dtype)
+    return dtype.kind in ('O', 'S', 'U')
+
+def is_string_like_dtype(arr_or_dtype):
+    # exclude object as its a mixed dtype
+    dtype = _get_dtype(arr_or_dtype)
+    return dtype.kind in ('S', 'U')
+
 def is_float_dtype(arr_or_dtype):
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.floating)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index cb8d0986489cc..efcb6eb818087 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -4419,11 +4419,14 @@ def _putmask_smart(v, m, n):
     try:
         nn = n[m]
         nn_at = nn.astype(v.dtype)
-        comp = (nn == nn_at)
-        if is_list_like(comp) and comp.all():
-            nv = v.copy()
-            nv[m] = nn_at
-            return nv
+
+        # avoid invalid dtype comparisons
+        if not is_numeric_v_string_like(nn, nn_at):
+            comp = (nn == nn_at)
+            if is_list_like(comp) and comp.all():
+                nv = v.copy()
+                nv[m] = nn_at
+                return nv
     except (ValueError, IndexError, TypeError):
         pass
 
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index e49aef2778c1e..bbdd7c3637981 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -5112,26 +5112,49 @@ def test_cov(self):
 
     def test_copy(self):
 
-        for deep in [False, True]:
+        for deep in [None, False, True]:
             s = Series(np.arange(10),dtype='float64')
-            s2 = s.copy(deep=deep)
+
+            # default deep is True
+            if deep is None:
+                s2 = s.copy()
+            else:
+                s2 = s.copy(deep=deep)
+
             s2[::2] = np.NaN
 
-            # Did not modify original Series
-            self.assertTrue(np.isnan(s2[0]))
-            self.assertFalse(np.isnan(s[0]))
+            if deep is None or deep is True:
+                # Did not modify original Series
+                self.assertTrue(np.isnan(s2[0]))
+                self.assertFalse(np.isnan(s[0]))
+            else:
+
+                # we DID modify the original Series
+                self.assertTrue(np.isnan(s2[0]))
+                self.assertTrue(np.isnan(s[0]))
 
         # GH 11794
         # copy of tz-aware
         expected = Series([Timestamp('2012/01/01', tz='UTC')])
         expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
 
-        for deep in [False, True]:
+        for deep in [None, False, True]:
             s = Series([Timestamp('2012/01/01', tz='UTC')])
-            s2 = s.copy()
+
+            if deep is None:
+                s2 = s.copy()
+            else:
+                s2 = s.copy(deep=deep)
+
             s2[0] = pd.Timestamp('1999/01/01', tz='UTC')
-            assert_series_equal(s, expected)
-            assert_series_equal(s2, expected2)
+
+            # default deep is True
+            if deep is None or deep is True:
+                assert_series_equal(s, expected)
+                assert_series_equal(s2, expected2)
+            else:
+                assert_series_equal(s, expected2)
+                assert_series_equal(s2, expected2)
 
     def test_count(self):
         self.assertEqual(self.ts.count(), len(self.ts))