pandas-dev · jreback · Mar 14, 2013 · Mar 13, 2013
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -148,6 +148,7 @@ pandas 0.11.0
   - Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
   - Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from 
     other values), (GH2850_)
+  - Unstacking a frame when no missing values need to be filled would always upcast the dtype (GH2929_)
 
 .. _GH622: https://github.com/pydata/pandas/issues/622
 .. _GH797: https://github.com/pydata/pandas/issues/797
@@ -169,6 +170,7 @@ pandas 0.11.0
 .. _GH2892: https://github.com/pydata/pandas/issues/2892
 .. _GH2909: https://github.com/pydata/pandas/issues/2909
 .. _GH2922: https://github.com/pydata/pandas/issues/2922
+.. _GH2929: https://github.com/pydata/pandas/issues/2929
 .. _GH2931: https://github.com/pydata/pandas/issues/2931
 .. _GH2973: https://github.com/pydata/pandas/issues/2973
 .. _GH2967: https://github.com/pydata/pandas/issues/2967

diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -144,15 +144,23 @@ def get_result(self):
 
     def get_new_values(self):
         values = self.values
+
         # place the values
         length, width = self.full_shape
         stride = values.shape[1]
         result_width = width * stride
+        result_shape = (length, result_width)
 
-        dtype, fill_value = _maybe_promote(values.dtype)
-        new_values = np.empty((length, result_width), dtype=dtype)
-        new_values.fill(fill_value)
-        new_mask = np.zeros((length, result_width), dtype=bool)
+        # mask all True: nothing to fill, so keep the dtype (no promotion)
+        if self.mask.all():
+            dtype = values.dtype
+            new_values = np.empty(result_shape, dtype=dtype)
+        else:
+            dtype, fill_value = _maybe_promote(values.dtype)
+            new_values = np.empty(result_shape, dtype=dtype)
+            new_values.fill(fill_value)
+
+        new_mask = np.zeros(result_shape, dtype=bool)
 
         # is there a simpler / faster way of doing this?
         for i in xrange(values.shape[1]):

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -8242,6 +8242,41 @@ def test_unstack_to_series(self):
             data = data.unstack()
         assert_frame_equal(old_data, data)
 
+    def test_unstack_dtypes(self):
+
+        # GH 2929: unstack should preserve dtypes when nothing is filled
+        rows = [[1, 1, 3, 4],
+                [1, 2, 3, 4],
+                [2, 1, 3, 4],
+                [2, 2, 3, 4]]
+
+        df = DataFrame(rows, columns=list('ABCD'))
+        result = df.get_dtype_counts()
+        expected = Series({'int64' : 4})
+        assert_series_equal(result, expected)
+
+        # single dtype
+        df2 = df.set_index(['A','B'])
+        df3 = df2.unstack('B')
+        result = df3.get_dtype_counts()
+        expected = Series({'int64' : 4})
+        assert_series_equal(result, expected)
+
+        # mixed
+        df2 = df.set_index(['A','B'])
+        df2['C'] = 3.
+        df3 = df2.unstack('B')
+        result = df3.get_dtype_counts()
+        expected = Series({'int64' : 2, 'float64' : 2})
+        assert_series_equal(result, expected)
+
+        df2['D'] = 'foo'
+        df3 = df2.unstack('B')
+        result = df3.get_dtype_counts()
+        expected = Series({'float64' : 2, 'object' : 2})
+        assert_series_equal(result, expected)
+
+
     def test_reset_index(self):
         stacked = self.frame.stack()[::2]
         stacked = DataFrame({'foo': stacked, 'bar': stacked})

diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -1346,7 +1346,7 @@ def test_unstack_group_index_overflow(self):
 
         # test roundtrip
         stacked = result.stack()
-        assert_series_equal(s.astype(np.float64),
+        assert_series_equal(s,
                             stacked.reindex(s.index))
 
         # put it at beginning