diff --git a/RELEASE.rst b/RELEASE.rst index 9deafd56ccc10..970b89c99a7b1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -141,6 +141,8 @@ pandas 0.11.0 - Fixed printing of ``NaT` in an index - Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__) - Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_) + - Bug in DataFrame column insertion: when the column creation fails, the existing frame is left in + an irrecoverable state (GH3010_) .. _GH622: https://github.com/pydata/pandas/issues/622 .. _GH797: https://github.com/pydata/pandas/issues/797 @@ -166,6 +168,7 @@ pandas 0.11.0 .. _GH2982: https://github.com/pydata/pandas/issues/2982 .. _GH2989: https://github.com/pydata/pandas/issues/2989 .. _GH3002: https://github.com/pydata/pandas/issues/3002 +.. _GH3010: https://github.com/pydata/pandas/issues/3010 .. _GH3012: https://github.com/pydata/pandas/issues/3012 diff --git a/doc/source/io.rst b/doc/source/io.rst index 01ed06cd6a60f..850c6a2841ef5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1138,11 +1138,14 @@ defaults to `nan`. .. ipython:: python - df_mixed = df.copy() - df_mixed['string'] = 'string' - df_mixed['int'] = 1 - df_mixed['bool'] = True - df_mixed['datetime64'] = Timestamp('20010102') + df_mixed = DataFrame({ 'A' : randn(8), + 'B' : randn(8), + 'C' : np.array(randn(8),dtype='float32'), + 'string' :'string', + 'int' : 1, + 'bool' : True, + 'datetime64' : Timestamp('20010102')}, + index=range(8)) df_mixed.ix[3:5,['A', 'B', 'string', 'datetime64']] = np.nan store.append('df_mixed', df_mixed, min_itemsize = {'values': 50}) @@ -1445,8 +1448,7 @@ may not be installed (by Python) by default. Compression for all objects within the file - - ``store_compressed = HDFStore('store_compressed.h5', complevel=9, - complib='blosc')`` + - ``store_compressed = HDFStore('store_compressed.h5', complevel=9, complib='blosc')`` Or on-the-fly compression (this only applies to tables). 
You can turn off file compression for a specific table by passing ``complevel=0`` diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 75605fae4e39f..5c0f0935346cc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1334,11 +1334,22 @@ def insert(self, loc, item, value): if item in self.items: raise Exception('cannot insert %s, already exists' % item) - new_items = self.items.insert(loc, item) - self.set_items_norename(new_items) + try: + new_items = self.items.insert(loc, item) + self.set_items_norename(new_items) + + # new block + self._add_new_block(item, value, loc=loc) - # new block - self._add_new_block(item, value, loc=loc) + except: + + # so our insertion operation failed, so back out of the new items + # GH 3010 + new_items = self.items.delete(loc) + self.set_items_norename(new_items) + + # re-raise + raise if len(self.blocks) > 100: self._consolidate_inplace() diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0729f0e03782e..85f66148eba8a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1968,6 +1968,19 @@ def test_constructor_cast_failure(self): foo = DataFrame({'a': ['a', 'b', 'c']}, dtype=np.float64) self.assert_(foo['a'].dtype == object) + # GH 3010, constructing with odd arrays + df = DataFrame(np.ones((4,2))) + + # this is ok + df['foo'] = np.ones((4,2)).tolist() + + # this is not ok + self.assertRaises(AssertionError, df.__setitem__, tuple(['test']), np.ones((4,2))) + + # this is ok + df['foo2'] = np.ones((4,2)).tolist() + + def test_constructor_dtype_nocast_view(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype)