From 3c5d6cb71c216d4ffab8f787cf55a31c61c0d291 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 1 Jul 2013 15:28:20 -0400
Subject: [PATCH] BUG: (GH 4096) block ordering is somewhat non-deterministic in HDFStore, so reorder to the existing store if needed

---
 doc/source/release.rst           |  2 ++
 pandas/io/pytables.py            | 15 ++++++++++++++-
 pandas/io/tests/test_pytables.py | 25 +++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 36e86629c385a..691c7312dde72 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -299,6 +299,8 @@ pandas 0.12
     rewritten in an incompatible way (:issue:`4062`, :issue:`4063`)
   - Fixed bug where sharex and sharey were not being passed to grouped_hist
     (:issue:`4089`)
+  - Fix bug where ``HDFStore`` will fail to append because of a different block
+    ordering on-disk (:issue:`4096`)
 
 
 pandas 0.11.0
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d22009be05429..5bf309edffa74 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2651,7 +2651,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
             obj = obj.reindex_axis(a[1], axis=a[0], copy=False)
 
         # figure out data_columns and get out blocks
-        block_obj = self.get_object(obj)
+        block_obj = self.get_object(obj).consolidate()
         blocks = block_obj._data.blocks
         if len(self.non_index_axes):
             axis, axis_labels = self.non_index_axes[0]
@@ -2663,6 +2663,19 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
                     blocks.extend(block_obj.reindex_axis(
                         [c], axis=axis, copy=False)._data.blocks)
 
+        # reorder the blocks in the same order as the existing_table if we can
+        if existing_table is not None:
+            by_items = dict([ (tuple(b.items.tolist()),b) for b in blocks ])
+            new_blocks = []
+            for ea in existing_table.values_axes:
+                items = tuple(ea.values)
+                try:
+                    b = by_items.pop(items)
+                    new_blocks.append(b)
+                except:
+                    raise ValueError("cannot match existing table structure for [%s] on appending data" % items)
+            blocks = new_blocks
+
         # add my values
         self.values_axes = []
         for i, b in enumerate(blocks):
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index f062216986c98..00d8089ad2ee7 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -596,6 +596,31 @@ def test_append_frame_column_oriented(self):
             expected = df.reindex(columns=['A'], index=df.index[0:4])
             tm.assert_frame_equal(expected, result)
 
+    def test_append_with_different_block_ordering(self):
+
+        #GH 4096; using same frames, but different block orderings
+        with ensure_clean(self.path) as store:
+
+            for i in range(10):
+
+                df = DataFrame(np.random.randn(10,2),columns=list('AB'))
+                df['index'] = range(10)
+                df['index'] += i*10
+                df['int64'] = Series([1]*len(df),dtype='int64')
+                df['int16'] = Series([1]*len(df),dtype='int16')
+
+                if i % 2 == 0:
+                    del df['int64']
+                    df['int64'] = Series([1]*len(df),dtype='int64')
+                if i % 3 == 0:
+                    a = df.pop('A')
+                    df['A'] = a
+
+                df.set_index('index',inplace=True)
+
+                store.append('df',df)
+
+
     def test_ndim_indexables(self):
         """ test using ndim tables in new ways"""
 