Skip to content

Commit a16f243

Browse files
committed
Merge pull request #4100 from jreback/hdf_buglet
BUG: (GH 4096) block ordering is somewhat non-deterministic in HDFStore; on append, reorder the blocks to match the existing store
2 parents b8c5c67 + 3c5d6cb commit a16f243

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ pandas 0.12
299299
rewritten in an incompatible way (:issue:`4062`, :issue:`4063`)
300300
- Fixed bug where sharex and sharey were not being passed to grouped_hist
301301
(:issue:`4089`)
302+
- Fixed bug where ``HDFStore`` would fail to append because of a different block
303+
ordering on-disk (:issue:`4096`)
302304

303305

304306
pandas 0.11.0

pandas/io/pytables.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2651,7 +2651,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
26512651
obj = obj.reindex_axis(a[1], axis=a[0], copy=False)
26522652

26532653
# figure out data_columns and get out blocks
2654-
block_obj = self.get_object(obj)
2654+
block_obj = self.get_object(obj).consolidate()
26552655
blocks = block_obj._data.blocks
26562656
if len(self.non_index_axes):
26572657
axis, axis_labels = self.non_index_axes[0]
@@ -2663,6 +2663,19 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
26632663
blocks.extend(block_obj.reindex_axis(
26642664
[c], axis=axis, copy=False)._data.blocks)
26652665

2666+
# reorder the blocks in the same order as the existing_table if we can
2667+
if existing_table is not None:
2668+
by_items = dict([ (tuple(b.items.tolist()),b) for b in blocks ])
2669+
new_blocks = []
2670+
for ea in existing_table.values_axes:
2671+
items = tuple(ea.values)
2672+
try:
2673+
b = by_items.pop(items)
2674+
new_blocks.append(b)
2675+
except:
2676+
raise ValueError("cannot match existing table structure for [%s] on appending data" % items)
2677+
blocks = new_blocks
2678+
26662679
# add my values
26672680
self.values_axes = []
26682681
for i, b in enumerate(blocks):

pandas/io/tests/test_pytables.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,31 @@ def test_append_frame_column_oriented(self):
596596
expected = df.reindex(columns=['A'], index=df.index[0:4])
597597
tm.assert_frame_equal(expected, result)
598598

599+
def test_append_with_different_block_ordering(self):
# Regression test for GH 4096: append ten frames with the same set of
# columns to a single store key. There are no explicit assertions; the
# test passes when none of the store.append calls raises.
600+
601+
#GH 4096; using same frames, but different block orderings
602+
with ensure_clean(self.path) as store:
603+
604+
for i in range(10):
605+
606+
# two random float columns 'A' and 'B', ten rows
df = DataFrame(np.random.randn(10,2),columns=list('AB'))
607+
# 'index' holds i*10 .. i*10+9, so values do not repeat across iterations
df['index'] = range(10)
608+
df['index'] += i*10
609+
# constant integer columns with two different widths
df['int64'] = Series([1]*len(df),dtype='int64')
610+
df['int16'] = Series([1]*len(df),dtype='int16')
611+
612+
# even i: delete and re-add 'int64', so the column is re-inserted after
# 'int16' -- presumably this is what varies the internal block order
# (see the GH 4096 comment above); confirm against the issue
if i % 2 == 0:
613+
del df['int64']
614+
df['int64'] = Series([1]*len(df),dtype='int64')
615+
# i divisible by 3: pop 'A' and re-add it, moving it to the last column
# (both conditions hold for i == 0, the first frame appended)
if i % 3 == 0:
616+
a = df.pop('A')
617+
df['A'] = a
618+
619+
# turn the 'index' column into the frame's index, in place
df.set_index('index',inplace=True)
620+
621+
# every iteration appends to the same key 'df'
store.append('df',df)
622+
623+
599624
def test_ndim_indexables(self):
600625
""" test using ndim tables in new ways"""
601626

0 commit comments

Comments
 (0)