pandas-dev · jreback · Sep 7, 2013 · Sep 7, 2013 · Sep 7, 2013
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -667,7 +667,7 @@ def func(_start, _stop):
             axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
 
             # concat and return
-            return concat(objs, axis=axis, verify_integrity=True)
+            return concat(objs, axis=axis, verify_integrity=True).consolidate()
 
         if iterator or chunksize is not None:
             return TableIterator(self, func, nrows=nrows, start=start, stop=stop, chunksize=chunksize, auto_close=auto_close)
@@ -2910,9 +2910,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
 
         # reindex by our non_index_axes & compute data_columns
         for a in self.non_index_axes:
-            labels = _ensure_index(a[1])
-            if not labels.equals(obj._get_axis(a[0])):
-                obj = obj.reindex_axis(labels, axis=a[0])
+            obj = _reindex_axis(obj, a[0], a[1])
 
         # figure out data_columns and get out blocks
         block_obj = self.get_object(obj).consolidate()
@@ -3000,11 +2998,7 @@ def process_axes(self, obj, columns=None):
 
         # reorder by any non_index_axes & limit to the select columns
         for axis, labels in self.non_index_axes:
-            if columns is not None:
-                labels = Index(labels) & Index(columns)
-            labels = _ensure_index(labels)
-            if not labels.equals(obj._get_axis(axis)):
-                obj = obj.reindex_axis(labels, axis=axis)
+            obj = _reindex_axis(obj, axis, labels, columns)
 
         # apply the selection filters (but keep in the same order)
         if self.selection.filter:
@@ -3219,7 +3213,7 @@ def read(self, where=None, columns=None, **kwargs):
         if len(objs) == 1:
             wp = objs[0]
         else:
-            wp = concat(objs, axis=0, verify_integrity=False)
+            wp = concat(objs, axis=0, verify_integrity=False).consolidate()
 
         # apply the selection filters & axis orderings
         wp = self.process_axes(wp, columns=columns)
@@ -3510,7 +3504,7 @@ def read(self, where=None, columns=None, **kwargs):
         if len(frames) == 1:
             df = frames[0]
         else:
-            df = concat(frames, axis=1, verify_integrity=False)
+            df = concat(frames, axis=1, verify_integrity=False).consolidate()
 
         # apply the selection filters & axis orderings
         df = self.process_axes(df, columns=columns)
@@ -3683,6 +3677,26 @@ class AppendableNDimTable(AppendablePanelTable):
     obj_type = Panel4D
 
 
+def _reindex_axis(obj, axis, labels, other=None):
+    ax = obj._get_axis(axis)  # current labels of obj along `axis`
+    labels = _ensure_index(labels)
+
+    # Fast path: hand obj back untouched when `labels` already equals the
+    # current axis (and also equals `other`, when `other` is supplied).
+    if other is not None:
+        other = _ensure_index(other)
+    if (other is None or labels.equals(other)) and labels.equals(ax):
+        return obj
+
+    labels = _ensure_index(labels.unique())  # drop repeated labels
+    if other is not None:
+        labels = labels & _ensure_index(other.unique())  # limit to labels also in `other`
+    if not labels.equals(ax):
+        slicer = [ slice(None, None) ] * obj.ndim  # full slice on every axis...
+        slicer[axis] = labels  # ...except `axis`, which is restricted to `labels`
+        obj = obj.loc[tuple(slicer)]
+    return obj
+
 def _get_info(info, name):
     """ get/create the info for this name """
     try:

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -2298,15 +2298,24 @@ def test_wide_table(self):
 
     def test_select_with_dups(self):
 
-
         # single dtypes
         df = DataFrame(np.random.randn(10,4),columns=['A','A','B','B'])
         df.index = date_range('20130101 9:30',periods=10,freq='T')
 
         with ensure_clean(self.path) as store:
             store.append('df',df)
+
             result = store.select('df')
-            assert_frame_equal(result,df)
+            expected = df
+            assert_frame_equal(result,expected,by_blocks=True)
+
+            result = store.select('df',columns=df.columns)
+            expected = df
+            assert_frame_equal(result,expected,by_blocks=True)
+
+            result = store.select('df',columns=['A'])
+            expected = df.loc[:,['A']]
+            assert_frame_equal(result,expected)
 
         # dups accross dtypes
         df = concat([DataFrame(np.random.randn(10,4),columns=['A','A','B','B']),
@@ -2316,8 +2325,22 @@ def test_select_with_dups(self):
 
         with ensure_clean(self.path) as store:
             store.append('df',df)
+
             result = store.select('df')
-            assert_frame_equal(result,df)
+            expected = df
+            assert_frame_equal(result,expected,by_blocks=True)
+
+            result = store.select('df',columns=df.columns)
+            expected = df
+            assert_frame_equal(result,expected,by_blocks=True)
+
+            expected = df.loc[:,['A']]
+            result = store.select('df',columns=['A'])
+            assert_frame_equal(result,expected,by_blocks=True)
+
+            expected = df.loc[:,['B','A']]
+            result = store.select('df',columns=['B','A'])
+            assert_frame_equal(result,expected,by_blocks=True)
 
     def test_wide_table_dups(self):
         wp = tm.makePanel()

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -258,27 +258,41 @@ def assert_frame_equal(left, right, check_dtype=True,
                        check_column_type=False,
                        check_frame_type=False,
                        check_less_precise=False,
-                       check_names=True):
+                       check_names=True,
+                       by_blocks=False):
     if check_frame_type:
         assert_isinstance(left, type(right))
     assert_isinstance(left, DataFrame)
     assert_isinstance(right, DataFrame)
 
     if check_less_precise:
-        assert_almost_equal(left.columns, right.columns)
+        if not by_blocks:
+            assert_almost_equal(left.columns, right.columns)
         assert_almost_equal(left.index, right.index)
     else:
-        assert_index_equal(left.columns, right.columns)
+        if not by_blocks:
+            assert_index_equal(left.columns, right.columns)
         assert_index_equal(left.index, right.index)
 
-    for i, col in enumerate(left.columns):
-        assert col in right
-        lcol = left.icol(i)
-        rcol = right.icol(i)
-        assert_series_equal(lcol, rcol,
-                            check_dtype=check_dtype,
-                            check_index_type=check_index_type,
-                            check_less_precise=check_less_precise)
+    # compare by blocks
+    if by_blocks:
+        rblocks = right.blocks
+        lblocks = left.blocks
+        for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
+            assert dtype in lblocks
+            assert dtype in rblocks
+            assert_frame_equal(lblocks[dtype],rblocks[dtype],check_dtype=check_dtype)
+
+    # compare by columns
+    else:
+        for i, col in enumerate(left.columns):
+            assert col in right
+            lcol = left.icol(i)
+            rcol = right.icol(i)
+            assert_series_equal(lcol, rcol,
+                                check_dtype=check_dtype,
+                                check_index_type=check_index_type,
+                                check_less_precise=check_less_precise)
 
     if check_index_type:
         assert_isinstance(left.index, type(right.index))