Skip to content

Commit 314b574

Browse files
committed
ENH: added ability to read in generic PyTables flavor tables to allow compatibility with other HDF5 systems
1 parent 50eb561 commit 314b574

File tree

5 files changed

+92
-11
lines changed

5 files changed

+92
-11
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pandas 0.10.1
5252
- added method ``unique`` to select the unique values in an indexable or data column
5353
- added method ``copy`` to copy an existing store (and possibly upgrade)
5454
- show the shape of the data on disk for non-table stores when printing the store
55+
- added ability to read PyTables flavor tables (allows compatibility with other HDF5 systems)
5556
- Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565)
5657
- Support reading gzipped data from file-like object
5758
- ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)

pandas/core/reshape.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -834,5 +834,5 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
834834

835835
def factor_indexer(shape, labels):
836836
""" given a tuple of shape and a list of Factor lables, return the expanded label indexer """
837-
mult = np.array(shape)[::-1].cumprod()[::-1]
838-
return np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T
837+
mult = np.array(shape, dtype = 'i8')[::-1].cumprod()[::-1]
838+
return np.sum(np.array(labels, dtype = 'i8').T * np.append(mult, [1]), axis=1).T

pandas/io/pytables.py

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pandas.core.common import _asarray_tuplesafe, _try_sort
2525
from pandas.core.internals import BlockManager, make_block, form_blocks
2626
from pandas.core.reshape import block2d_to_block3d, block2d_to_blocknd, factor_indexer
27+
from pandas.core.index import Int64Index
2728
import pandas.core.common as com
2829
from pandas.tools.merge import concat
2930

@@ -71,6 +72,7 @@ class IncompatibilityWarning(Warning): pass
7172

7273
# table class map
7374
_TABLE_MAP = {
75+
'generic_table' : 'GenericTable',
7476
'appendable_frame' : 'AppendableFrameTable',
7577
'appendable_multiframe' : 'AppendableMultiFrameTable',
7678
'appendable_panel' : 'AppendablePanelTable',
@@ -609,7 +611,7 @@ def create_table_index(self, key, **kwargs):
609611

610612
def groups(self):
611613
""" return a list of all the top-level nodes (that are not themselves a pandas storage object) """
612-
return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ]
614+
return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) ]
613615

614616
def get_node(self, key):
615617
""" return the node with the key or None if it does not exist """
@@ -684,16 +686,22 @@ def error(t):
684686
# infer the pt from the passed value
685687
if pt is None:
686688
if value is None:
687-
raise Exception("cannot create a storer if the object is not existing nor a value are passed")
688689

689-
try:
690-
pt = _TYPE_MAP[type(value)]
691-
except:
692-
error('_TYPE_MAP')
690+
if getattr(group,'table',None):
691+
pt = 'frame_table'
692+
tt = 'generic_table'
693+
else:
694+
raise Exception("cannot create a storer if the object is not existing nor a value are passed")
695+
else:
696+
697+
try:
698+
pt = _TYPE_MAP[type(value)]
699+
except:
700+
error('_TYPE_MAP')
693701

694-
# we are actually a table
695-
if table or append:
696-
pt += '_table'
702+
# we are actually a table
703+
if table or append:
704+
pt += '_table'
697705

698706
# a storer node
699707
if 'table' not in pt:
@@ -959,6 +967,24 @@ def set_attr(self):
959967
""" set the kind for this colummn """
960968
setattr(self.attrs, self.kind_attr, self.kind)
961969

970+
class GenericIndexCol(IndexCol):
971+
""" an index which is not represented in the data of the table """
972+
973+
@property
974+
def is_indexed(self):
975+
return False
976+
977+
def convert(self, values, nan_rep):
978+
""" set the values from this selection: take = take ownership """
979+
980+
self.values = Int64Index(np.arange(self.table.nrows))
981+
return self
982+
983+
def get_attr(self):
984+
pass
985+
986+
def set_attr(self):
987+
pass
962988

963989
class DataCol(IndexCol):
964990
""" a data holding column, by definition this is not indexable
@@ -1194,6 +1220,12 @@ def get_atom_data(self, block):
11941220
def get_atom_datetime64(self, block):
11951221
return _tables().Int64Col()
11961222

1223+
class GenericDataIndexableCol(DataIndexableCol):
1224+
""" represent a generic pytables data column """
1225+
1226+
def get_attr(self):
1227+
pass
1228+
11971229
class Storer(object):
11981230
""" represent an object in my store
11991231
facilitate read/write of various types of objects
@@ -2632,6 +2664,47 @@ def read(self, where=None, columns=None, **kwargs):
26322664
return df
26332665

26342666

2667+
class GenericTable(AppendableFrameTable):
2668+
""" a table that read/writes the generic pytables table format """
2669+
pandas_kind = 'frame_table'
2670+
table_type = 'generic_table'
2671+
ndim = 2
2672+
obj_type = DataFrame
2673+
2674+
@property
2675+
def pandas_type(self):
2676+
return self.pandas_kind
2677+
2678+
def get_attrs(self):
2679+
""" retrieve our attributes """
2680+
self.non_index_axes = []
2681+
self.nan_rep = None
2682+
self.levels = []
2683+
t = self.table
2684+
self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ]
2685+
self.values_axes = [ a.infer(t) for a in self.indexables if not a.is_an_indexable ]
2686+
self.data_columns = [ a.name for a in self.values_axes ]
2687+
2688+
@property
2689+
def indexables(self):
2690+
""" create the indexables from the table description """
2691+
if self._indexables is None:
2692+
2693+
d = self.description
2694+
2695+
# the index columns is just a simple index
2696+
self._indexables = [ GenericIndexCol(name='index',axis=0) ]
2697+
2698+
for i, n in enumerate(d._v_names):
2699+
2700+
dc = GenericDataIndexableCol(name = n, pos=i, values = [ n ], version = self.version)
2701+
self._indexables.append(dc)
2702+
2703+
return self._indexables
2704+
2705+
def write(self, **kwargs):
2706+
raise NotImplementedError("cannot write on an generic table")
2707+
26352708
class AppendableMultiFrameTable(AppendableFrameTable):
26362709
""" a frame with a multi-index """
26372710
table_type = 'appendable_multiframe'

pandas/io/tests/pytables_native.h5

12 KB
Binary file not shown.

pandas/io/tests/test_pytables.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,13 @@ def _check_roundtrip_table(self, obj, comparator, compression=False):
17451745
store.close()
17461746
os.remove(self.scratchpath)
17471747

1748+
def test_pytables_native_read(self):
1749+
pth = curpath()
1750+
store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
1751+
d = store['detector']
1752+
str(store)
1753+
store.close()
1754+
17481755
def test_legacy_read(self):
17491756
pth = curpath()
17501757
store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')

0 commit comments

Comments
 (0)