Skip to content

Can't select a specific column from an HDFStore table with a MultiIndex DataFrame #6169

Closed
@glyg

Description

@glyg

I'm running into what seems to be a bug.
I'm using pandas version '0.13.0rc1-29-ga0a527b' from GitHub, with Python 3.3 on 64-bit Linux Mint 15.

Here's a minimal example that fails:

import numpy as np
import pandas as pd


index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                              ['one', 'two', 'three']],
                      labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                      names=['foo_name', 'bar_name'])


df_mi = pd.DataFrame(np.random.randn(10, 3), index=index,
                     columns=['A', 'B', 'C'])

with pd.get_store('minimal_io.h5') as store:
    store.put('df_mi', df_mi, format='table')

with pd.get_store('minimal_io.h5') as store:
    ixs = store.select('df_mi', "columns=['A']")

And here is the error message:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-32-005cf4e724e0> in <module>()
     17 
     18 with pd.get_store('minimal_io.h5') as store:
---> 19     ixs = store.select('df_mi', "columns=['A']")

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/io/pytables.py in select(self, key, where, start, stop, columns, iterator, chunksize, auto_close, **kwargs)
    622 
    623         return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop,
--> 624                              auto_close=auto_close).get_values()
    625 
    626     def select_as_coordinates(

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/io/pytables.py in get_values(self)
   1252 
   1253     def get_values(self):
-> 1254         results = self.func(self.start, self.stop)
   1255         self.close()
   1256         return results

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/io/pytables.py in func(_start, _stop)
    611         def func(_start, _stop):
    612             return s.read(where=where, start=_start, stop=_stop,
--> 613                           columns=columns, **kwargs)
    614 
    615         if iterator or chunksize is not None:

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/io/pytables.py in read(self, columns, **kwargs)
   3796         df = super(AppendableMultiFrameTable, self).read(
   3797             columns=columns, **kwargs)
-> 3798         df = df.set_index(self.levels)
   3799 
   3800         # remove names for 'level_%d'

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   2327                 names.append(None)
   2328             else:
-> 2329                 level = frame[col].values
   2330                 names.append(col)
   2331                 if drop:

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/frame.py in __getitem__(self, key)
   1626             return self._getitem_multilevel(key)
   1627         else:
-> 1628             return self._getitem_column(key)
   1629 
   1630     def _getitem_column(self, key):

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/frame.py in _getitem_column(self, key)
   1633         # get column
   1634         if self.columns.is_unique:
-> 1635             return self._get_item_cache(key)
   1636 
   1637         # duplicate columns & possible reduce dimensionaility

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/generic.py in _get_item_cache(self, item)
    976         res = cache.get(item)
    977         if res is None:
--> 978             values = self._data.get(item)
    979             res = self._box_item_values(item, values)
    980             cache[item] = res

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/internals.py in get(self, item)
   2738                 return self.get_for_nan_indexer(indexer)
   2739 
-> 2740             _, block = self._find_block(item)
   2741             return block.get(item)
   2742         else:

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/internals.py in _find_block(self, item)
   3049 
   3050     def _find_block(self, item):
-> 3051         self._check_have(item)
   3052         for i, block in enumerate(self.blocks):
   3053             if item in block:

/home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/internals.py in _check_have(self, item)
   3056     def _check_have(self, item):
   3057         if item not in self.items:
-> 3058             raise KeyError('no item named %s' % com.pprint_thing(item))
   3059 
   3060     def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,

KeyError: 'no item named foo_name'

> /home/guillaume/python3/lib/python3.3/site-packages/pandas-0.13.0rc1_29_ga0a527b-py3.3-linux-x86_64.egg/pandas/core/internals.py(3058)_check_have()
   3057         if item not in self.items:
-> 3058             raise KeyError('no item named %s' % com.pprint_thing(item))
   3059

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions