Skip to content

min_itemsize not working on MultiIndex columns for Series, with format="table" #11412

Closed
@toobaz

Description

@toobaz

If I do

ddf = pd.DataFrame([['a', 'b', 1],
                    ['a', 'b', 2]],
                    columns=['A', 'B', 'C']).set_index(['A', 'B'])

and then

ddf['C'].to_hdf('/tmp/store.hdf', 'test',
          format="table",
          min_itemsize={'A' : 3})

I get the following:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-46-66f05c11146d> in <module>()
      1 ddf['C'].to_hdf('/tmp/store.hdf', 'test',
      2           format="table",
----> 3           min_itemsize={'A' : 3})

/usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in to_hdf(self, path_or_buf, key, **kwargs)
    936 
    937         from pandas.io import pytables
--> 938         return pytables.to_hdf(path_or_buf, key, self, **kwargs)
    939 
    940     def to_msgpack(self, path_or_buf=None, **kwargs):

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
    268         with HDFStore(path_or_buf, mode=mode, complevel=complevel,
    269                        complib=complib) as store:
--> 270             f(store)
    271     else:
    272         f(path_or_buf)

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in <lambda>(store)
    263         f = lambda store: store.append(key, value, **kwargs)
    264     else:
--> 265         f = lambda store: store.put(key, value, **kwargs)
    266 
    267     if isinstance(path_or_buf, string_types):

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in put(self, key, value, format, append, **kwargs)
    825             format = get_option("io.hdf.default_format") or 'fixed'
    826         kwargs = self._validate_format(format, kwargs)
--> 827         self._write_to_group(key, value, append=append, **kwargs)
    828 
    829     def remove(self, key, where=None, start=None, stop=None):

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
   1263 
   1264         # write the object
-> 1265         s.write(obj=value, append=append, complib=complib, **kwargs)
   1266 
   1267         if s.is_table and index:

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in write(self, obj, **kwargs)
   4104         cols.append(name)
   4105         obj.columns = cols
-> 4106         return super(AppendableMultiSeriesTable, self).write(obj=obj, **kwargs)
   4107 
   4108 

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in write(self, obj, data_columns, **kwargs)
   4071             obj.columns = [name]
   4072         return super(AppendableSeriesTable, self).write(
-> 4073             obj=obj, data_columns=obj.columns, **kwargs)
   4074 
   4075     def read(self, columns=None, **kwargs):

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
   3769         self.create_axes(axes=axes, obj=obj, validate=append,
   3770                          min_itemsize=min_itemsize,
-> 3771                          **kwargs)
   3772 
   3773         for a in self.axes:

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
   3371             axis, axis_labels = self.non_index_axes[0]
   3372             data_columns = self.validate_data_columns(
-> 3373                 data_columns, min_itemsize)
   3374             if len(data_columns):
   3375                 mgr = block_obj.reindex_axis(

/usr/lib/python2.7/dist-packages/pandas/io/pytables.pyc in validate_data_columns(self, data_columns, min_itemsize)
   3247 
   3248             existing_data_columns = set(data_columns)
-> 3249             data_columns.extend([
   3250                 k for k in min_itemsize.keys()
   3251                 if k != 'values' and k not in existing_data_columns

AttributeError: 'Index' object has no attribute 'extend'

Everything works fine, however, if I don't specify format="table", or if I don't specify min_itemsize, or if I save the data as a DataFrame (ddf[['C']]) rather than as a Series.

Tested with up-to-date pandas from git and PyTables 3.2.2-1.

Metadata

Metadata

Assignees

No one assigned

    Labels

    IO HDF5 (read_hdf, HDFStore)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions