From 02f32cc78aaec4e2c01b0b9355cf8111e1692b40 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 11 Jul 2013 09:00:40 -0400 Subject: [PATCH] DOC: more prominent HDFStore store docs about storer/table formats --- doc/source/io.rst | 39 +++++++++++++++++++++++++++++---------- doc/source/release.rst | 2 +- pandas/io/pytables.py | 6 ++++-- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5e5943f066c3e..27488f3c5916d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1651,11 +1651,6 @@ Closing a Store, Context Manager import os os.remove('store.h5') - -These stores are **not** appendable once written (though you can simply -remove them and rewrite). Nor are they **queryable**; they must be -retrieved in their entirety. - Read/Write API ~~~~~~~~~~~~~~ @@ -1674,10 +1669,33 @@ similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) os.remove('store_tl.h5') +.. _io.hdf5-storer: + +Storer Format +~~~~~~~~~~~~~ + +The examples above show storing using ``put``, which writes the HDF5 to ``PyTables`` in a fixed array format, called +the ``storer`` format. These types of stores are **not** appendable once written (though you can simply +remove them and rewrite). Nor are they **queryable**; they must be +retrieved in their entirety. These offer very fast writing and slightly faster reading than ``table`` stores. + +.. warning:: + + A ``storer`` format will raise a ``TypeError`` if you try to retrieve using a ``where``. + + .. code-block:: python + + DataFrame(randn(10,2)).to_hdf('test_storer.h5','df') + + pd.read_hdf('test_storer.h5','df',where='index>5') + TypeError: cannot pass a where specification when reading from a non-table + this store must be selected in its entirety + + .. _io.hdf5-table: -Storing in Table format -~~~~~~~~~~~~~~~~~~~~~~~ +Table Format +~~~~~~~~~~~~ ``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` format. 
Conceptually a ``table`` is shaped very much like a DataFrame, @@ -1708,6 +1726,10 @@ supported. # the type of stored data store.root.df._v_attrs.pandas_type +.. note:: + + You can also create a ``table`` by passing ``table=True`` to a ``put`` operation. + .. _io.hdf5-keys: Hierarchical Keys @@ -2121,9 +2143,6 @@ Notes & Caveats in a string, or a ``NaT`` in a datetime-like column counts as having a value), then those rows **WILL BE DROPPED IMPLICITLY**. This limitation *may* be addressed in the future. - - You can not append/select/delete to a non-table (table creation is - determined on the first append, or by passing ``table=True`` in a - put operation) - ``HDFStore`` is **not-threadsafe for writing**. The underlying ``PyTables`` only supports concurrent reads (via threading or processes). If you need reading and writing *at the same time*, you diff --git a/doc/source/release.rst b/doc/source/release.rst index c40e1804fe3c0..2379dc29fbce7 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -113,7 +113,7 @@ pandas 0.12 - When removing an object, ``remove(key)`` raises ``KeyError`` if the key is not a valid store object. 
- raise a ``TypeError`` on passing ``where`` or ``columns`` - to select with a Storer; these are invalid parameters at this time + to select with a Storer; these are invalid parameters at this time (:issue:`4189`) - can now specify an ``encoding`` option to ``append/put`` to enable alternate encodings (:issue:`3750`) - enable support for ``iterator/chunksize`` with ``read_hdf`` diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 43b3197667d2b..d6ad6aa0c351a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1746,9 +1746,11 @@ def f(values, freq=None, tz=None): def validate_read(self, kwargs): if kwargs.get('columns') is not None: - raise TypeError("cannot pass a column specification when reading a Storer") + raise TypeError("cannot pass a column specification when reading a non-table " + "this store must be selected in its entirety") if kwargs.get('where') is not None: - raise TypeError("cannot pass a where specification when reading a Storer") + raise TypeError("cannot pass a where specification when reading from a non-table " + "this store must be selected in its entirety") @property def is_exists(self):