diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index 0037d4a4410c3..e755ce94812bb 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -83,6 +83,11 @@ HDFStore: PyTables (HDF5) HDFStore.groups HDFStore.walk +.. warning:: + + One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, + but the type of the subclass is lost upon storing. + Feather ~~~~~~~ .. autosummary:: diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3545dd8a89159..d12fefbab8797 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -241,7 +241,8 @@ I/O - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) - Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) -- +- Bug in :func:`to_hdf` raising ``KeyError`` when called on + subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`). Period ^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9b0c3caa0b407..fbf502ffa280c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2505,6 +2505,11 @@ def to_hdf( In order to add another DataFrame or Series to an existing HDF file please use append mode and a different a key. + .. warning:: + + One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, + but the type of the subclass is lost upon storing. + For more information see the :ref:`user guide <io.hdf5>`.
Parameters diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3fe251d300856..c22f6806e932e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1646,8 +1646,10 @@ def error(t): "nor a value are passed" ) else: - _TYPE_MAP = {Series: "series", DataFrame: "frame"} - pt = _TYPE_MAP[type(value)] + if isinstance(value, Series): + pt = "series" + else: + pt = "frame" # we are actually a table if format == "table": diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py new file mode 100644 index 0000000000000..196f729cd6eb2 --- /dev/null +++ b/pandas/tests/io/pytables/test_subclass.py @@ -0,0 +1,44 @@ +import numpy as np + +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.tests.io.pytables.common import ensure_clean_path + +from pandas.io.pytables import HDFStore, read_hdf + + +class TestHDFStoreSubclass: + # GH 33748 + def test_supported_for_subclass_dataframe(self): + data = {"a": [1, 2], "b": [3, 4]} + sdf = tm.SubclassedDataFrame(data, dtype=np.intp) + + expected = DataFrame(data, dtype=np.intp) + + with ensure_clean_path("temp.h5") as path: + sdf.to_hdf(path, "df") + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) + + with ensure_clean_path("temp.h5") as path: + with HDFStore(path) as store: + store.put("df", sdf) + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) + + def test_supported_for_subclass_series(self): + data = [1, 2, 3] + sser = tm.SubclassedSeries(data, dtype=np.intp) + + expected = Series(data, dtype=np.intp) + + with ensure_clean_path("temp.h5") as path: + sser.to_hdf(path, "ser") + result = read_hdf(path, "ser") + tm.assert_series_equal(result, expected) + + with ensure_clean_path("temp.h5") as path: + with HDFStore(path) as store: + store.put("ser", sser) + result = read_hdf(path, "ser") + tm.assert_series_equal(result, expected)