diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 0682e179a7640..8c745d01aba71 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -576,6 +576,8 @@ I/O
 - Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`)
 - Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. (:issue:`31464`)
 - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`)
+- :func:`pandas.read_hdf` has a more explicit error message when loading an
+  unsupported HDF file (:issue:`9539`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 3dd87ae6ed758..425118694fa02 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -387,7 +387,10 @@ def read_hdf(
         if key is None:
             groups = store.groups()
             if len(groups) == 0:
-                raise ValueError("No dataset in HDF5 file.")
+                raise ValueError(
+                    "Dataset(s) incompatible with Pandas data types, "
+                    "not table, or no datasets found in HDF5 file."
+                )
             candidate_only_group = groups[0]
 
             # For the HDF file to have only one dataset, all other groups
diff --git a/pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5 b/pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5
new file mode 100644
index 0000000000000..50fbee0f5018b
Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/incompatible_dataset.h5 differ
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 536f4aa760b9c..6b6ae8e5f0ca2 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -4776,3 +4776,14 @@ def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path):
         with ensure_clean_path(setup_path) as path:
             with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
                 df.to_hdf(path, "df")
+
+    def test_unsuppored_hdf_file_error(self, datapath):
+        # GH 9539: reading the incompatible fixture must raise this ValueError
+        data_path = datapath("io", "data", "legacy_hdf", "incompatible_dataset.h5")
+        message = (
+            r"Dataset\(s\) incompatible with Pandas data types, "
+            r"not table, or no datasets found in HDF5 file\."
+        )
+
+        with pytest.raises(ValueError, match=message):
+            pd.read_hdf(data_path)