diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 64f0cb3f2e26d..0ac26ca65f468 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -184,6 +184,7 @@ I/O - Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for ``coerce_timestamps``; following pyarrow's default allows writing nanosecond timestamps with ``version="2.0"`` (:issue:`31652`). +- Bug in :class:`HDFStore` that caused the dtype of a ``datetime64`` column to be set to ``int64`` when reading, in Python 3, a DataFrame stored in fixed format by Python 2 (:issue:`31750`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c1e12887b0150..0e2b909d5cdc7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2722,7 +2722,7 @@ def read_array( if isinstance(node, tables.VLArray): ret = node[0][start:stop] else: - dtype = getattr(attrs, "value_type", None) + dtype = _ensure_decoded(getattr(attrs, "value_type", None)) shape = getattr(attrs, "shape", None) if shape is not None: diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_datetime_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_datetime_py2.h5 new file mode 100644 index 0000000000000..18cfae15a3a78 Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_datetime_py2.h5 differ diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f56d042093886..547de39eec5e0 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -4074,6 +4074,21 @@ def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path): ) tm.assert_frame_equal(expected, result) + def test_legacy_table_fixed_format_read_datetime_py2(self, datapath, setup_path): + # GH 31750 + # legacy table with fixed format and datetime64 column written in Python 2 + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", 
"legacy_table_fixed_datetime_py2.h5"), + mode="r", + ) as store: + result = store.select("df") + expected = pd.DataFrame( + [[pd.Timestamp("2020-02-06T18:00")]], + columns=["A"], + index=pd.Index(["date"]), + ) + tm.assert_frame_equal(expected, result) + def test_legacy_table_read_py2(self, datapath, setup_path): # issue: 24925 # legacy table written in Python 2