From dcfd647a9be0a128013de8b4cecac52e692b7663 Mon Sep 17 00:00:00 2001 From: John Evans Date: Wed, 28 Aug 2019 20:17:38 -0400 Subject: [PATCH 1/2] Fix read of py27 pytables tz attribute, gh#26443 When created by python 2.7, the "tz" attribute will be created with CSET H5T_CSET_ASCII instead of H5T_CSET_UTF8, therefore it is read as bytes when string is expected. --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/io/pytables.py | 7 ++++++- pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes pandas/tests/io/pytables/test_pytables.py | 15 +++++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 050a26cc86d42..ff74965812c8c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,6 +97,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in :meth:`HDFStore.__getitem__` incorrectly reading tz attribute created in Py2 (:issue:`26443`) - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fbe413f820c90..1ff3400323e54 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None): kwargs["freq"] = node._v_attrs["freq"] if "tz" in node._v_attrs: - kwargs["tz"] = node._v_attrs["tz"] + if isinstance(node._v_attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = node._v_attrs["tz"] if kind in ("date", "datetime"): index = factory( diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5 new file mode 100644 index 0000000000000000000000000000000000000000..45aa64324530f943b48fa5c63390392af1110c6b GIT binary patch literal 7168 zcmeHL%}*0S6rU|$3s$KgL?j-FryfiNg2safsuUrqZPQjF2UAwM;0C&_y9GgG;!*z- zkNycBIdb>n(Sry61D*G^+bk4B4Z*P6bl%R)n|ZTue(%kj_i=u9?&8qckb=iFRj*3n zs}w)^Q8%e2s58SdQ-kRTrk@h@VyJ!veWTo-;`zsWsNp-eSIfDa(ws8CQ0`W{{q$x^ zLrl+=2Ih0w5`6G8{%S$#F5^s;v2O3+tKe~7I{uJeX1qpmom6R-)2~eZt1eXY8o=tR z{)23hk4(CV@;}OFD;3=i{C)Z_{Ey4Ur|_X6Kg@2b#ay|vP%I!nn2zvIW+rqE`0pb% zw50)mX{nlhn9o&ebH(zLY_-;H{8VO=$E9%2sGX+R;J&cHh4^hamK-;v)l^vb~BwJf)8`L*^=%f|Ia{pzY` zHJ~S{WZZD)xGmfBou*6v7$6ehCNp_AGu1h65Xaf>O0({`YbZwdvY+W| zC`RoQ3CbQlJFvohU?P27Pb3mJgQ-0^y!069q$c`dL!yeh!e0SA$$?wXqd4hy&SlHx z>=KL#&e5aKxv9)FN=&M8q!3p^v>H8kwf;nf$VYymmshFO-XJ!2EmGX`lJVb-G0h$6 zv2GmHF6Zftyd2GbW_#;a+fvK#etiDAIr5fQiEU?g#qq*o4a6FVH4tkc);hzIeH3Ha-vc6q4TI}Q6D zT{k-JCp7Lj+=Td`d#Q${yzv6qOh1gXFAw_A3|w`~Vr@KX|W%8=R@y zZ-ATl;SMAG*@o>oE7qNY{kpc)^wtH}dpD3h$ft(uHqjnkOvv~L@F(+zbO>d==LSRW zb98+S))DfPT&bTN=hHFHQH6*5Mafr3^If689YMTsz76wh9q#Lhyh0N9ayQuLqtAoi a&nrCmuks3^ot~E8O5<)Dn3?(a=iD!5Y$rGX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index d67f2c3b7bd66..8dca807afec64 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -5446,3 +5446,18 @@ def test_read_with_where_tz_aware_index(self): store.append(key, expected, format="table", append=True) result = pd.read_hdf(path, key, where="DATE > 20151130") assert_frame_equal(result, expected) + + def test_py2_created_with_datetimez(self, datapath): + """ + The test HDF5 file was created in Python2, but could not be read in + Python3. + + GH26443 + """ + index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + assert_frame_equal(result, expected) From 1820bcc4ef8d09b80be2b8beba5765498c1ec807 Mon Sep 17 00:00:00 2001 From: John Evans Date: Wed, 28 Aug 2019 20:17:38 -0400 Subject: [PATCH 2/2] Fix read of py27 pytables tz attribute, gh#26443 When created by python 2.7, the "tz" attribute will be created with CSET H5T_CSET_ASCII instead of H5T_CSET_UTF8, therefore it is read as bytes when string is expected. --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/io/pytables.py | 7 ++++++- pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes pandas/tests/io/pytables/test_pytables.py | 13 +++++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 050a26cc86d42..c146b2782b028 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,6 +97,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fbe413f820c90..1ff3400323e54 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None): kwargs["freq"] = node._v_attrs["freq"] if "tz" in node._v_attrs: - kwargs["tz"] = node._v_attrs["tz"] + if isinstance(node._v_attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = node._v_attrs["tz"] if kind in ("date", "datetime"): index = factory( diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5 new file mode 100644 index 0000000000000000000000000000000000000000..45aa64324530f943b48fa5c63390392af1110c6b GIT binary patch literal 7168 zcmeHL%}*0S6rU|$3s$KgL?j-FryfiNg2safsuUrqZPQjF2UAwM;0C&_y9GgG;!*z- zkNycBIdb>n(Sry61D*G^+bk4B4Z*P6bl%R)n|ZTue(%kj_i=u9?&8qckb=iFRj*3n zs}w)^Q8%e2s58SdQ-kRTrk@h@VyJ!veWTo-;`zsWsNp-eSIfDa(ws8CQ0`W{{q$x^ zLrl+=2Ih0w5`6G8{%S$#F5^s;v2O3+tKe~7I{uJeX1qpmom6R-)2~eZt1eXY8o=tR z{)23hk4(CV@;}OFD;3=i{C)Z_{Ey4Ur|_X6Kg@2b#ay|vP%I!nn2zvIW+rqE`0pb% zw50)mX{nlhn9o&ebH(zLY_-;H{8VO=$E9%2sGX+R;J&cHh4^hamK-;v)l^vb~BwJf)8`L*^=%f|Ia{pzY` zHJ~S{WZZD)xGmfBou*6v7$6ehCNp_AGu1h65Xaf>O0({`YbZwdvY+W| zC`RoQ3CbQlJFvohU?P27Pb3mJgQ-0^y!069q$c`dL!yeh!e0SA$$?wXqd4hy&SlHx z>=KL#&e5aKxv9)FN=&M8q!3p^v>H8kwf;nf$VYymmshFO-XJ!2EmGX`lJVb-G0h$6 zv2GmHF6Zftyd2GbW_#;a+fvK#etiDAIr5fQiEU?g#qq*o4a6FVH4tkc);hzIeH3Ha-vc6q4TI}Q6D zT{k-JCp7Lj+=Td`d#Q${yzv6qOh1gXFAw_A3|w`~Vr@KX|W%8=R@y zZ-ATl;SMAG*@o>oE7qNY{kpc)^wtH}dpD3h$ft(uHqjnkOvv~L@F(+zbO>d==LSRW zb98+S))DfPT&bTN=hHFHQH6*5Mafr3^If689YMTsz76wh9q#Lhyh0N9ayQuLqtAoi a&nrCmuks3^ot~E8O5<)Dn3?(a=iD!5Y$rGX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index d67f2c3b7bd66..9a241f0f14744 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -5446,3 +5446,16 @@ def test_read_with_where_tz_aware_index(self): store.append(key, expected, format="table", append=True) result = pd.read_hdf(path, key, where="DATE > 20151130") assert_frame_equal(result, expected) + + def test_py2_created_with_datetimez(self, datapath): + # The test HDF5 file was created in Python 2, but could not be read in + # Python 3. + # + # GH26443 + index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + assert_frame_equal(result, expected)