From dcfd647a9be0a128013de8b4cecac52e692b7663 Mon Sep 17 00:00:00 2001
From: John Evans <john.g.evans.ne@gmail.com>
Date: Wed, 28 Aug 2019 20:17:38 -0400
Subject: [PATCH 1/2] Fix read of py27 pytables tz attribute, gh#26443

When a file is written by Python 2.7, the "tz" attribute is stored with
character set H5T_CSET_ASCII instead of H5T_CSET_UTF8, so it is read back
as bytes where a str is expected.
---
 doc/source/whatsnew/v1.0.0.rst             |   1 +
 pandas/io/pytables.py                      |   7 ++++++-
 pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes
 pandas/tests/io/pytables/test_pytables.py  |  15 +++++++++++++++
 4 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 050a26cc86d42..ff74965812c8c 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -97,6 +97,7 @@ Datetimelike
 ^^^^^^^^^^^^
 - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`)
 - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`)
+- Bug in :meth:`HDFStore.__getitem__` incorrectly reading tz attribute created in Python 2 (:issue:`26443`)
 -
 
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index fbe413f820c90..1ff3400323e54 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None):
             kwargs["freq"] = node._v_attrs["freq"]
 
         if "tz" in node._v_attrs:
-            kwargs["tz"] = node._v_attrs["tz"]
+            if isinstance(node._v_attrs["tz"], bytes):
+                # created by python2
+                kwargs["tz"] = node._v_attrs["tz"].decode("utf-8")
+            else:
+                # created by python3
+                kwargs["tz"] = node._v_attrs["tz"]
 
         if kind in ("date", "datetime"):
             index = factory(
diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5
new file mode 100644
index 0000000000000000000000000000000000000000..45aa64324530f943b48fa5c63390392af1110c6b
GIT binary patch
literal 7168
zcmeHL%}*0S6rU|$3s$KgL?j-FryfiNg2safsuUrqZPQjF2UAwM;0C&_y9GgG;!*z-
zkNycBIdb>n(Sry61D*G^+bk4B4Z*P6bl%R)n|ZTue(%kj_i=u9?&8qckb=iFRj*3n
zs}w)^Q8%e2s58SdQ-kRTrk@h@VyJ!veWTo-;`zsWsNp-eSIfDa(ws8CQ0`W{{q$x^
zLrl+=2Ih0w5`6G8{%S$#F5^s;v2O3+tKe~7I{uJeX1qpmom6R-)2~eZt1eXY8o=tR
z{)23hk4(CV@;}OFD;3=i{C)Z_{Ey4Ur|_X6Kg@2b#ay|vP%I!nn2zvIW+rqE`0pb%
zw50)mX{nlhn9o&ebH(zLY_-;H{8VO=$E9%2sGX+R;J&cH<LvYsd6K~1aq_r0HyViA
zDWOuS7W;2egthJXfLx$}z!w|MmGz0*mbI~I`(fQ_+YP7&cprz#Ay1`HoHaBT!&RBR
z#3Xhy!SzyP9fwSHuAAh19vvYyfG>h4^hamK-;v)l^vb~BwJf)8`L*^=%f|Ia{pzY`
zHJ~S{WZZD)xGmfBou*6v7$6ehCNp_AGu1h65Xaf>O0({`YbZ<?AP<fr8CS?qPbjYs
zSL)WC2K0bmC9KXMql}Z|EF5<U<)OW3^Y9$I4JRDP8_<6l=h<tU8<wZ*!_>wdvY+W|
zC`RoQ3CbQlJFvohU?P27Pb3mJgQ-0^y!069q$c`dL!yeh!e0SA$$?wXqd4hy&SlHx
z>=KL#&e5aKxv9)FN=&M8q!3p^v>H8kwf;nf$VYymmshFO-XJ!2EmGX`lJVb-G0h$6
zv2GmHF6Zftyd2GbW_#;a+fvK#etiDAIr5fQiEU?g#qq*o4a6FVH4tkc)<CR*ztsTb
z72q)p3=1j7ggUl%#&H8W)-u&xKD$^fQ(W1Z-*7h@Er?&ke8mNfi`$;%`mLsK*J(ls
zS7~mFl0}ZtFm8m1)l}<_iz6Y9?Tp|3wue!Al;5T6K-aik;^zJ9w4M?Xy;iwDgmL-g
z3F7i?j!uu$XOGT1!M8^53EkuK+Z>;hzIeH3Ha-vc<FxQ1`*sj7kg^h7>6q4TI}Q6D
zT{k-JCp7Lj+=Td`d#Q${y<J<`uzVl41n>zv6qOh1gXFAw_A3|w`~Vr@KX|W%8=R@y
zZ-ATl;SMAG*@o>oE7qNY{kpc)^wtH}dpD3h$ft(uHqjnkOvv~L@F(+zbO>d==LSRW
zb98+S))DfPT&bTN=hHFHQH6*5Mafr3^If689YMTsz76wh9q#Lhyh0N9ayQuLqtAoi
a&nrCmuks3^ot~E8O5<)Dn3?(a=iD!5Y$rGX

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py
index d67f2c3b7bd66..8dca807afec64 100644
--- a/pandas/tests/io/pytables/test_pytables.py
+++ b/pandas/tests/io/pytables/test_pytables.py
@@ -5446,3 +5446,18 @@ def test_read_with_where_tz_aware_index(self):
                 store.append(key, expected, format="table", append=True)
             result = pd.read_hdf(path, key, where="DATE > 20151130")
             assert_frame_equal(result, expected)
+
+    def test_py2_created_with_datetimez(self, datapath):
+        """
+        The test HDF5 file was created in Python2, but could not be read in
+        Python3.
+
+        GH26443
+        """
+        index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
+        expected = DataFrame({"data": 123}, index=index)
+        with ensure_clean_store(
+            datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
+        ) as store:
+            result = store["key"]
+            assert_frame_equal(result, expected)

From 1820bcc4ef8d09b80be2b8beba5765498c1ec807 Mon Sep 17 00:00:00 2001
From: John Evans <john.g.evans.ne@gmail.com>
Date: Wed, 28 Aug 2019 20:17:38 -0400
Subject: [PATCH 2/2] Fix read of py27 pytables tz attribute, gh#26443

When a file is written by Python 2.7, the "tz" attribute is stored with
character set H5T_CSET_ASCII instead of H5T_CSET_UTF8, so it is read back
as bytes where a str is expected.
---
 doc/source/whatsnew/v1.0.0.rst             |   1 +
 pandas/io/pytables.py                      |   7 ++++++-
 pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes
 pandas/tests/io/pytables/test_pytables.py  |  13 +++++++++++++
 4 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 050a26cc86d42..c146b2782b028 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -97,6 +97,7 @@ Datetimelike
 ^^^^^^^^^^^^
 - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`)
 - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`)
+- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`)
 -
 
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index fbe413f820c90..1ff3400323e54 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None):
             kwargs["freq"] = node._v_attrs["freq"]
 
         if "tz" in node._v_attrs:
-            kwargs["tz"] = node._v_attrs["tz"]
+            if isinstance(node._v_attrs["tz"], bytes):
+                # created by python2
+                kwargs["tz"] = node._v_attrs["tz"].decode("utf-8")
+            else:
+                # created by python3
+                kwargs["tz"] = node._v_attrs["tz"]
 
         if kind in ("date", "datetime"):
             index = factory(
diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5
new file mode 100644
index 0000000000000000000000000000000000000000..45aa64324530f943b48fa5c63390392af1110c6b
GIT binary patch
literal 7168
zcmeHL%}*0S6rU|$3s$KgL?j-FryfiNg2safsuUrqZPQjF2UAwM;0C&_y9GgG;!*z-
zkNycBIdb>n(Sry61D*G^+bk4B4Z*P6bl%R)n|ZTue(%kj_i=u9?&8qckb=iFRj*3n
zs}w)^Q8%e2s58SdQ-kRTrk@h@VyJ!veWTo-;`zsWsNp-eSIfDa(ws8CQ0`W{{q$x^
zLrl+=2Ih0w5`6G8{%S$#F5^s;v2O3+tKe~7I{uJeX1qpmom6R-)2~eZt1eXY8o=tR
z{)23hk4(CV@;}OFD;3=i{C)Z_{Ey4Ur|_X6Kg@2b#ay|vP%I!nn2zvIW+rqE`0pb%
zw50)mX{nlhn9o&ebH(zLY_-;H{8VO=$E9%2sGX+R;J&cH<LvYsd6K~1aq_r0HyViA
zDWOuS7W;2egthJXfLx$}z!w|MmGz0*mbI~I`(fQ_+YP7&cprz#Ay1`HoHaBT!&RBR
z#3Xhy!SzyP9fwSHuAAh19vvYyfG>h4^hamK-;v)l^vb~BwJf)8`L*^=%f|Ia{pzY`
zHJ~S{WZZD)xGmfBou*6v7$6ehCNp_AGu1h65Xaf>O0({`YbZ<?AP<fr8CS?qPbjYs
zSL)WC2K0bmC9KXMql}Z|EF5<U<)OW3^Y9$I4JRDP8_<6l=h<tU8<wZ*!_>wdvY+W|
zC`RoQ3CbQlJFvohU?P27Pb3mJgQ-0^y!069q$c`dL!yeh!e0SA$$?wXqd4hy&SlHx
z>=KL#&e5aKxv9)FN=&M8q!3p^v>H8kwf;nf$VYymmshFO-XJ!2EmGX`lJVb-G0h$6
zv2GmHF6Zftyd2GbW_#;a+fvK#etiDAIr5fQiEU?g#qq*o4a6FVH4tkc)<CR*ztsTb
z72q)p3=1j7ggUl%#&H8W)-u&xKD$^fQ(W1Z-*7h@Er?&ke8mNfi`$;%`mLsK*J(ls
zS7~mFl0}ZtFm8m1)l}<_iz6Y9?Tp|3wue!Al;5T6K-aik;^zJ9w4M?Xy;iwDgmL-g
z3F7i?j!uu$XOGT1!M8^53EkuK+Z>;hzIeH3Ha-vc<FxQ1`*sj7kg^h7>6q4TI}Q6D
zT{k-JCp7Lj+=Td`d#Q${y<J<`uzVl41n>zv6qOh1gXFAw_A3|w`~Vr@KX|W%8=R@y
zZ-ATl;SMAG*@o>oE7qNY{kpc)^wtH}dpD3h$ft(uHqjnkOvv~L@F(+zbO>d==LSRW
zb98+S))DfPT&bTN=hHFHQH6*5Mafr3^If689YMTsz76wh9q#Lhyh0N9ayQuLqtAoi
a&nrCmuks3^ot~E8O5<)Dn3?(a=iD!5Y$rGX

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py
index d67f2c3b7bd66..9a241f0f14744 100644
--- a/pandas/tests/io/pytables/test_pytables.py
+++ b/pandas/tests/io/pytables/test_pytables.py
@@ -5446,3 +5446,16 @@ def test_read_with_where_tz_aware_index(self):
                 store.append(key, expected, format="table", append=True)
             result = pd.read_hdf(path, key, where="DATE > 20151130")
             assert_frame_equal(result, expected)
+
+    def test_py2_created_with_datetimez(self, datapath):
+        # The test HDF5 file was created in Python 2, but could not be read in
+        # Python 3.
+        #
+        # GH26443
+        index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
+        expected = DataFrame({"data": 123}, index=index)
+        with ensure_clean_store(
+            datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
+        ) as store:
+            result = store["key"]
+            assert_frame_equal(result, expected)