diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 60103024909a0..2215062a09a01 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -344,3 +344,5 @@ Bug Fixes - Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`) + +- Bug in ``pd.read_html`` when using bs4 flavor and parsing table with a header and only one column (:issue:`9178`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 90bbeb161442f..e350a40bfa805 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -356,14 +356,16 @@ def _parse_raw_thead(self, table): res = [] if thead: res = lmap(self._text_getter, self._parse_th(thead[0])) - return np.array(res).squeeze() if res and len(res) == 1 else res + return np.atleast_1d( + np.array(res).squeeze()) if res and len(res) == 1 else res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) res = [] if tfoot: res = lmap(self._text_getter, self._parse_td(tfoot[0])) - return np.array(res).squeeze() if res and len(res) == 1 else res + return np.atleast_1d( + np.array(res).squeeze()) if res and len(res) == 1 else res def _parse_raw_tbody(self, table): tbody = self._parse_tbody(table) diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index cb625a26e40f9..21d0748fb6aba 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -416,6 +416,31 @@ def test_empty_tables(self): res2 = self.read_html(StringIO(data2)) assert_framelist_equal(res1, res2) + def test_header_and_one_column(self): + """ + Don't fail with bs4 when there is a header and only one column + as described in issue #9178 + """ + data = StringIO(''' + + + + + + + + + + + + +
Header
first
+ + ''') + expected = DataFrame(data={'Header': 'first'}, index=[0]) + result = self.read_html(data)[0] + tm.assert_frame_equal(result, expected) + def test_tfoot_read(self): """ Make sure that read_html reads tfoot, containing td or th.