diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 413dbb9cd0850..f026822bd9d2b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -454,6 +454,7 @@ Other API changes - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`) - Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`) - Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`) +- :func:`read_csv` and :func:`read_html` no longer raising an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`) - Changed the ``name`` attribute of several holidays in ``USFederalHolidayCalendar`` to match `official federal holiday names `_ diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 5584730be90e8..b769383281880 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -42,7 +42,6 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( ensure_object, - ensure_str, is_bool_dtype, is_categorical_dtype, is_dict_like, @@ -395,16 +394,6 @@ def extract(r): for single_ic in sorted(ic): names.insert(single_ic, single_ic) - # If we find unnamed columns all in a single - # level, then our header was too long. - for n in range(len(columns[0])): - if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): - header = ",".join([str(x) for x in self.header]) - raise ParserError( - f"Passed header=[{header}] are too many rows " - "for this multi_index of columns" - ) - # Clean the column names (if we have an index_col). if len(ic): col_names = [ diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index b0742f5b41a92..3fc23525df89e 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -557,26 +557,21 @@ def test_multi_index_unnamed(all_parsers, index_col, columns): else: data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n" + result = parser.read_csv(StringIO(data), header=header, index_col=index_col) + exp_columns = [] + if columns is None: - msg = ( - r"Passed header=\[0,1\] are too " - r"many rows for this multi_index of columns" - ) - with pytest.raises(ParserError, match=msg): - parser.read_csv(StringIO(data), header=header, index_col=index_col) - else: - result = parser.read_csv(StringIO(data), header=header, index_col=index_col) - exp_columns = [] + columns = ["", "", ""] - for i, col in enumerate(columns): - if not col: # Unnamed. - col = f"Unnamed: {i if index_col is None else i + 1}_level_0" + for i, col in enumerate(columns): + if not col: # Unnamed. + col = f"Unnamed: {i if index_col is None else i + 1}_level_0" - exp_columns.append(col) + exp_columns.append(col) - columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"])) - expected = DataFrame([[2, 3], [4, 5]], columns=columns) - tm.assert_frame_equal(result, expected) + columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"])) + expected = DataFrame([[2, 3], [4, 5]], columns=columns) + tm.assert_frame_equal(result, expected) @skip_pyarrow diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index d4b78d8371ede..3aac7e95e6591 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -14,7 +14,6 @@ import pytest from pandas.compat import is_platform_windows -from pandas.errors import ParserError import pandas.util._test_decorators as td from pandas import ( @@ -918,13 +917,8 @@ def test_wikipedia_states_multiindex(self, datapath): assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04) def test_parser_error_on_empty_header_row(self): - msg = ( - r"Passed header=\[0,1\] are too many " - r"rows for this multi_index of columns" - ) - with pytest.raises(ParserError, match=msg): - self.read_html( - """ + result = self.read_html( + """ @@ -935,8 +929,15 @@ def test_parser_error_on_empty_header_row(self):
""", - header=[0, 1], - ) + header=[0, 1], + ) + expected = DataFrame( + [["a", "b"]], + columns=MultiIndex.from_tuples( + [("Unnamed: 0_level_0", "A"), ("Unnamed: 1_level_0", "B")] + ), + ) + tm.assert_frame_equal(result[0], expected) def test_decimal_rows(self): # GH 12907