diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 413dbb9cd0850..f026822bd9d2b 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -454,6 +454,7 @@ Other API changes
- :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
- Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
- Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
+- :func:`read_csv` and :func:`read_html` no longer raise an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`)
- Changed the ``name`` attribute of several holidays in
``USFederalHolidayCalendar`` to match `official federal holiday
names <https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/>`_
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 5584730be90e8..b769383281880 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -42,7 +42,6 @@
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.dtypes.common import (
ensure_object,
- ensure_str,
is_bool_dtype,
is_categorical_dtype,
is_dict_like,
@@ -395,16 +394,6 @@ def extract(r):
for single_ic in sorted(ic):
names.insert(single_ic, single_ic)
- # If we find unnamed columns all in a single
- # level, then our header was too long.
- for n in range(len(columns[0])):
- if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
- header = ",".join([str(x) for x in self.header])
- raise ParserError(
- f"Passed header=[{header}] are too many rows "
- "for this multi_index of columns"
- )
-
# Clean the column names (if we have an index_col).
if len(ic):
col_names = [
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index b0742f5b41a92..3fc23525df89e 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -557,26 +557,21 @@ def test_multi_index_unnamed(all_parsers, index_col, columns):
else:
data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n"
+ result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
+ exp_columns = []
+
if columns is None:
- msg = (
- r"Passed header=\[0,1\] are too "
- r"many rows for this multi_index of columns"
- )
- with pytest.raises(ParserError, match=msg):
- parser.read_csv(StringIO(data), header=header, index_col=index_col)
- else:
- result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
- exp_columns = []
+ columns = ["", "", ""]
- for i, col in enumerate(columns):
- if not col: # Unnamed.
- col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
+ for i, col in enumerate(columns):
+ if not col: # Unnamed.
+ col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
- exp_columns.append(col)
+ exp_columns.append(col)
- columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
- expected = DataFrame([[2, 3], [4, 5]], columns=columns)
- tm.assert_frame_equal(result, expected)
+ columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
+ expected = DataFrame([[2, 3], [4, 5]], columns=columns)
+ tm.assert_frame_equal(result, expected)
@skip_pyarrow
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index d4b78d8371ede..3aac7e95e6591 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -14,7 +14,6 @@
import pytest
from pandas.compat import is_platform_windows
-from pandas.errors import ParserError
import pandas.util._test_decorators as td
from pandas import (
@@ -918,13 +917,8 @@ def test_wikipedia_states_multiindex(self, datapath):
assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04)
def test_parser_error_on_empty_header_row(self):
- msg = (
- r"Passed header=\[0,1\] are too many "
- r"rows for this multi_index of columns"
- )
- with pytest.raises(ParserError, match=msg):
- self.read_html(
- """
+ result = self.read_html(
+ """
| |
@@ -935,8 +929,15 @@ def test_parser_error_on_empty_header_row(self):
""",
- header=[0, 1],
- )
+ header=[0, 1],
+ )
+ expected = DataFrame(
+ [["a", "b"]],
+ columns=MultiIndex.from_tuples(
+ [("Unnamed: 0_level_0", "A"), ("Unnamed: 1_level_0", "B")]
+ ),
+ )
+ tm.assert_frame_equal(result[0], expected)
def test_decimal_rows(self):
# GH 12907