From 59ebf0a186500b94b6e3b2a6225b92a914ffd6af Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sun, 28 Nov 2021 22:35:03 +0100
Subject: [PATCH 1/4] Deprecate raising in read_csv when header row contains
 only empty cells

---
 doc/source/whatsnew/v1.4.0.rst        | 1 +
 pandas/io/parsers/base_parser.py      | 7 +++++++
 pandas/tests/io/parser/test_header.py | 7 +++++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 50156d4565bbd..43fdc68e384f8 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -470,6 +470,7 @@ Other Deprecations
 - Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
 - Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`)
 - Deprecated passing ``skipna=None`` for :meth:`DataFrame.mad` and :meth:`Series.mad`, pass ``skipna=True`` instead (:issue:`44580`)
+- Deprecated raising an error when header row contains only empty cells in :func:`read_csv` (:issue:`13054`)
 - Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`)
 - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
 -
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 4f5ba3460a3c8..226cfe0c455d1 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -381,6 +381,13 @@ def extract(r):
         for n in range(len(columns[0])):
             if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
                 header = ",".join([str(x) for x in self.header])
+                warnings.warn(
+                    f"The passed header=[{header}] has at least one line without data. "
+                    "This will return a MultiIndex in the future where at least one "
+                    "level consists of only Unnamed: entries.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
                 raise ParserError(
                     f"Passed header=[{header}] are too many rows "
                     "for this multi_index of columns"
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index dc3792989357a..637bee84ad599 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -557,8 +557,11 @@ def test_multi_index_unnamed(all_parsers, index_col, columns):
             r"Passed header=\[0,1\] are too "
             r"many rows for this multi_index of columns"
         )
-        with pytest.raises(ParserError, match=msg):
-            parser.read_csv(StringIO(data), header=header, index_col=index_col)
+        with tm.assert_produces_warning(
+            FutureWarning, match="consists of only Unnamed", check_stacklevel=False
+        ):
+            with pytest.raises(ParserError, match=msg):
+                parser.read_csv(StringIO(data), header=header, index_col=index_col)
     else:
         result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
         exp_columns = []

From 4ce71f00f27d5692a64dd31080646bcfc5d127de Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 15 Dec 2021 11:47:32 +0100
Subject: [PATCH 2/4] Stop raising immediately

---
 doc/source/whatsnew/v1.4.0.rst        |  2 +-
 pandas/io/parsers/base_parser.py      | 18 ----------------
 pandas/tests/io/parser/test_header.py | 30 ++++++++++-----------------
 3 files changed, 12 insertions(+), 38 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 43fdc68e384f8..9d42fef70c9c7 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -402,6 +402,7 @@ Other API changes
 - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
 - Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
 - Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
+- :func:`read_csv` no longer raising an error when one of the header rows consists only of ``Unnamed: `` columns (:issue:`13054`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -470,7 +471,6 @@ Other Deprecations
 - Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
 - Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`)
 - Deprecated passing ``skipna=None`` for :meth:`DataFrame.mad` and :meth:`Series.mad`, pass ``skipna=True`` instead (:issue:`44580`)
-- Deprecated raising an error when header row contains only empty cells in :func:`read_csv` (:issue:`13054`)
 - Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`)
 - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
 -
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 226cfe0c455d1..4cfc8ebee4d1a 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -39,7 +39,6 @@
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
     ensure_object,
-    ensure_str,
     is_bool_dtype,
     is_categorical_dtype,
     is_dict_like,
@@ -376,23 +375,6 @@ def extract(r):
         columns = list(zip(*(extract(r) for r in header)))
         names = ic + columns
 
-        # If we find unnamed columns all in a single
-        # level, then our header was too long.
-        for n in range(len(columns[0])):
-            if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
-                header = ",".join([str(x) for x in self.header])
-                warnings.warn(
-                    f"The passed header=[{header}] has at least one line without data. "
-                    "This will return a MultiIndex in the future where at least one "
-                    "level consists of only Unnamed: entries.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                raise ParserError(
-                    f"Passed header=[{header}] are too many rows "
-                    "for this multi_index of columns"
-                )
-
         # Clean the column names (if we have an index_col).
         if len(ic):
             col_names = [
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 637bee84ad599..dd3bd0ee10ffa 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -552,29 +552,21 @@ def test_multi_index_unnamed(all_parsers, index_col, columns):
     else:
         data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n"
 
+    result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
+    exp_columns = []
+
     if columns is None:
-        msg = (
-            r"Passed header=\[0,1\] are too "
-            r"many rows for this multi_index of columns"
-        )
-        with tm.assert_produces_warning(
-            FutureWarning, match="consists of only Unnamed", check_stacklevel=False
-        ):
-            with pytest.raises(ParserError, match=msg):
-                parser.read_csv(StringIO(data), header=header, index_col=index_col)
-    else:
-        result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
-        exp_columns = []
+        columns = ["", "", ""]
 
-        for i, col in enumerate(columns):
-            if not col:  # Unnamed.
-                col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
+    for i, col in enumerate(columns):
+        if not col:  # Unnamed.
+            col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
 
-            exp_columns.append(col)
+        exp_columns.append(col)
 
-        columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
-        expected = DataFrame([[2, 3], [4, 5]], columns=columns)
-        tm.assert_frame_equal(result, expected)
+    columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
+    expected = DataFrame([[2, 3], [4, 5]], columns=columns)
+    tm.assert_frame_equal(result, expected)
 
 
 @skip_pyarrow

From 3fa586b40ebd04f7761ad404b959df1908eada5e Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 15 Dec 2021 12:31:17 +0100
Subject: [PATCH 3/4] Adjust whatsnew

---
 doc/source/whatsnew/v1.4.0.rst |  2 +-
 pandas/tests/io/test_html.py   | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 6da9829531b93..e0a7d28408d95 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -454,7 +454,7 @@ Other API changes
 - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
 - Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
 - Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
-- :func:`read_csv` no longer raising an error when one of the header rows consists only of ``Unnamed: `` columns (:issue:`13054`)
+- :func:`read_csv` and :func:`read_html` no longer raising an error when one of the header rows consists only of ``Unnamed: `` columns (:issue:`13054`)
 - Changed the ``name`` attribute of several holidays in
   ``USFederalHolidayCalendar`` to match `official federal holiday
   names <https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/>`_
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index d4b78d8371ede..3aac7e95e6591 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -14,7 +14,6 @@
 import pytest
 
 from pandas.compat import is_platform_windows
-from pandas.errors import ParserError
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -918,13 +917,8 @@ def test_wikipedia_states_multiindex(self, datapath):
         assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04)
 
     def test_parser_error_on_empty_header_row(self):
-        msg = (
-            r"Passed header=\[0,1\] are too many "
-            r"rows for this multi_index of columns"
-        )
-        with pytest.raises(ParserError, match=msg):
-            self.read_html(
-                """
+        result = self.read_html(
+            """
                 <table>
                     <thead>
                         <tr><th></th><th></tr>
@@ -935,8 +929,15 @@ def test_parser_error_on_empty_header_row(self):
                     </tbody>
                 </table>
             """,
-                header=[0, 1],
-            )
+            header=[0, 1],
+        )
+        expected = DataFrame(
+            [["a", "b"]],
+            columns=MultiIndex.from_tuples(
+                [("Unnamed: 0_level_0", "A"), ("Unnamed: 1_level_0", "B")]
+            ),
+        )
+        tm.assert_frame_equal(result[0], expected)
 
     def test_decimal_rows(self):
         # GH 12907

From 4d80ca35bad48c77998fbed80e7752664bcb378e Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 18 Dec 2021 21:42:52 +0100
Subject: [PATCH 4/4] Fix whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 8ae5665c4283f..e189774161736 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -454,7 +454,7 @@ Other API changes
 - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
 - Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
 - Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
-- :func:`read_csv` and :func:`read_html` no longer raising an error when one of the header rows consists only of ``Unnamed: `` columns (:issue:`13054`)
+- :func:`read_csv` and :func:`read_html` no longer raise an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`)
 - Changed the ``name`` attribute of several holidays in
   ``USFederalHolidayCalendar`` to match `official federal holiday
   names <https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/>`_