diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 76f6e864a174f..fa95ad06bd7ca 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -863,6 +863,7 @@ I/O - Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`) - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`) - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`) +- Bug in :func:`read_excel` raising uncontrolled ``IndexError`` when ``header`` references non-existing rows (:issue:`43143`) - Bug in :func:`read_html` where elements surrounding ``<br>`` were joined without a space between them (:issue:`29528`) - Bug in :func:`read_csv` when data is longer than header leading to issues with callables in ``usecols`` expecting strings (:issue:`46997`) - Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d20f347e54d6b..24b881bda4805 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -774,6 +774,12 @@ def parse( assert isinstance(skiprows, int) row += skiprows + if row > len(data) - 1: + raise ValueError( + f"header index {row} exceeds maximum index " + f"{len(data) - 1} of data.", + ) + data[row], control_row = fill_mi_header(data[row], control_row) if index_col is not None: diff --git a/pandas/tests/io/data/excel/df_header_oob.xlsx b/pandas/tests/io/data/excel/df_header_oob.xlsx new file mode 100644 index 0000000000000..1e26091cd2ace Binary files /dev/null and b/pandas/tests/io/data/excel/df_header_oob.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f6c60b11cc8ff..4ca34bec0a7d9 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1556,6 +1556,12 @@ def test_excel_read_binary_via_read_excel(self, read_ext, engine): expected = pd.read_excel("test1" + read_ext, engine=engine) tm.assert_frame_equal(result, expected) + def test_read_excel_header_index_out_of_range(self, engine): + # GH#43143 + with open("df_header_oob.xlsx", "rb") as f: + with pytest.raises(ValueError, match="exceeds maximum"): + pd.read_excel(f, header=[0, 1]) + @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"]) def test_header_with_index_col(self, filename): # GH 33476