From c4bf8fb0ed7970abd078f482e69591f084a683cd Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Tue, 4 May 2021 15:55:02 -0500
Subject: [PATCH 01/17] Fix #41225.

---
 pandas/io/excel/_base.py | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index d26a991ba2820..9edbeaa8e1634 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1014,9 +1014,15 @@ def close(self):
         return content
 
 
-XLS_SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
-ZIP_SIGNATURE = b"PK\x03\x04"
-PEEK_SIZE = max(len(XLS_SIGNATURE), len(ZIP_SIGNATURE))
+SIGNATURES = {
+    "biff2": b"\x09\x00\x04\x00",
+    "biff3": b"\x09\x02\x06\x00",
+    "biff4": b"\x09\x04\x06\x00",
+    "biff5": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
+    "biff8": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
+    "zip": b"PK\x03\x04",
+}
+PEEK_SIZE = max(map(len, SIGNATURES.values()))
 
 
 @doc(storage_options=_shared_docs["storage_options"])
@@ -1037,8 +1043,8 @@ def inspect_excel_format(
 
     Returns
     -------
-    str
-        Format of file.
+    str or None
+        Format of file (if it can be determined)
 
     Raises
     ------
@@ -1063,10 +1069,14 @@ def inspect_excel_format(
             peek = buf
         stream.seek(0)
 
-        if peek.startswith(XLS_SIGNATURE):
+        if any(
+            peek.startswith(signature)
+            for (file_format, signature) in SIGNATURES.items()
+            if file_format.startswith('biff')
+        ):
             return "xls"
-        elif not peek.startswith(ZIP_SIGNATURE):
-            raise ValueError("File is not a recognized excel file")
+        elif not peek.startswith(SIGNATURES["zip"]):
+            return None
 
         # ZipFile typing is overly-strict
         # https://github.com/python/typeshed/issues/4212
@@ -1174,8 +1184,12 @@ def __init__(
                 ext = inspect_excel_format(
                     content_or_path=path_or_buffer, storage_options=storage_options
                 )
+                if ext is None:
+                    raise ValueError(
+                        "Excel file format cannot be determined, you must specify "
+                        "an engine manually."
+                    )
 
-            # ext will always be valid, otherwise inspect_excel_format would raise
             engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
             if engine == "auto":
                 engine = get_default_engine(ext, mode="reader")
@@ -1190,12 +1204,13 @@ def __init__(
                         path_or_buffer, storage_options=storage_options
                     )
 
-            if ext != "xls" and xlrd_version >= "2":
+            # Pass through if ext is None, otherwise check if ext valid for xlrd
+            if ext and ext != "xls" and xlrd_version >= "2":
                 raise ValueError(
                     f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
                     f"only the xls format is supported. Install openpyxl instead."
                 )
-            elif ext != "xls":
+            elif ext and ext != "xls":
                 caller = inspect.stack()[1]
                 if (
                     caller.filename.endswith(

From 95bf3258f723d249296066208c5bffa8d007a99f Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 08:59:43 -0500
Subject: [PATCH 02/17] Adjust return type.

---
 pandas/io/excel/_base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 9edbeaa8e1634..27428225e95f5 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -10,6 +10,7 @@
 from typing import (
     Any,
     Mapping,
+    Optional,
     cast,
 )
 import warnings
@@ -1029,7 +1030,7 @@ def close(self):
 def inspect_excel_format(
     content_or_path: FilePathOrBuffer,
     storage_options: StorageOptions = None,
-) -> str:
+) -> Optional[str]:
     """
     Inspect the path or content of an excel file and get its format.
 

From 416ddd9539e41abaeff2418a7a9889b595e1ab96 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 09:23:27 -0500
Subject: [PATCH 03/17] Update tests.

---
 pandas/tests/io/excel/test_readers.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index c4b3221e1d3a7..5f87931636515 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -9,6 +9,7 @@
 
 import numpy as np
 import pytest
+from xlrd import XLRDError
 
 import pandas.util._test_decorators as td
 
@@ -718,9 +719,15 @@ def test_missing_file_raises(self, read_ext):
 
     def test_corrupt_bytes_raises(self, read_ext, engine):
         bad_stream = b"foo"
-        if engine is None or engine == "xlrd":
+        if engine is None:
             error = ValueError
-            msg = "File is not a recognized excel file"
+            msg = (
+                "Excel file format cannot be determined, you must "
+                "specify an engine manually."
+            )
+        elif engine == "xlrd":
+            error = XLRDError
+            msg = "Unsupported format, or corrupt file.*"
         else:
             error = BadZipFile
             msg = "File is not a zip file"

From 44053bb728172b31788b896f10e6ffe2e4ccdba1 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 09:24:27 -0500
Subject: [PATCH 04/17] Properly stylize strings.

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 27428225e95f5..ea6f27a057780 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1073,7 +1073,7 @@ def inspect_excel_format(
         if any(
             peek.startswith(signature)
             for (file_format, signature) in SIGNATURES.items()
-            if file_format.startswith('biff')
+            if file_format.startswith("biff")
         ):
             return "xls"
         elif not peek.startswith(SIGNATURES["zip"]):

From 1b1b6481597c84d2fe98ad6aac9b0957e56c06f7 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 09:52:12 -0500
Subject: [PATCH 05/17] Correct expected exception message.

---
 pandas/tests/io/excel/test_readers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 5f87931636515..69f5436a7476a 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -727,7 +727,10 @@ def test_corrupt_bytes_raises(self, read_ext, engine):
             )
         elif engine == "xlrd":
             error = XLRDError
-            msg = "Unsupported format, or corrupt file.*"
+            msg = (
+                "Unsupported format, or corrupt file: Expected BOF "
+                "record; found b'foo'"
+            )
         else:
             error = BadZipFile
             msg = "File is not a zip file"

From 3c11c74b2a603e7c66251ead40670a53fabb1f24 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 11:14:29 -0500
Subject: [PATCH 06/17] Add relevant whatsnew entry.

---
 doc/source/whatsnew/v1.2.5.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst
index 60e146b2212eb..09c7239c5c3ec 100644
--- a/doc/source/whatsnew/v1.2.5.rst
+++ b/doc/source/whatsnew/v1.2.5.rst
@@ -34,8 +34,7 @@ Bug fixes
 
 Other
 ~~~~~
-
--
+- Loosen XLS signatures used in :func:`read_excel` to determine if the `xlrd` engine should be used (:issue:`41225`)
 -
 
 .. ---------------------------------------------------------------------------

From 9d0e8fadc0d9f932ceed359b276c12df820d551d Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 13:25:57 -0500
Subject: [PATCH 07/17] Address requested cosmetic changes.

---
 doc/source/whatsnew/v1.2.5.rst | 3 ++-
 doc/source/whatsnew/v1.3.0.rst | 1 +
 pandas/io/excel/_base.py       | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst
index 09c7239c5c3ec..60e146b2212eb 100644
--- a/doc/source/whatsnew/v1.2.5.rst
+++ b/doc/source/whatsnew/v1.2.5.rst
@@ -34,7 +34,8 @@ Bug fixes
 
 Other
 ~~~~~
-- Loosen XLS signatures used in :func:`read_excel` to determine if the `xlrd` engine should be used (:issue:`41225`)
+
+-
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 61bea198e42db..89f7789ee50df 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -831,6 +831,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
+- Loosen XLS signatures used in :func:`read_excel` to determine if the `xlrd` engine should be used (:issue:`41225`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index ea6f27a057780..4f5d242bbcbca 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -10,7 +10,7 @@
 from typing import (
     Any,
     Mapping,
-    Optional,
+    Union,
     cast,
 )
 import warnings
@@ -1030,7 +1030,7 @@ def close(self):
 def inspect_excel_format(
     content_or_path: FilePathOrBuffer,
     storage_options: StorageOptions = None,
-) -> Optional[str]:
+) -> Union[str, None]:
     """
     Inspect the path or content of an excel file and get its format.
 

From 3d7980813bb4bf99cb6640f23b95591a0abf9b30 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 13:58:09 -0500
Subject: [PATCH 08/17] Import exception when needed.

---
 pandas/tests/io/excel/test_readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 69f5436a7476a..3736e50948dee 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -9,7 +9,6 @@
 
 import numpy as np
 import pytest
-from xlrd import XLRDError
 
 import pandas.util._test_decorators as td
 
@@ -726,6 +725,7 @@ def test_corrupt_bytes_raises(self, read_ext, engine):
                 "specify an engine manually."
             )
         elif engine == "xlrd":
+            from xlrd import XLRDError
             error = XLRDError
             msg = (
                 "Unsupported format, or corrupt file: Expected BOF "

From cbfa5634dab2c3aed144bc58dea2ea09fffe974b Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Wed, 5 May 2021 14:54:22 -0500
Subject: [PATCH 09/17] Use new type hint mechanism for multiple output types.

---
 pandas/io/excel/_base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 4f5d242bbcbca..09d69df71504b 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -10,7 +10,6 @@
 from typing import (
     Any,
     Mapping,
-    Union,
     cast,
 )
 import warnings
@@ -1030,7 +1029,7 @@ def close(self):
 def inspect_excel_format(
     content_or_path: FilePathOrBuffer,
     storage_options: StorageOptions = None,
-) -> Union[str, None]:
+) -> str | None:
     """
     Inspect the path or content of an excel file and get its format.
 

From c113b20c65b575076f46a9a517ab93e376f5bf53 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Thu, 6 May 2021 08:16:11 -0500
Subject: [PATCH 10/17] Address minor documentation recommendations.

---
 doc/source/whatsnew/v1.3.0.rst | 5 ++---
 pandas/io/excel/_base.py       | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 89f7789ee50df..c132eee603844 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -195,7 +195,7 @@ Other enhancements
 - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
-- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
+- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`) and older .xls files (:issue:`41225`)
 - :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.ExponentialMovingWindow.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`, :issue:`41267`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
@@ -224,7 +224,6 @@ Other enhancements
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
--
 
 .. ---------------------------------------------------------------------------
 
@@ -831,7 +830,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
-- Loosen XLS signatures used in :func:`read_excel` to determine if the `xlrd` engine should be used (:issue:`41225`)
+- :func:`read_excel` now raises the specified engine's exception for incorrect file types if the excel format cannot be determined by pandas (:issue:`41225`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 09d69df71504b..f2a02d9ca43fb 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1044,7 +1044,7 @@ def inspect_excel_format(
     Returns
     -------
     str or None
-        Format of file (if it can be determined)
+        Format of file if it can be determined.
 
     Raises
     ------

From f023c9d2a80f21eb190d11021790bd05f1317d8e Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Thu, 6 May 2021 12:53:51 -0500
Subject: [PATCH 11/17] Add tests.

---
 pandas/io/excel/_base.py           |  6 +++---
 pandas/tests/io/excel/test_xlrd.py | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index f2a02d9ca43fb..56df68ae9ebe2 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1015,9 +1015,9 @@ def close(self):
 
 
 SIGNATURES = {
-    "biff2": b"\x09\x00\x04\x00",
-    "biff3": b"\x09\x02\x06\x00",
-    "biff4": b"\x09\x04\x06\x00",
+    "biff2": b"\x09\x00\x04\x00\x07\x00\x10\x00",
+    "biff3": b"\x09\x02\x06\x00\x00\x00\x10\x00",
+    "biff4": b"\x09\x04\x06\x00\x00\x00\x10\x00",
     "biff5": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
     "biff8": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
     "zip": b"PK\x03\x04",
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index c0d8acf8ab562..4232493b5f5c0 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -77,3 +77,22 @@ def test_read_excel_warning_with_xlsx_file(datapath):
     else:
         with tm.assert_produces_warning(None):
             pd.read_excel(path, "Sheet1", engine=None)
+
+
+def test_read_old_xls_files():
+    # GH 41226
+    import io
+    import struct
+
+    headers = {
+        "biff2": b"\x09\x00\x04\x00\x07\x00\x10\x00",
+        "biff3": b"\x09\x02\x06\x00\x00\x00\x10\x00",
+        "biff4": b"\x09\x04\x06\x00\x00\x00\x10\x00",
+        "biff5": b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
+    }
+    body = b"\x00" * 16 + b"\x3e\x00\x03\x00\xfe\xff"  # Required for biff5
+    for file_format, header in headers.items():
+        f = io.BytesIO(header + body)
+        with pytest.raises(struct.error, match="unpack requires a buffer "):
+            # If struct.error is raised, file has passed xlrd's filetype checks
+            pd.read_excel(f)

From 955f67916d6d0ed633d2af08118ea763793d6348 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Mon, 10 May 2021 14:30:01 -0500
Subject: [PATCH 12/17] Reword based on feedback.

---
 doc/source/whatsnew/v1.3.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c132eee603844..920371e30c250 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -830,7 +830,8 @@ I/O
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
-- :func:`read_excel` now raises the specified engine's exception for incorrect file types if the excel format cannot be determined by pandas (:issue:`41225`)
+- Bug in read_excel would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
+-
 
 Period
 ^^^^^^

From d6206bc5d104a8fc61c64d8fd3a28f0c53780a52 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Mon, 10 May 2021 14:31:00 -0500
Subject: [PATCH 13/17] Refactor to test whether inspect_excel_format returns "xls".

---
 pandas/tests/io/excel/test_xlrd.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index 4232493b5f5c0..0acbefca30806 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -1,3 +1,4 @@
+import io
 import pytest
 
 from pandas.compat._optional import import_optional_dependency
@@ -7,6 +8,7 @@
 from pandas.tests.io.excel import xlrd_version
 
 from pandas.io.excel import ExcelFile
+from pandas.io.excel._base import inspect_excel_format
 
 xlrd = pytest.importorskip("xlrd")
 xlwt = pytest.importorskip("xlwt")
@@ -79,20 +81,16 @@ def test_read_excel_warning_with_xlsx_file(datapath):
             pd.read_excel(path, "Sheet1", engine=None)
 
 
-def test_read_old_xls_files():
+@pytest.mark.parametrize(
+    "file_header",
+    [
+        b"\x09\x00\x04\x00\x07\x00\x10\x00",
+        b"\x09\x02\x06\x00\x00\x00\x10\x00",
+        b"\x09\x04\x06\x00\x00\x00\x10\x00",
+        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
+    ]
+)
+def test_read_old_xls_files(file_header):
     # GH 41226
-    import io
-    import struct
-
-    headers = {
-        "biff2": b"\x09\x00\x04\x00\x07\x00\x10\x00",
-        "biff3": b"\x09\x02\x06\x00\x00\x00\x10\x00",
-        "biff4": b"\x09\x04\x06\x00\x00\x00\x10\x00",
-        "biff5": b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
-    }
-    body = b"\x00" * 16 + b"\x3e\x00\x03\x00\xfe\xff"  # Required for biff5
-    for file_format, header in headers.items():
-        f = io.BytesIO(header + body)
-        with pytest.raises(struct.error, match="unpack requires a buffer "):
-            # If struct.error is raised, file has passed xlrd's filetype checks
-            pd.read_excel(f)
+    f = io.BytesIO(file_header)
+    assert inspect_excel_format(f) == "xls"

From c306f697efafa5b69f862f85b2a71ac49e5cbf84 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Mon, 10 May 2021 14:31:25 -0500
Subject: [PATCH 14/17] Refactor to move extraneous key information into
 comments.

---
 pandas/io/excel/_base.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 56df68ae9ebe2..8c9f9b39795b6 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1014,15 +1014,14 @@ def close(self):
         return content
 
 
-SIGNATURES = {
-    "biff2": b"\x09\x00\x04\x00\x07\x00\x10\x00",
-    "biff3": b"\x09\x02\x06\x00\x00\x00\x10\x00",
-    "biff4": b"\x09\x04\x06\x00\x00\x00\x10\x00",
-    "biff5": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
-    "biff8": b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
-    "zip": b"PK\x03\x04",
-}
-PEEK_SIZE = max(map(len, SIGNATURES.values()))
+XLS_SIGNATURES = (
+    b"\x09\x00\x04\x00\x07\x00\x10\x00",  # BIFF2
+    b"\x09\x02\x06\x00\x00\x00\x10\x00",  # BIFF3
+    b"\x09\x04\x06\x00\x00\x00\x10\x00",  # BIFF4
+    b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",  # Compound File Binary
+)
+ZIP_SIGNATURE = b"PK\x03\x04"
+PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE, )))
 
 
 @doc(storage_options=_shared_docs["storage_options"])
@@ -1069,13 +1068,9 @@ def inspect_excel_format(
             peek = buf
         stream.seek(0)
 
-        if any(
-            peek.startswith(signature)
-            for (file_format, signature) in SIGNATURES.items()
-            if file_format.startswith("biff")
-        ):
+        if any(peek.startswith(sig) for sig in XLS_SIGNATURES):
             return "xls"
-        elif not peek.startswith(SIGNATURES["zip"]):
+        elif not peek.startswith(ZIP_SIGNATURE):
             return None
 
         # ZipFile typing is overly-strict

From 8cc480eea087d6ef84bf5a7d0438bc75f1c7dd14 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Thu, 13 May 2021 09:19:07 -0500
Subject: [PATCH 15/17] Address pre-commit.

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 8c9f9b39795b6..5f62e897ff256 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1021,7 +1021,7 @@ def close(self):
     b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",  # Compound File Binary
 )
 ZIP_SIGNATURE = b"PK\x03\x04"
-PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE, )))
+PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE,)))
 
 
 @doc(storage_options=_shared_docs["storage_options"])

From 03d6393884b71dae64a1f7ca9b6abc6d8d5bdc93 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Thu, 13 May 2021 09:33:23 -0500
Subject: [PATCH 16/17] Pre-commit fixes.

---
 pandas/tests/io/excel/test_readers.py | 1 +
 pandas/tests/io/excel/test_xlrd.py    | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 3736e50948dee..1feb154782e66 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -726,6 +726,7 @@ def test_corrupt_bytes_raises(self, read_ext, engine):
             )
         elif engine == "xlrd":
             from xlrd import XLRDError
+
             error = XLRDError
             msg = (
                 "Unsupported format, or corrupt file: Expected BOF "
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index 0acbefca30806..67c7634f2677b 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -1,4 +1,5 @@
 import io
+
 import pytest
 
 from pandas.compat._optional import import_optional_dependency
@@ -88,7 +89,7 @@ def test_read_excel_warning_with_xlsx_file(datapath):
         b"\x09\x02\x06\x00\x00\x00\x10\x00",
         b"\x09\x04\x06\x00\x00\x00\x10\x00",
         b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
-    ]
+    ],
 )
 def test_read_old_xls_files(file_header):
     # GH 41226

From 9e9a8ffb375b5c7a24f58545187f79a4deebbff2 Mon Sep 17 00:00:00 2001
From: Geoffrey Eisenbarth <geoffrey.eisenbarth@gmail.com>
Date: Fri, 14 May 2021 07:58:08 -0500
Subject: [PATCH 17/17] Address requested changes in the whatsnew.

---
 doc/source/whatsnew/v1.3.0.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c60be05c5211f..196a5b5cd136e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -197,7 +197,7 @@ Other enhancements
 - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
-- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`) and older .xls files (:issue:`41225`)
+- :func:`pandas.read_excel` can now auto detect .xlsb files and older .xls files (:issue:`35416`, :issue:`41225`)
 - :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.ExponentialMovingWindow.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`, :issue:`41267`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
@@ -843,7 +843,7 @@ I/O
 - Bug in :func:`read_csv` and :func:`read_excel` not respecting dtype for duplicated column name when ``mangle_dupe_cols`` is set to ``True`` (:issue:`35211`)
 - Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
-- Bug in read_excel would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
+- Bug in :func:`read_excel` would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
 -
 
 Period