From d3c7200ae513bc0a1e9d08c075a9043e047a554a Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Tue, 15 Aug 2023 23:53:07 +0200
Subject: [PATCH 1/7] add unit test reproducing issue #54564 (NaN in xls file)

---
 pandas/tests/io/data/excel/test6.xls | Bin 0 -> 5632 bytes
 pandas/tests/io/excel/test_xlrd.py   |  12 ++++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 pandas/tests/io/data/excel/test6.xls

diff --git a/pandas/tests/io/data/excel/test6.xls b/pandas/tests/io/data/excel/test6.xls
new file mode 100644
index 0000000000000000000000000000000000000000..e43a1a67510d8e9b6daae5b50670337c3c0c9e9e
GIT binary patch
literal 5632
zcmeHLO=uHQ5dPjKX_L~V*_aAeD4}50+KYJc(ry*BCrj0XB7&wiptPhCdeMWe)*J=F
zlj5b&iy&1m{-A>1JO~v%2!fF4(UYLiLv{UTHYrIDAw?)w-Yh$Bc4pq2+3)@AzV~j)
zEzLjbDq{@|B81<53@tT!LpfG`PJ(vdm&a-urA(EjO@u%!%8qP_;CcLQtMsMxcNq>I
zhF_BdUwEI!MT|`4bD0gKgPKFuNDc$!26w4RV4G%@QhrYP7Ud7r^M&#u)!a}%fD}sR
z6qdY^zWr<qZ0V%aqqYRO!4s|XNi}=Xi8u7|XKu=1r!|5Dg$voqf78$%ZpQ-HZ^OBK
zu8^H9A<Yh4z)wE|{E!zAdX=&Ijm5zr11w8Sp#c9@_RCuREhh}j!au76yofmNhonXX
z$Is_-(<_h_HJ~;Y!CXz&&DFS$;db(Ibg1m=NKUfu%DwOJkBp6aXH+v=X-x+94hak&
zSEU9kCp3u6HK=c&@;%D0i4V4`+@(Ct1nJv5+F20}&#BLKwe~X3KBW)d&Kh|a<>I%a
zPrHj>z1@@-hs&4Kiyyst^zf@P&KPpdq2KhIJGX_NdDYTz7E5-Yl?Q7;?d*>_T*WkZ
z{UCATcHkxJ2nBe!y0RIFry$_)?Um?x#c5NW3qd(VG$a!@uw!{<&`II3f-_XnNg=+L
zECLn*i-1MIB481)2v`Ix0-Fng;9r=3y-Zv71QFhH1|Ww4a<21>x0IItPXvx(iq|wZ
ztcQ?iS->UseDkyw5mQA*#!@p>JuejB6O3NK1jaOviDs(T`EXyj8ej6LGd6k%HjIrt
TnzgMve(n1=8>E5XU&a3eowT}k

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index 509029861715e..dc086d02db153 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -1,5 +1,6 @@
 import io
 
+import numpy as np
 import pytest
 
 import pandas as pd
@@ -44,6 +45,17 @@ def test_read_xlsx_fails(datapath):
         pd.read_excel(path, engine="xlrd")
 
 
+def test_nan_in_xls(datapath):
+    # GH 54564
+    path = datapath("io", "data", "excel", "test6.xls")
+
+    expected = pd.DataFrame(np.r_[:3, np.nan].reshape(2, 2))
+
+    result = pd.read_excel(path)
+
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "file_header",
     [

From 87fd93332dc40a66a81bb195a2afff57e7a73aec Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Wed, 16 Aug 2023 00:13:06 +0200
Subject: [PATCH 2/7] fix #54564: read_excel with xlrd errors on NaN/Inf cells

---
 pandas/io/excel/_xlrd.py           | 14 +++++++++++---
 pandas/tests/io/excel/test_xlrd.py |  4 ++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index c68a0ab516e05..43f3519d1a389 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -120,9 +120,17 @@ def _parse_cell(cell_contents, cell_typ):
             elif cell_typ == XL_CELL_NUMBER:
                 # GH5394 - Excel 'numbers' are always floats
                 # it's a minimal perf hit and less surprising
-                val = int(cell_contents)
-                if val == cell_contents:
-                    cell_contents = val
+                try:
+                    val = int(cell_contents)
+                except Exception:
+                    # GH54564 - if the cell contents are NaN/Inf, we get an exception;
+                    # that is just another case where we don't want to convert.
+                    # The exception filter is quite general on purpose: whenever
+                    # the cell content cannot be converted to int - just don't.
+                    pass
+                else:
+                    if val == cell_contents:
+                        cell_contents = val
             return cell_contents
 
         data = []
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index dc086d02db153..ce65fda5ad6c9 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -49,9 +49,9 @@ def test_nan_in_xls(datapath):
     # GH 54564
     path = datapath("io", "data", "excel", "test6.xls")
 
-    expected = pd.DataFrame(np.r_[:3, np.nan].reshape(2, 2))
+    expected = pd.DataFrame(np.r_[:3, np.nan].reshape(2, 2)).astype(float)
 
-    result = pd.read_excel(path)
+    result = pd.read_excel(path, header=None).astype(float)
 
     tm.assert_frame_equal(result, expected)
 

From d0333185118fd8b23ff8dab8a3949292b1cc4102 Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Wed, 16 Aug 2023 00:21:13 +0200
Subject: [PATCH 3/7] added whatsnew entry

---
 doc/source/whatsnew/v2.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index d1a689dc60830..ca8ab076f156e 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -749,6 +749,7 @@ I/O
 - Bug in :func:`json_normalize`, fix json_normalize cannot parse metadata fields list type (:issue:`37782`)
 - Bug in :func:`read_csv` where it would error when ``parse_dates`` was set to a list or dictionary with ``engine="pyarrow"`` (:issue:`47961`)
 - Bug in :func:`read_csv`, with ``engine="pyarrow"`` erroring when specifying a ``dtype`` with ``index_col`` (:issue:`53229`)
+- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
 - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`)
 - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
 - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)

From 2cf1b38041e4fcb6f44215fb4e0a1a499c6ec063 Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Thu, 17 Aug 2023 19:42:57 +0200
Subject: [PATCH 4/7] anticipate this fix going into v2.2.0 instead of v2.1.0

---
 doc/source/whatsnew/v2.1.0.rst | 1 -
 doc/source/whatsnew/v2.2.0.rst | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index ca8ab076f156e..d1a689dc60830 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -749,7 +749,6 @@ I/O
 - Bug in :func:`json_normalize`, fix json_normalize cannot parse metadata fields list type (:issue:`37782`)
 - Bug in :func:`read_csv` where it would error when ``parse_dates`` was set to a list or dictionary with ``engine="pyarrow"`` (:issue:`47961`)
 - Bug in :func:`read_csv`, with ``engine="pyarrow"`` erroring when specifying a ``dtype`` with ``index_col`` (:issue:`53229`)
-- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
 - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`)
 - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
 - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index c35473b852eb9..d8f94608b0b7b 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -166,8 +166,7 @@ MultiIndex
 
 I/O
 ^^^
--
--
+- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when the file contains ``NaN`` or ``Inf`` values (:issue:`54564`)
 
 Period
 ^^^^^^

From b77352949f41f0a2aadd58ecce778467dc122b52 Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Fri, 18 Aug 2023 10:44:47 +0200
Subject: [PATCH 5/7] LBYL instead of EAFP

---
 pandas/io/excel/_xlrd.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 43f3519d1a389..2c318e380bce0 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from datetime import time
+import math
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -120,15 +121,12 @@ def _parse_cell(cell_contents, cell_typ):
             elif cell_typ == XL_CELL_NUMBER:
                 # GH5394 - Excel 'numbers' are always floats
                 # it's a minimal perf hit and less surprising
-                try:
-                    val = int(cell_contents)
-                except Exception:
+                if math.isfinite(cell_contents):
                     # GH54564 - if the cell contents are NaN/Inf, we get an exception;
                     # that is just another case where we don't want to convert.
                     # The exception filter is quite general on purpose: whenever
                     # the cell content cannot be converted to int - just don't.
-                    pass
-                else:
+                    val = int(cell_contents)
                     if val == cell_contents:
                         cell_contents = val
             return cell_contents

From f0e92aa79f2164db8403deaa7e08b9f00481a008 Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Fri, 18 Aug 2023 14:58:11 +0200
Subject: [PATCH 6/7] address review request: shorten comment, drop astype(float) in test

---
 pandas/io/excel/_xlrd.py           | 5 +----
 pandas/tests/io/excel/test_xlrd.py | 4 ++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 2c318e380bce0..a444970792e6e 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -122,10 +122,7 @@ def _parse_cell(cell_contents, cell_typ):
                 # GH5394 - Excel 'numbers' are always floats
                 # it's a minimal perf hit and less surprising
                 if math.isfinite(cell_contents):
-                    # GH54564 - if the cell contents are NaN/Inf, we get an exception;
-                    # that is just another case where we don't want to convert.
-                    # The exception filter is quite general on purpose: whenever
-                    # the cell content cannot be converted to int - just don't.
+                    # GH54564 - don't attempt to convert NaN/Inf
                     val = int(cell_contents)
                     if val == cell_contents:
                         cell_contents = val
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index ce65fda5ad6c9..efef18641041c 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -49,9 +49,9 @@ def test_nan_in_xls(datapath):
     # GH 54564
     path = datapath("io", "data", "excel", "test6.xls")
 
-    expected = pd.DataFrame(np.r_[:3, np.nan].reshape(2, 2)).astype(float)
+    expected = pd.DataFrame({0: np.r_[0, 2], 1: np.r_[1, np.nan]})
 
-    result = pd.read_excel(path, header=None).astype(float)
+    result = pd.read_excel(path, header=None)
 
     tm.assert_frame_equal(result, expected)
 

From aef4d2bd01f5453619bf54cee33620ec8d92800d Mon Sep 17 00:00:00 2001
From: Yves Delley <delleyves@gmx.ch>
Date: Mon, 21 Aug 2023 13:57:53 +0200
Subject: [PATCH 7/7] fix tests on Windows: pin expected integer column to int64

---
 pandas/tests/io/excel/test_xlrd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index efef18641041c..6d5008ca9ee68 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -49,7 +49,7 @@ def test_nan_in_xls(datapath):
     # GH 54564
     path = datapath("io", "data", "excel", "test6.xls")
 
-    expected = pd.DataFrame({0: np.r_[0, 2], 1: np.r_[1, np.nan]})
+    expected = pd.DataFrame({0: np.r_[0, 2].astype("int64"), 1: np.r_[1, np.nan]})
 
     result = pd.read_excel(path, header=None)