Skip to content

Backport PR #36175: BUG: read_excel for ods files raising UnboundLocalError in certain cases #36355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`)
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
- Fixed regression in :meth:`read_excel` with `engine="odf"` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, and :issue:`35802`)
-

.. ---------------------------------------------------------------------------
Expand Down
26 changes: 14 additions & 12 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,22 +191,24 @@ def _get_cell_string_value(self, cell) -> str:
Find and decode OpenDocument text:s tags that represent
a run length encoded sequence of space characters.
"""
from odf.element import Element, Text
from odf.element import Element
from odf.namespaces import TEXTNS
from odf.text import P, S
from odf.text import S

text_p = P().qname
text_s = S().qname

p = cell.childNodes[0]

value = []
if p.qname == text_p:
for k, fragment in enumerate(p.childNodes):
if isinstance(fragment, Text):
value.append(fragment.data)
elif isinstance(fragment, Element):
if fragment.qname == text_s:
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))

for fragment in cell.childNodes:
if isinstance(fragment, Element):
if fragment.qname == text_s:
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
value.append(" " * spaces)
else:
# recursive impl needed in case of nested fragments
# with multiple spaces
# https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704
value.append(self._get_cell_string_value(fragment))
else:
value.append(str(fragment))
return "".join(value)
Binary file added pandas/tests/io/data/excel/gh-35802.ods
Binary file not shown.
Binary file added pandas/tests/io/data/excel/gh-36122.ods
Binary file not shown.
17 changes: 17 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,23 @@ def test_reader_spaces(self, read_ext):
)
tm.assert_frame_equal(actual, expected)

# gh-36122, gh-35802
@pytest.mark.parametrize(
"basename,expected",
[
("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})),
("gh-36122", DataFrame(columns=["got 2nd sa"])),
],
)
def test_read_excel_ods_nested_xml(self, read_ext, basename, expected):
# see gh-35802
engine = pd.read_excel.keywords["engine"]
if engine != "odf":
pytest.skip(f"Skipped for engine: {engine}")

actual = pd.read_excel(basename + read_ext)
tm.assert_frame_equal(actual, expected)

def test_reading_all_sheets(self, read_ext):
# Test reading all sheet names by setting sheet_name to None,
# Ensure a dict is returned.
Expand Down