Skip to content

BUG: to_xml raising for pd.NA #45116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,7 @@ I/O
- Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
- Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type resulted in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
- Bug in :meth:`DataFrame.to_xml` raising an error for ``pd.NA`` with extension array dtypes (:issue:`43903`)
- Bug in :func:`read_csv` where passing a parser in ``date_parser`` together with ``parse_dates=False`` still invoked the parser (:issue:`44366`)
- Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
Expand Down
5 changes: 2 additions & 3 deletions pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas.util._decorators import doc

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.missing import isna

from pandas.core.frame import DataFrame
from pandas.core.shared_docs import _shared_docs
Expand Down Expand Up @@ -571,9 +572,7 @@ def build_elems(self) -> None:
elem_name = f"{self.prefix_uri}{flat_col}"
try:
val = (
None
if self.d[col] in [None, ""] or self.d[col] != self.d[col]
else str(self.d[col])
None if isna(self.d[col]) or self.d[col] == "" else str(self.d[col])
)
SubElement(self.elem_row, elem_name).text = val
except KeyError:
Expand Down
Empty file added pandas/tests/io/xml/__init__.py
Empty file.
23 changes: 17 additions & 6 deletions pandas/tests/io/xml/test_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas.util._test_decorators as td

from pandas import (
NA,
DataFrame,
Index,
)
Expand Down Expand Up @@ -1307,15 +1308,25 @@ def test_filename_and_suffix_comp(parser, compression_only):
assert geom_xml == output.strip()


@td.skip_if_no("lxml")
def test_ea_dtypes(any_numeric_ea_dtype):
# GH#43903
expected = """<?xml version='1.0' encoding='utf-8'?>
<data>
<row>
<index>0</index>
<a/>
</row>
</data>"""
df = DataFrame({"a": [NA]}).astype(any_numeric_ea_dtype)
result = df.to_xml()
assert result.strip() == expected


def test_unsuported_compression(datapath, parser):
with pytest.raises(ValueError, match="Unrecognized compression type"):
with tm.ensure_clean() as path:
# Argument "compression" to "to_xml" of "DataFrame" has incompatible type
# "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
# Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
geom_df.to_xml(
path, parser=parser, compression="7z" # type: ignore[arg-type]
)
geom_df.to_xml(path, parser=parser, compression="7z")


# STORAGE OPTIONS
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,9 +684,7 @@ def test_names_option_wrong_type(datapath, parser):
filename = datapath("io", "data", "xml", "books.xml")

with pytest.raises(TypeError, match=("is not a valid type for names")):
read_xml(
filename, names="Col1, Col2, Col3", parser=parser # type: ignore[arg-type]
)
read_xml(filename, names="Col1, Col2, Col3", parser=parser)


# ENCODING
Expand Down Expand Up @@ -1056,10 +1054,7 @@ def test_wrong_compression(parser, compression, compression_only):
def test_unsuported_compression(datapath, parser):
with pytest.raises(ValueError, match="Unrecognized compression type"):
with tm.ensure_clean() as path:
# error: Argument "compression" to "read_xml" has incompatible type
# "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
# Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
read_xml(path, parser=parser, compression="7z") # type: ignore[arg-type]
read_xml(path, parser=parser, compression="7z")


# STORAGE OPTIONS
Expand Down