From 841c1551c4b98cc69a45e457b07740381ecc48ae Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 23:19:23 +0100 Subject: [PATCH 1/2] BUG: to_xml raising for pd.NA --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/formats/xml.py | 5 ++--- pandas/tests/io/xml/__init__.py | 0 pandas/tests/io/xml/test_to_xml.py | 22 ++++++++++++++++------ pandas/tests/io/xml/test_xml.py | 9 ++------- 5 files changed, 21 insertions(+), 16 deletions(-) create mode 100644 pandas/tests/io/xml/__init__.py diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index b280e82c73b89..9b36b0d8373d5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -856,6 +856,7 @@ I/O - Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. 
(:issue:`39465`) - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`) +- Bug in :func:`to_xml` raising error for ``pd.NA`` with extension array dtype (:issue:`43903`) - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`) - Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`) - Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`) diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 8e05afaa06919..1b11bb12757bb 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -18,6 +18,7 @@ from pandas.util._decorators import doc from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.missing import isna from pandas.core.frame import DataFrame from pandas.core.shared_docs import _shared_docs @@ -571,9 +572,7 @@ def build_elems(self) -> None: elem_name = f"{self.prefix_uri}{flat_col}" try: val = ( - None - if self.d[col] in [None, ""] or self.d[col] != self.d[col] - else str(self.d[col]) + None if isna(self.d[col]) or self.d[col] == "" else str(self.d[col]) ) SubElement(self.elem_row, elem_name).text = val except KeyError: diff --git a/pandas/tests/io/xml/__init__.py b/pandas/tests/io/xml/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index e0c2b3794a00c..4249537f94068 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -12,6 +12,7 @@ import pandas.util._test_decorators as td from pandas import ( + NA, DataFrame, Index, ) @@ -1307,15 +1308,24 @@ def test_filename_and_suffix_comp(parser, 
compression_only): assert geom_xml == output.strip() +def test_ea_dtypes(any_numeric_ea_dtype): + # GH#43903 + expected = """<?xml version='1.0' encoding='utf-8'?> +<data> + <row> + <index>0</index> + <a/> + </row> +</data>""" + df = DataFrame({"a": [NA]}).astype(any_numeric_ea_dtype) + result = df.to_xml() + assert result.strip() == expected + + def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - # Argument "compression" to "to_xml" of "DataFrame" has incompatible type - # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'], - # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]" - geom_df.to_xml( - path, parser=parser, compression="7z" # type: ignore[arg-type] - ) + geom_df.to_xml(path, parser=parser, compression="7z") # STORAGE OPTIONS diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 30ba95fd82bf2..b72111ec6cf1e 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -684,9 +684,7 @@ def test_names_option_wrong_type(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") with pytest.raises(TypeError, match=("is not a valid type for names")): - read_xml( - filename, names="Col1, Col2, Col3", parser=parser # type: ignore[arg-type] - ) + read_xml(filename, names="Col1, Col2, Col3", parser=parser) # ENCODING @@ -1056,10 +1054,7 @@ def test_wrong_compression(parser, compression, compression_only): def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - # error: Argument "compression" to "read_xml" has incompatible type - # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'], - # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]" - read_xml(path, parser=parser, compression="7z") # type: ignore[arg-type] + read_xml(path, parser=parser, compression="7z") # STORAGE OPTIONS From 
1d502bb5be5066e1511e9981045c0e753353e137 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 29 Dec 2021 23:55:59 +0100 Subject: [PATCH 2/2] Skip when no lxml --- pandas/tests/io/xml/test_to_xml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 4249537f94068..c8828c08dba44 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -1308,6 +1308,7 @@ def test_filename_and_suffix_comp(parser, compression_only): assert geom_xml == output.strip() +@td.skip_if_no("lxml") def test_ea_dtypes(any_numeric_ea_dtype): # GH#43903 expected = """<?xml version='1.0' encoding='utf-8'?>