From 7f91171f153103fbdec729ce75f1df2b080bc9a6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Dec 2023 17:41:38 -0800 Subject: [PATCH] BUG read_csv(quotechar=, engine='pyarrow') --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/io/parsers/arrow_parser_wrapper.py | 1 + pandas/tests/io/parser/test_quoting.py | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index c69e61c899600..05065aef473c5 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -597,6 +597,7 @@ I/O - Bug in :func:`read_csv` where ``engine="python"`` did not respect ``chunksize`` arg when ``skiprows`` was specified. (:issue:`56323`) - Bug in :func:`read_csv` where ``engine="python"`` was causing a ``TypeError`` when a callable ``skiprows`` and a chunk size was specified. (:issue:`55677`) - Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`) +- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``quotechar`` was ignored (:issue:`52266`) - Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) - Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 037239e4ec83c..1c79392d54771 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -65,6 +65,7 @@ def _get_pyarrow_options(self) -> None: "escapechar": "escape_char", "skip_blank_lines": "ignore_empty_lines", "decimal": "decimal_point", + "quotechar": "quote_char", } for pandas_name, pyarrow_name in mapping.items(): if pandas_name in self.kwds and self.kwds.get(pandas_name) is not None: diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 0a1ba0252f106..a70b7e3389c1b 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -66,7 +66,6 @@ def test_quote_char_basic(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # ValueError: The 'quoting' option is not supported @pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"]) def test_quote_char_various(all_parsers, quote_char): parser = all_parsers