From 1a294d21d1a798a842fe57c163e406dbfd4c2104 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Nov 2023 13:29:36 -0800 Subject: [PATCH] TST: de-xfail pyarrow parser tests --- .../io/parser/common/test_common_basic.py | 26 ++++++++++++++----- .../io/parser/common/test_file_buffer_url.py | 7 ++++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 0c28db245de31..558fdb7632102 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -399,11 +399,16 @@ def test_escapechar(all_parsers): tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"])) -@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators def test_ignore_leading_whitespace(all_parsers): # see gh-3374, gh-6607 parser = all_parsers data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9" + + if parser.engine == "pyarrow": + msg = "the 'pyarrow' engine does not support regex separators" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), sep=r"\s+") + return result = parser.read_csv(StringIO(data), sep=r"\s+") expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]}) @@ -582,12 +587,14 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request): if sep == r"\s+": data = data.replace(",", " ") + if parser.engine == "pyarrow": - mark = pytest.mark.xfail( - raises=ValueError, - reason="the 'pyarrow' engine does not support regex separators", - ) - request.applymarker(mark) + msg = "the 'pyarrow' engine does not support regex separators" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines + ) + return result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines) expected = DataFrame(exp_data, columns=["A", "B", "C"]) @@ -610,7 +617,6 @@ def test_whitespace_lines(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators @pytest.mark.parametrize( "data,expected", [ @@ -635,6 +641,12 @@ def test_whitespace_lines(all_parsers): def test_whitespace_regex_separator(all_parsers, data, expected): # see gh-6607 parser = all_parsers + if parser.engine == "pyarrow": + msg = "the 'pyarrow' engine does not support regex separators" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), sep=r"\s+") + return + result = parser.read_csv(StringIO(data), sep=r"\s+") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 69c39fdf4cdbe..c374795019ff4 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -235,7 +235,6 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg, request): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators def test_temporary_file(all_parsers): # see gh-13398 parser = all_parsers @@ -246,6 +245,12 @@ def test_temporary_file(all_parsers): new_file.flush() new_file.seek(0) + if parser.engine == "pyarrow": + msg = "the 'pyarrow' engine does not support regex separators" + with pytest.raises(ValueError, match=msg): + parser.read_csv(new_file, sep=r"\s+", header=None) + return + result = parser.read_csv(new_file, sep=r"\s+", header=None) expected = DataFrame([[0, 0]])