Skip to content

Commit f64c608

Browse files
TST: change pyarrow skips to xfails (#55576)
* TST: change pyarrow skips to xfails

* revert edits where CI is different from local

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 0c7d303 commit f64c608

15 files changed

+268
-239
lines changed

pandas/tests/io/parser/common/test_common_basic.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
)
3535

3636
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
37-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
3837

3938

4039
def test_override_set_noconvert_columns():
@@ -515,8 +514,6 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
515514
tm.assert_frame_equal(result, expected)
516515

517516

518-
# Skip for now, actually only one test fails though, but its tricky to xfail
519-
@skip_pyarrow
520517
@pytest.mark.parametrize(
521518
"sep,skip_blank_lines,exp_data",
522519
[
@@ -536,7 +533,7 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
536533
),
537534
],
538535
)
539-
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):
536+
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
540537
parser = all_parsers
541538
data = """\
542539
A,B,C
@@ -550,6 +547,12 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):
550547

551548
if sep == r"\s+":
552549
data = data.replace(",", " ")
550+
if parser.engine == "pyarrow":
551+
mark = pytest.mark.xfail(
552+
raises=ValueError,
553+
reason="the 'pyarrow' engine does not support regex separators",
554+
)
555+
request.applymarker(mark)
553556

554557
result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
555558
expected = DataFrame(exp_data, columns=["A", "B", "C"])

pandas/tests/io/parser/common/test_index.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121

2222
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
2323

24-
# GH#43650: Some expected failures with the pyarrow engine can occasionally
25-
# cause a deadlock instead, so we skip these instead of xfailing
26-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
27-
2824

2925
@pytest.mark.parametrize(
3026
"data,kwargs,expected",
@@ -278,7 +274,8 @@ def test_empty_with_index(all_parsers):
278274
tm.assert_frame_equal(result, expected)
279275

280276

281-
@skip_pyarrow
277+
# CSV parse error: Empty CSV file or block: cannot infer number of columns
278+
@xfail_pyarrow
282279
def test_empty_with_multi_index(all_parsers):
283280
# see gh-10467
284281
data = "x,y,z"
@@ -291,7 +288,8 @@ def test_empty_with_multi_index(all_parsers):
291288
tm.assert_frame_equal(result, expected)
292289

293290

294-
@skip_pyarrow
291+
# CSV parse error: Empty CSV file or block: cannot infer number of columns
292+
@xfail_pyarrow
295293
def test_empty_with_reversed_multi_index(all_parsers):
296294
data = "x,y,z"
297295
parser = all_parsers

pandas/tests/io/parser/common/test_ints.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
1818
)
1919

20-
# GH#43650: Some expected failures with the pyarrow engine can occasionally
21-
# cause a deadlock instead, so we skip these instead of xfailing
22-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
20+
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
2321

2422

2523
def test_int_conversion(all_parsers):
@@ -102,12 +100,16 @@ def test_parse_integers_above_fp_precision(all_parsers):
102100
tm.assert_frame_equal(result, expected)
103101

104102

105-
@skip_pyarrow # Flaky
106103
@pytest.mark.parametrize("sep", [" ", r"\s+"])
107104
def test_integer_overflow_bug(all_parsers, sep):
108105
# see gh-2601
109106
data = "65248E10 11\n55555E55 22\n"
110107
parser = all_parsers
108+
if parser.engine == "pyarrow" and sep != " ":
109+
msg = "the 'pyarrow' engine does not support regex separators"
110+
with pytest.raises(ValueError, match=msg):
111+
parser.read_csv(StringIO(data), header=None, sep=sep)
112+
return
111113

112114
result = parser.read_csv(StringIO(data), header=None, sep=sep)
113115
expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]])
@@ -124,7 +126,8 @@ def test_int64_min_issues(all_parsers):
124126
tm.assert_frame_equal(result, expected)
125127

126128

127-
@skip_pyarrow
129+
# ValueError: The 'converters' option is not supported with the 'pyarrow' engine
130+
@xfail_pyarrow
128131
@pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
129132
def test_int64_overflow(all_parsers, conv):
130133
data = """ID
@@ -168,7 +171,7 @@ def test_int64_overflow(all_parsers, conv):
168171
parser.read_csv(StringIO(data), converters={"ID": conv})
169172

170173

171-
@skip_pyarrow
174+
@xfail_pyarrow # CSV parse error: Empty CSV file or block
172175
@pytest.mark.parametrize(
173176
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
174177
)
@@ -182,7 +185,7 @@ def test_int64_uint64_range(all_parsers, val):
182185
tm.assert_frame_equal(result, expected)
183186

184187

185-
@skip_pyarrow
188+
@xfail_pyarrow # CSV parse error: Empty CSV file or block
186189
@pytest.mark.parametrize(
187190
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
188191
)
@@ -196,7 +199,7 @@ def test_outside_int64_uint64_range(all_parsers, val):
196199
tm.assert_frame_equal(result, expected)
197200

198201

199-
@skip_pyarrow
202+
@xfail_pyarrow # gets float64 dtype instead of object
200203
@pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
201204
def test_numeric_range_too_wide(all_parsers, exp_data):
202205
# No numerical dtype can hold both negative and uint64

pandas/tests/io/parser/common/test_read_errors.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import pandas._testing as tm
2323

2424
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
25-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
2625

2726

2827
def test_empty_decimal_marker(all_parsers):
@@ -44,7 +43,6 @@ def test_empty_decimal_marker(all_parsers):
4443
parser.read_csv(StringIO(data), decimal="")
4544

4645

47-
@skip_pyarrow
4846
def test_bad_stream_exception(all_parsers, csv_dir_path):
4947
# see gh-13652
5048
#
@@ -65,7 +63,7 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
6563
parser.read_csv(stream)
6664

6765

68-
@skip_pyarrow
66+
@xfail_pyarrow # ValueError: The 'comment' option is not supported
6967
def test_malformed(all_parsers):
7068
# see gh-6607
7169
parser = all_parsers
@@ -80,7 +78,7 @@ def test_malformed(all_parsers):
8078
parser.read_csv(StringIO(data), header=1, comment="#")
8179

8280

83-
@skip_pyarrow
81+
@xfail_pyarrow # ValueError: The 'iterator' option is not supported
8482
@pytest.mark.parametrize("nrows", [5, 3, None])
8583
def test_malformed_chunks(all_parsers, nrows):
8684
data = """ignore
@@ -100,7 +98,7 @@ def test_malformed_chunks(all_parsers, nrows):
10098
reader.read(nrows)
10199

102100

103-
@skip_pyarrow
101+
@xfail_pyarrow # does not raise
104102
def test_catch_too_many_names(all_parsers):
105103
# see gh-5156
106104
data = """\
@@ -115,12 +113,17 @@ def test_catch_too_many_names(all_parsers):
115113
else "Number of passed names did not match "
116114
"number of header fields in the file"
117115
)
116+
depr_msg = "Passing a BlockManager to DataFrame is deprecated"
117+
warn = None
118+
if parser.engine == "pyarrow":
119+
warn = DeprecationWarning
118120

119-
with pytest.raises(ValueError, match=msg):
120-
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
121+
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
122+
with pytest.raises(ValueError, match=msg):
123+
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
121124

122125

123-
@skip_pyarrow
126+
@xfail_pyarrow # CSV parse error: Empty CSV file or block
124127
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
125128
def test_raise_on_no_columns(all_parsers, nrows):
126129
parser = all_parsers
@@ -208,7 +211,6 @@ def test_read_csv_wrong_num_columns(all_parsers):
208211
parser.read_csv(StringIO(data))
209212

210213

211-
@skip_pyarrow
212214
def test_null_byte_char(request, all_parsers):
213215
# see gh-2741
214216
data = "\x00,foo"
@@ -226,12 +228,19 @@ def test_null_byte_char(request, all_parsers):
226228
out = parser.read_csv(StringIO(data), names=names)
227229
tm.assert_frame_equal(out, expected)
228230
else:
229-
msg = "NULL byte detected"
231+
if parser.engine == "pyarrow":
232+
msg = (
233+
"CSV parse error: Empty CSV file or block: "
234+
"cannot infer number of columns"
235+
)
236+
else:
237+
msg = "NULL byte detected"
230238
with pytest.raises(ParserError, match=msg):
231239
parser.read_csv(StringIO(data), names=names)
232240

233241

234-
@skip_pyarrow
242+
# ValueError: the 'pyarrow' engine does not support sep=None with delim_whitespace=False
243+
@xfail_pyarrow
235244
@pytest.mark.filterwarnings("always::ResourceWarning")
236245
def test_open_file(request, all_parsers):
237246
# GH 39024

pandas/tests/io/parser/conftest.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -279,19 +279,3 @@ def pyarrow_xfail(request):
279279
if parser.engine == "pyarrow":
280280
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
281281
request.applymarker(mark)
282-
283-
284-
@pytest.fixture
285-
def pyarrow_skip(request):
286-
"""
287-
Fixture that skips a test if the engine is pyarrow.
288-
"""
289-
if "all_parsers" in request.fixturenames:
290-
parser = request.getfixturevalue("all_parsers")
291-
elif "all_parsers_all_precisions" in request.fixturenames:
292-
# Return value is tuple of (engine, precision)
293-
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
294-
else:
295-
return
296-
if parser.engine == "pyarrow":
297-
pytest.skip("pyarrow doesn't support this.")

pandas/tests/io/parser/dtypes/test_categorical.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
)
2626

2727
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
28-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
2928

3029

3130
@xfail_pyarrow
@@ -55,9 +54,8 @@ def test_categorical_dtype(all_parsers, dtype):
5554
tm.assert_frame_equal(actual, expected)
5655

5756

58-
@skip_pyarrow # Flaky
5957
@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}])
60-
def test_categorical_dtype_single(all_parsers, dtype):
58+
def test_categorical_dtype_single(all_parsers, dtype, request):
6159
# see gh-10153
6260
parser = all_parsers
6361
data = """a,b,c
@@ -67,6 +65,13 @@ def test_categorical_dtype_single(all_parsers, dtype):
6765
expected = DataFrame(
6866
{"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]}
6967
)
68+
if parser.engine == "pyarrow":
69+
mark = pytest.mark.xfail(
70+
strict=False,
71+
reason="Flaky test sometimes gives object dtype instead of Categorical",
72+
)
73+
request.applymarker(mark)
74+
7075
actual = parser.read_csv(StringIO(data), dtype=dtype)
7176
tm.assert_frame_equal(actual, expected)
7277

@@ -141,6 +146,7 @@ def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
141146
tm.assert_frame_equal(actual, expected)
142147

143148

149+
# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
144150
@xfail_pyarrow
145151
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
146152
# see gh-10153
@@ -161,6 +167,7 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers):
161167
tm.assert_frame_equal(actual, expected)
162168

163169

170+
# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
164171
@xfail_pyarrow
165172
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
166173
# see gh-10153
@@ -253,7 +260,6 @@ def test_categorical_coerces_numeric(all_parsers):
253260
tm.assert_frame_equal(result, expected)
254261

255262

256-
@skip_pyarrow # Flaky
257263
def test_categorical_coerces_datetime(all_parsers):
258264
parser = all_parsers
259265
dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None)

pandas/tests/io/parser/test_compression.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
1818
)
1919

20-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
21-
2220

2321
@pytest.fixture(params=[True, False])
2422
def buffer(request):
@@ -36,7 +34,6 @@ def parser_and_data(all_parsers, csv1):
3634
return parser, data, expected
3735

3836

39-
@skip_pyarrow
4037
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
4138
def test_zip(parser_and_data, compression):
4239
parser, data, expected = parser_and_data
@@ -54,7 +51,6 @@ def test_zip(parser_and_data, compression):
5451
tm.assert_frame_equal(result, expected)
5552

5653

57-
@skip_pyarrow
5854
@pytest.mark.parametrize("compression", ["zip", "infer"])
5955
def test_zip_error_multiple_files(parser_and_data, compression):
6056
parser, data, expected = parser_and_data
@@ -70,7 +66,6 @@ def test_zip_error_multiple_files(parser_and_data, compression):
7066
parser.read_csv(path, compression=compression)
7167

7268

73-
@skip_pyarrow
7469
def test_zip_error_no_files(parser_and_data):
7570
parser, _, _ = parser_and_data
7671

@@ -82,7 +77,6 @@ def test_zip_error_no_files(parser_and_data):
8277
parser.read_csv(path, compression="zip")
8378

8479

85-
@skip_pyarrow
8680
def test_zip_error_invalid_zip(parser_and_data):
8781
parser, _, _ = parser_and_data
8882

@@ -92,7 +86,6 @@ def test_zip_error_invalid_zip(parser_and_data):
9286
parser.read_csv(f, compression="zip")
9387

9488

95-
@skip_pyarrow
9689
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
9790
def test_compression(
9891
request,
@@ -128,7 +121,6 @@ def test_compression(
128121
tm.assert_frame_equal(result, expected)
129122

130123

131-
@skip_pyarrow
132124
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
133125
def test_infer_compression(all_parsers, csv1, buffer, ext):
134126
# see gh-9770
@@ -148,7 +140,6 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
148140
tm.assert_frame_equal(result, expected)
149141

150142

151-
@skip_pyarrow
152143
def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt):
153144
# see gh-18071, gh-24130
154145
parser = all_parsers
@@ -166,7 +157,6 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
166157
tm.assert_frame_equal(result, expected)
167158

168159

169-
@skip_pyarrow
170160
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
171161
def test_invalid_compression(all_parsers, invalid_compression):
172162
parser = all_parsers
@@ -178,7 +168,6 @@ def test_invalid_compression(all_parsers, invalid_compression):
178168
parser.read_csv("test_file.zip", **compress_kwargs)
179169

180170

181-
@skip_pyarrow
182171
def test_compression_tar_archive(all_parsers, csv_dir_path):
183172
parser = all_parsers
184173
path = os.path.join(csv_dir_path, "tar_csv.tar.gz")
@@ -200,7 +189,6 @@ def test_ignore_compression_extension(all_parsers):
200189
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
201190

202191

203-
@skip_pyarrow
204192
def test_writes_tar_gz(all_parsers):
205193
parser = all_parsers
206194
data = DataFrame(

0 commit comments

Comments (0)