Skip to content

TST: de-xfail pyarrow parser tests #56056

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions pandas/tests/io/parser/test_encoding.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,7 +23,6 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


Expand Down Expand Up @@ -238,7 +237,6 @@ def test_parse_encoded_special_characters(encoding):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
@pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"])
def test_encoding_memory_map(all_parsers, encoding):
# GH40986
Expand All @@ -252,11 +250,17 @@ def test_encoding_memory_map(all_parsers, encoding):
)
with tm.ensure_clean() as file:
expected.to_csv(file, index=False, encoding=encoding)

if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(file, encoding=encoding, memory_map=True)
return

df = parser.read_csv(file, encoding=encoding, memory_map=True)
tm.assert_frame_equal(df, expected)


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_chunk_splits_multibyte_char(all_parsers):
"""
Chunk splits a multibyte character with memory_map=True
Expand All @@ -272,11 +276,17 @@ def test_chunk_splits_multibyte_char(all_parsers):
df.iloc[2047] = "a" * 127 + "ą"
with tm.ensure_clean("bug-gh43540.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
dfr = parser.read_csv(fname, header=None, memory_map=True, engine="c")

if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(fname, header=None, memory_map=True)
return

dfr = parser.read_csv(fname, header=None, memory_map=True)
tm.assert_frame_equal(dfr, df)


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_readcsv_memmap_utf8(all_parsers):
"""
GH 43787
Expand All @@ -300,9 +310,14 @@ def test_readcsv_memmap_utf8(all_parsers):
df = DataFrame(lines)
with tm.ensure_clean("utf8test.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
dfr = parser.read_csv(
fname, header=None, memory_map=True, engine="c", encoding="utf-8"
)

if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8")
return

dfr = parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8")
tm.assert_frame_equal(df, dfr)


Expand Down
17 changes: 12 additions & 5 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1979,8 +1979,6 @@ def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, co
tm.assert_frame_equal(result, expected)


# ValueError: The 'thousands' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_date_parser_usecols_thousands(all_parsers):
# GH#39365
data = """A,B,C
Expand All @@ -1989,12 +1987,21 @@ def test_date_parser_usecols_thousands(all_parsers):
"""

parser = all_parsers
warn = UserWarning

if parser.engine == "pyarrow":
# DeprecationWarning for passing a Manager object
warn = (UserWarning, DeprecationWarning)
msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
parse_dates=[1],
usecols=[1, 2],
thousands="-",
)
return

result = parser.read_csv_check_warnings(
warn,
UserWarning,
"Could not infer format",
StringIO(data),
parse_dates=[1],
Expand Down