Skip to content

TST: incorrect pyarrow xfails #50691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 80 additions & 30 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,20 +351,15 @@ def check_accumulate(self, s, op_name, skipna):
self.assert_series_equal(result, expected, check_dtype=False)

@pytest.mark.parametrize("skipna", [True, False])
def test_accumulate_series_raises(
self, data, all_numeric_accumulations, skipna, request
):
def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna):
pa_type = data.dtype.pyarrow_dtype
if (
(pa.types.is_integer(pa_type) or pa.types.is_floating(pa_type))
and all_numeric_accumulations == "cumsum"
and not pa_version_under9p0
):
request.node.add_marker(
pytest.mark.xfail(
reason=f"{all_numeric_accumulations} implemented for {pa_type}"
)
)
pytest.skip("These work, are tested by test_accumulate_series.")

op_name = all_numeric_accumulations
ser = pd.Series(data)

Expand All @@ -374,21 +369,43 @@ def test_accumulate_series_raises(
@pytest.mark.parametrize("skipna", [True, False])
def test_accumulate_series(self, data, all_numeric_accumulations, skipna, request):
pa_type = data.dtype.pyarrow_dtype
op_name = all_numeric_accumulations
ser = pd.Series(data)

do_skip = False
if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you change these to use pytest skips, with a message saying that the combination of type and accumulator isn't supported?

if op_name in ["cumsum", "cumprod"]:
do_skip = True
elif pa.types.is_temporal(pa_type) and not pa.types.is_duration(pa_type):
if op_name in ["cumsum", "cumprod"]:
do_skip = True
elif pa.types.is_duration(pa_type):
if op_name == "cumprod":
do_skip = True

if do_skip:
pytest.skip(
"These should *not* work, we test in test_accumulate_series_raises "
"that these correctly raise."
)

if all_numeric_accumulations != "cumsum" or pa_version_under9p0:
request.node.add_marker(
pytest.mark.xfail(
reason=f"{all_numeric_accumulations} not implemented",
raises=NotImplementedError,
)
)
elif not (pa.types.is_integer(pa_type) or pa.types.is_floating(pa_type)):
elif all_numeric_accumulations == "cumsum" and (
pa.types.is_duration(pa_type) or pa.types.is_boolean(pa_type)
):
request.node.add_marker(
pytest.mark.xfail(
reason=f"{all_numeric_accumulations} not implemented for {pa_type}"
reason=f"{all_numeric_accumulations} not implemented for {pa_type}",
raises=NotImplementedError,
)
)
op_name = all_numeric_accumulations
ser = pd.Series(data)

self.check_accumulate(ser, op_name, skipna)


Expand All @@ -415,6 +432,47 @@ def check_reduce(self, ser, op_name, skipna):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
pa_dtype = data.dtype.pyarrow_dtype
opname = all_numeric_reductions

ser = pd.Series(data)

should_work = True
if pa.types.is_temporal(pa_dtype) and opname in [
"sum",
"var",
"skew",
"kurt",
"prod",
]:
if pa.types.is_duration(pa_dtype) and opname in ["sum"]:
# summing timedeltas is one case that *is* well-defined
pass
else:
should_work = False
elif (
pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype)
) and opname in [
"sum",
"mean",
"median",
"prod",
"std",
"sem",
"var",
"skew",
"kurt",
]:
should_work = False

if not should_work:
# matching the non-pyarrow versions, these operations *should* not
# work for these dtypes
msg = f"does not support reduction '{opname}'"
with pytest.raises(TypeError, match=msg):
getattr(ser, opname)(skipna=skipna)

return

xfail_mark = pytest.mark.xfail(
raises=TypeError,
reason=(
Expand Down Expand Up @@ -446,24 +504,16 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
),
)
)
elif (
not (
pa.types.is_integer(pa_dtype)
or pa.types.is_floating(pa_dtype)
or pa.types.is_boolean(pa_dtype)
)
and not (
all_numeric_reductions in {"min", "max"}
and (
(
pa.types.is_temporal(pa_dtype)
and not pa.types.is_duration(pa_dtype)
)
or pa.types.is_string(pa_dtype)
or pa.types.is_binary(pa_dtype)
)
)
and not all_numeric_reductions == "count"

elif all_numeric_reductions in [
"mean",
"median",
"std",
"sem",
] and pa.types.is_temporal(pa_dtype):
request.node.add_marker(xfail_mark)
elif all_numeric_reductions in ["sum", "min", "max"] and pa.types.is_duration(
pa_dtype
):
request.node.add_marker(xfail_mark)
elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in {
Expand Down