From c9cf126133749abfa3e906b372856ecddbb410fa Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 11 Jan 2023 20:01:59 -0800 Subject: [PATCH 1/2] TST: incorrect pyarrow xfails --- pandas/tests/extension/test_arrow.py | 110 +++++++++++++++++++-------- 1 file changed, 80 insertions(+), 30 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 78c49ae066288..05bba78027653 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -351,20 +351,16 @@ def check_accumulate(self, s, op_name, skipna): self.assert_series_equal(result, expected, check_dtype=False) @pytest.mark.parametrize("skipna", [True, False]) - def test_accumulate_series_raises( - self, data, all_numeric_accumulations, skipna, request - ): + def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): pa_type = data.dtype.pyarrow_dtype if ( (pa.types.is_integer(pa_type) or pa.types.is_floating(pa_type)) and all_numeric_accumulations == "cumsum" and not pa_version_under9p0 ): - request.node.add_marker( - pytest.mark.xfail( - reason=f"{all_numeric_accumulations} implemented for {pa_type}" - ) - ) + # These work, are tested by test_accumulate_series + return + op_name = all_numeric_accumulations ser = pd.Series(data) @@ -374,6 +370,25 @@ def test_accumulate_series_raises( @pytest.mark.parametrize("skipna", [True, False]) def test_accumulate_series(self, data, all_numeric_accumulations, skipna, request): pa_type = data.dtype.pyarrow_dtype + op_name = all_numeric_accumulations + ser = pd.Series(data) + + if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type): + if op_name in ["cumsum", "cumprod"]: + # These should *not* work, we test in test_accumulate_series_raises + # that these correctly raise + return + elif pa.types.is_temporal(pa_type) and not pa.types.is_duration(pa_type): + if op_name in ["cumsum", "cumprod"]: + # These should *not* work, we test in test_accumulate_series_raises + # that these correctly raise + return + elif pa.types.is_duration(pa_type): + if op_name == "cumprod": + # These should *not* work, we test in test_accumulate_series_raises + # that these correctly raise + return + if all_numeric_accumulations != "cumsum" or pa_version_under9p0: request.node.add_marker( pytest.mark.xfail( @@ -381,14 +396,16 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques raises=NotImplementedError, ) ) - elif not (pa.types.is_integer(pa_type) or pa.types.is_floating(pa_type)): + elif all_numeric_accumulations == "cumsum" and ( + pa.types.is_duration(pa_type) or pa.types.is_boolean(pa_type) + ): request.node.add_marker( pytest.mark.xfail( - reason=f"{all_numeric_accumulations} not implemented for {pa_type}" + reason=f"{all_numeric_accumulations} not implemented for {pa_type}", + raises=NotImplementedError, ) ) - op_name = all_numeric_accumulations - ser = pd.Series(data) + self.check_accumulate(ser, op_name, skipna) @@ -415,6 +432,47 @@ def check_reduce(self, ser, op_name, skipna): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series(self, data, all_numeric_reductions, skipna, request): pa_dtype = data.dtype.pyarrow_dtype + opname = all_numeric_reductions + + ser = pd.Series(data) + + should_work = True + if pa.types.is_temporal(pa_dtype) and opname in [ + "sum", + "var", + "skew", + "kurt", + "prod", + ]: + if pa.types.is_duration(pa_dtype) and opname in ["sum"]: + # summing timedeltas is one case that *is* well-defined + pass + else: + should_work = False + elif ( + pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype) + ) and opname in [ + "sum", + "mean", + "median", + "prod", + "std", + "sem", + "var", + "skew", + "kurt", + ]: + should_work = False + + if not should_work: + # matching the non-pyarrow versions, these operations *should* not + # work for these dtypes + msg = f"does not support reduction '{opname}'" + with pytest.raises(TypeError, match=msg): + getattr(ser, opname)(skipna=skipna) + + return + xfail_mark = pytest.mark.xfail( raises=TypeError, reason=( @@ -446,24 +504,16 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): ), ) ) - elif ( - not ( - pa.types.is_integer(pa_dtype) - or pa.types.is_floating(pa_dtype) - or pa.types.is_boolean(pa_dtype) - ) - and not ( - all_numeric_reductions in {"min", "max"} - and ( - ( - pa.types.is_temporal(pa_dtype) - and not pa.types.is_duration(pa_dtype) - ) - or pa.types.is_string(pa_dtype) - or pa.types.is_binary(pa_dtype) - ) - ) - and not all_numeric_reductions == "count" + + elif all_numeric_reductions in [ + "mean", + "median", + "std", + "sem", + ] and pa.types.is_temporal(pa_dtype): + request.node.add_marker(xfail_mark) + elif all_numeric_reductions in ["sum", "min", "max"] and pa.types.is_duration( + pa_dtype ): request.node.add_marker(xfail_mark) elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { From fff85fc61d4c7472bb2da07abcd9de9246d947aa Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 12 Jan 2023 11:17:31 -0800 Subject: [PATCH 2/2] change return to pytest.skip --- pandas/tests/extension/test_arrow.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 05bba78027653..2dc635f18ffae 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -358,8 +358,7 @@ def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna) and all_numeric_accumulations == "cumsum" and not pa_version_under9p0 ): - # These work, are tested by test_accumulate_series - return + pytest.skip("These work, are tested by test_accumulate_series.") op_name = all_numeric_accumulations ser = pd.Series(data) @@ -373,21 +372,22 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques op_name = all_numeric_accumulations ser = pd.Series(data) + do_skip = False if pa.types.is_string(pa_type) or pa.types.is_binary(pa_type): if op_name in ["cumsum", "cumprod"]: - # These should *not* work, we test in test_accumulate_series_raises - # that these correctly raise - return + do_skip = True elif pa.types.is_temporal(pa_type) and not pa.types.is_duration(pa_type): if op_name in ["cumsum", "cumprod"]: - # These should *not* work, we test in test_accumulate_series_raises - # that these correctly raise - return + do_skip = True elif pa.types.is_duration(pa_type): if op_name == "cumprod": - # These should *not* work, we test in test_accumulate_series_raises - # that these correctly raise - return + do_skip = True + + if do_skip: + pytest.skip( + "These should *not* work, we test in test_accumulate_series_raises " + "that these correctly raise." + ) if all_numeric_accumulations != "cumsum" or pa_version_under9p0: request.node.add_marker(