diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 9b6b1ab3b8909..ff99d6b759d66 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -35,6 +35,7 @@ pa_version_under17p0, pa_version_under18p0, pa_version_under19p0, + pa_version_under20p0, ) if TYPE_CHECKING: @@ -195,6 +196,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: "pa_version_under17p0", "pa_version_under18p0", "pa_version_under19p0", + "pa_version_under20p0", "HAS_PYARROW", "IS64", "ISMUSL", diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a63cde8022e24..17fe36c4b4469 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -40,6 +40,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, + pa_version_under20p0, ) from pandas.core.dtypes.dtypes import ( @@ -448,6 +449,9 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if op_name == "kurt" or (pa_version_under20p0 and op_name == "skew"): + return False + dtype = ser.dtype # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has # no attribute "pyarrow_dtype" @@ -464,7 +468,7 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: pass else: return False - elif pa.types.is_binary(pa_dtype) and op_name == "sum": + elif pa.types.is_binary(pa_dtype) and op_name in ["sum", "skew"]: return False elif ( pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype) @@ -525,18 +529,31 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque f"pyarrow={pa.__version__} for {pa_dtype}" ), ) - if all_numeric_reductions in {"skew", "kurt"} and ( - dtype._is_numeric or dtype.kind == "b" - ): - request.applymarker(xfail_mark) - - elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { + if pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { "sem", "std", "var", "median", }: request.applymarker(xfail_mark) + elif ( + not pa_version_under20p0 + and all_numeric_reductions == "skew" + and ( + pa.types.is_boolean(pa_dtype) + or ( + skipna + and ( + pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype) + ) + ) + ) + ): + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/apache/arrow/issues/45733", + ) + ) super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("skipna", [True, False]) @@ -563,7 +580,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if op_name in ["max", "min"]: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": - if op_name not in ["median", "var", "std"]: + if op_name not in ["median", "var", "std", "skew"]: cmp_dtype = arr.dtype else: cmp_dtype = "float64[pyarrow]" @@ -582,7 +599,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): op_name = all_numeric_reductions - if op_name == "skew": + if op_name == "skew" and pa_version_under20p0: if data.dtype._is_numeric: mark = pytest.mark.xfail(reason="skew not implemented") request.applymarker(mark)