diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 9c867544a324b..023cb68300433 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -34,6 +34,7 @@ Bug fixes - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`) - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) - Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`) +- Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`) - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`) - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b60d29aff6991..1f99f2494527b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -55,6 +55,7 @@ ExtensionArray, ExtensionArraySupportsAnyAll, ) +from pandas.core.arrays.masked import BaseMaskedArray from pandas.core.arrays.string_ import StringDtype import pandas.core.common as com from pandas.core.indexers import ( @@ -450,6 +451,9 @@ def _cmp_method(self, other, op): result = pc_func(self._pa_array, other._pa_array) elif isinstance(other, (np.ndarray, list)): result = pc_func(self._pa_array, other) + elif isinstance(other, BaseMaskedArray): + # GH 52625 + result = pc_func(self._pa_array, other.__arrow_array__()) elif is_scalar(other): try: result = pc_func(self._pa_array, pa.scalar(other)) @@ -497,6 +501,9 @@ def _evaluate_op_method(self, other, op, arrow_funcs): result = pc_func(self._pa_array, other._pa_array) elif isinstance(other, (np.ndarray, list)): result = pc_func(self._pa_array, pa.array(other, from_pandas=True)) + elif isinstance(other, BaseMaskedArray): + # GH 52625 + result = pc_func(self._pa_array, other.__arrow_array__()) elif is_scalar(other): if isna(other) and op.__name__ in ARROW_LOGICAL_FUNCS: # pyarrow kleene ops require null to be typed diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7e4532b1ee326..9bf266267a7bd 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -21,6 +21,7 @@ BytesIO, StringIO, ) +import operator import pickle import re @@ -1216,7 +1217,7 @@ def test_add_series_with_extension_array(self, data, request): class TestBaseComparisonOps(base.BaseComparisonOpsTests): - def test_compare_array(self, data, comparison_op, na_value, request): + def test_compare_array(self, data, comparison_op, na_value): ser = pd.Series(data) # pd.Series([ser.iloc[0]] * len(ser)) may not return ArrowExtensionArray # since ser.iloc[0] is a python scalar @@ -1255,6 +1256,20 @@ def test_invalid_other_comp(self, data, comparison_op): ): comparison_op(data, object()) + @pytest.mark.parametrize("masked_dtype", ["boolean", "Int64", "Float64"]) + def test_comp_masked_numpy(self, masked_dtype, comparison_op): + # GH 52625 + data = [1, 0, None] + ser_masked = pd.Series(data, dtype=masked_dtype) + ser_pa = pd.Series(data, dtype=f"{masked_dtype.lower()}[pyarrow]") + result = comparison_op(ser_pa, ser_masked) + if comparison_op in [operator.lt, operator.gt, operator.ne]: + exp = [False, False, None] + else: + exp = [True, True, None] + expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_())) + tm.assert_series_equal(result, expected) + class TestLogicalOps: """Various Series and DataFrame logical ops methods.""" @@ -1399,6 +1414,23 @@ def test_kleene_xor_scalar(self, other, expected): a, pd.Series([True, False, None], dtype="boolean[pyarrow]") ) + @pytest.mark.parametrize( + "op, exp", + [ + ["__and__", True], + ["__or__", True], + ["__xor__", False], + ], + ) + def test_logical_masked_numpy(self, op, exp): + # GH 52625 + data = [True, False, None] + ser_masked = pd.Series(data, dtype="boolean") + ser_pa = pd.Series(data, dtype="boolean[pyarrow]") + result = getattr(ser_pa, op)(ser_masked) + expected = pd.Series([exp, False, None], dtype=ArrowDtype(pa.bool_())) + tm.assert_series_equal(result, expected) + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"):