diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 894dc75e351a4..efd44b1bca3f2 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1,6 +1,7 @@ from __future__ import annotations from copy import deepcopy +import operator import re from typing import ( TYPE_CHECKING, @@ -51,6 +52,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core import roperator from pandas.core.arraylike import OpsMixin from pandas.core.arrays.base import ExtensionArray import pandas.core.common as com @@ -459,6 +461,29 @@ def _cmp_method(self, other, op): return BooleanArray(values, mask) def _evaluate_op_method(self, other, op, arrow_funcs): + pa_type = self._data.type + if (pa.types.is_string(pa_type) or pa.types.is_binary(pa_type)) and op in [ + operator.add, + roperator.radd, + ]: + length = self._data.length() + + seps: list[str] | list[bytes] + if pa.types.is_string(pa_type): + seps = [""] * length + else: + seps = [b""] * length + + if is_scalar(other): + other = [other] * length + elif isinstance(other, type(self)): + other = other._data + if op is operator.add: + result = pc.binary_join_element_wise(self._data, other, seps) + else: + result = pc.binary_join_element_wise(other, self._data, seps) + return type(self)(result) + pc_func = arrow_funcs[op.__name__] if pc_func is NotImplemented: raise NotImplementedError(f"{op.__name__} not implemented.") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 8f9bf83881d3e..adb86b568e891 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -96,15 +96,7 @@ def test_astype_roundtrip(dtype): tm.assert_series_equal(result, ser) -def test_add(dtype, request): - if dtype.storage == "pyarrow": - reason = ( - "unsupported operand type(s) for +: 'ArrowStringArray' and " - "'ArrowStringArray'" - ) - mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) - request.node.add_marker(mark) - +def test_add(dtype): a = pd.Series(["a", "b", "c", None, None], dtype=dtype) b = pd.Series(["x", "y", None, "z", None], dtype=dtype) @@ -140,12 +132,7 @@ def test_add_2d(dtype, request): s + b -def test_add_sequence(dtype, request): - if dtype.storage == "pyarrow": - reason = "unsupported operand type(s) for +: 'ArrowStringArray' and 'list'" - mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason) - request.node.add_marker(mark) - +def test_add_sequence(dtype): a = pd.array(["a", "b", None, None], dtype=dtype) other = ["x", None, "y", None] diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 1b1cbc8130e4d..2140a2e71eda9 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1013,6 +1013,10 @@ def _get_scalar_exception(self, opname, pa_dtype): exc = NotImplementedError elif arrow_temporal_supported: exc = None + elif opname in ["__add__", "__radd__"] and ( + pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype) + ): + exc = None elif not (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)): exc = pa.ArrowNotImplementedError else: @@ -1187,9 +1191,7 @@ def test_add_series_with_extension_array(self, data, request): return if (pa_version_under8p0 and pa.types.is_duration(pa_dtype)) or ( - pa.types.is_binary(pa_dtype) - or pa.types.is_string(pa_dtype) - or pa.types.is_boolean(pa_dtype) + pa.types.is_boolean(pa_dtype) ): request.node.add_marker( pytest.mark.xfail( diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index 88d928ceecc43..c439a5f006922 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -6,7 +6,6 @@ MultiIndex, Series, _testing as tm, - get_option, ) from pandas.core.strings.accessor import StringMethods @@ -124,16 +123,8 @@ def test_api_per_method( method(*args, **kwargs) -def test_api_for_categorical(any_string_method, any_string_dtype, request): +def test_api_for_categorical(any_string_method, any_string_dtype): # https://github.com/pandas-dev/pandas/issues/10661 - - if any_string_dtype == "string[pyarrow]" or ( - any_string_dtype == "string" and get_option("string_storage") == "pyarrow" - ): - # unsupported operand type(s) for +: 'ArrowStringArray' and 'str' - mark = pytest.mark.xfail(raises=NotImplementedError, reason="Not Implemented") - request.node.add_marker(mark) - s = Series(list("aabb"), dtype=any_string_dtype) s = s + " " + s c = s.astype("category")