diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index ae047939c6526..e9d4e45c07925 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -42,6 +42,7 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
 def is_iterator(obj: object) -> bool: ...
 def is_scalar(val: object) -> bool: ...
 def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
+def is_pyarrow_array(obj: object) -> bool: ...
 def is_period(val: object) -> TypeGuard[Period]: ...
 def is_interval(val: object) -> TypeGuard[Interval]: ...
 def is_decimal(val: object) -> TypeGuard[Decimal]: ...
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 80592d4f67c98..48819655f4d4c 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -149,6 +149,16 @@ i8max = INT64_MAX
 u8max = UINT64_MAX
 
 
+cdef bint PYARROW_INSTALLED = False
+
+try:
+    import pyarrow as pa
+
+    PYARROW_INSTALLED = True
+except ImportError:
+    pa = None
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def memory_usage_of_objects(arr: object[:]) -> int64_t:
@@ -1177,6 +1187,19 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
     )
 
 
+def is_pyarrow_array(obj):
+    """
+    Return True if given object is a pyarrow Array or ChunkedArray.
+
+    Returns
+    -------
+    bool
+    """
+    if PYARROW_INSTALLED:
+        return isinstance(obj, (pa.Array, pa.ChunkedArray))
+    return False
+
+
 _TYPE_MAP = {
     "categorical": "categorical",
     "category": "categorical",
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index c9dc20cf93ddd..d2cd183f26173 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -355,7 +355,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
             result[na_values] = libmissing.NA
 
         else:
-            if hasattr(scalars, "type"):
+            if lib.is_pyarrow_array(scalars):
                 # pyarrow array; we cannot rely on the "to_numpy" check in
                 # ensure_string_array because calling scalars.to_numpy would set
                 # zero_copy_only to True which caused problems see GH#52076
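
For reference, a minimal sketch (not part of the diff) of how the new lib.is_pyarrow_array helper is expected to behave once this change is built. It assumes pyarrow is installed; the sample arrays are illustrative only. When pyarrow is not installed, PYARROW_INSTALLED stays False and the helper always returns False, so StringArray._from_sequence falls through to the existing code path.

    import pyarrow as pa
    from pandas._libs import lib

    arr = pa.array(["a", "b", None])                    # pyarrow Array
    chunked = pa.chunked_array([arr, pa.array(["c"])])  # pyarrow ChunkedArray

    lib.is_pyarrow_array(arr)         # True
    lib.is_pyarrow_array(chunked)     # True
    lib.is_pyarrow_array(["a", "b"])  # False: a plain list is not a pyarrow container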