diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 8b4833f6ce043..e7bfda82494a3 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtypes`` is a dict for ``engine="python"`` (:issue:`54868`) - Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`) - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`) +- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`) - Fixed regression in :meth:`Series.value_counts` raising for numeric data if ``bins`` was specified (:issue:`54857`) - Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5e22b774fb880..1d74bb8b83e4e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1000,7 +1000,7 @@ def duplicated( duplicated : ndarray[bool] """ if hasattr(values, "dtype"): - if isinstance(values.dtype, ArrowDtype): + if isinstance(values.dtype, ArrowDtype) and values.dtype.kind in "ifub": values = values._to_masked() # type: ignore[union-attr] if isinstance(values.dtype, BaseMaskedDtype): diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 324ab1204e16e..10b2e98586365 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import ( Categorical, Series, @@ -256,3 +257,11 @@ def test_duplicated_arrow_dtype(self): result = ser.drop_duplicates() expected = Series([True, False, None], dtype="bool[pyarrow]") tm.assert_series_equal(result, expected) + + def test_drop_duplicates_arrow_strings(self): + # GH#54904 + pa = pytest.importorskip("pyarrow") + ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string())) + result = ser.drop_duplicates() + expecetd = Series(["a"], dtype=pd.ArrowDtype(pa.string())) + tm.assert_series_equal(result, expecetd)