diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 73779d7e4cc74..a25999d8085cb 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -20,7 +20,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) .. --------------------------------------------------------------------------- .. _whatsnew_203.other: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a45299c8ba896..8b23c80afee31 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -111,6 +111,8 @@ from pandas.core.dtypes.dtypes import ( ) from pandas.core.dtypes.inference import is_dict_like +from pandas.core.arrays.boolean import BooleanDtype + cdef: float64_t INF = np.inf float64_t NEGINF = -INF @@ -1194,7 +1196,9 @@ cdef class TextReader: array_type = dtype.construct_array_type() try: # use _from_sequence_of_strings if the class defines it - if dtype.kind == "b": + if isinstance(dtype, BooleanDtype): + # xref GH 47534: BooleanArray._from_sequence_of_strings has extra + # kwargs true_values = [x.decode() for x in self.true_values] false_values = [x.decode() for x in self.false_values] result = array_type._from_sequence_of_strings( diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index cb8cccebb6978..f79b7ebe7efef 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -75,6 +75,7 @@ FloatingArray, IntegerArray, ) +from pandas.core.arrays.boolean import BooleanDtype from pandas.core.indexes.api import ( Index, MultiIndex, @@ -809,7 +810,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi elif isinstance(cast_type, ExtensionDtype): array_type = cast_type.construct_array_type() try: - if is_bool_dtype(cast_type): + if isinstance(cast_type, BooleanDtype): # error: Unexpected keyword argument "true_values" for # "_from_sequence_of_strings" of "ExtensionArray" return array_type._from_sequence_of_strings( # type: ignore[call-arg] # noqa: E501 diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9129e84700a55..a0d15c70b5720 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -47,6 +47,7 @@ import pandas as pd import pandas._testing as tm +from pandas.api.extensions import no_default from pandas.api.types import ( is_bool_dtype, is_float_dtype, @@ -723,14 +724,11 @@ def test_setitem_preserves_views(self, data): class TestBaseParsing(base.BaseParsingTests): + @pytest.mark.parametrize("dtype_backend", ["pyarrow", no_default]) @pytest.mark.parametrize("engine", ["c", "python"]) - def test_EA_types(self, engine, data, request): + def test_EA_types(self, engine, data, dtype_backend, request): pa_dtype = data.dtype.pyarrow_dtype - if pa.types.is_boolean(pa_dtype): - request.node.add_marker( - pytest.mark.xfail(raises=TypeError, reason="GH 47534") - ) - elif pa.types.is_decimal(pa_dtype): + if pa.types.is_decimal(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=NotImplementedError, @@ -755,7 +753,10 @@ def test_EA_types(self, engine, data, request): else: csv_output = StringIO(csv_output) result = pd.read_csv( - csv_output, dtype={"with_dtype": str(data.dtype)}, engine=engine + csv_output, + dtype={"with_dtype": str(data.dtype)}, + engine=engine, + dtype_backend=dtype_backend, ) expected = df self.assert_frame_equal(result, expected)