pandas-dev · mroeschke · May 31, 2023 · May 25, 2023 · May 25, 2023 · May 25, 2023
diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
@@ -31,6 +31,7 @@ Bug fixes
 - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
 - Bug in :func:`merge` when merging on datetime columns on different resolutions (:issue:`53200`)
 - Bug in :func:`read_csv` raising ``OverflowError`` for ``engine="pyarrow"`` and ``parse_dates`` set (:issue:`53295`)
+- Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` (:issue:`53390`)
 - Bug in :func:`to_datetime` was inferring format to contain ``"%H"`` instead of ``"%I"`` if date contained "AM" / "PM" tokens (:issue:`53147`)
 - Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
 - Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -111,6 +111,8 @@ from pandas.core.dtypes.dtypes import (
 )
 from pandas.core.dtypes.inference import is_dict_like
 
+from pandas.core.arrays.boolean import BooleanDtype
+
 cdef:
     float64_t INF = <float64_t>np.inf
     float64_t NEGINF = -INF
@@ -1194,7 +1196,9 @@ cdef class TextReader:
             array_type = dtype.construct_array_type()
             try:
                 # use _from_sequence_of_strings if the class defines it
-                if dtype.kind == "b":
+                if isinstance(dtype, BooleanDtype):
+                    # xref GH 47534: BooleanArray._from_sequence_of_strings has extra
+                    # kwargs
                     true_values = [x.decode() for x in self.true_values]
                     false_values = [x.decode() for x in self.false_values]
                     result = array_type._from_sequence_of_strings(

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -75,6 +75,7 @@
     FloatingArray,
     IntegerArray,
 )
+from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.indexes.api import (
     Index,
     MultiIndex,
@@ -809,7 +810,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi
         elif isinstance(cast_type, ExtensionDtype):
             array_type = cast_type.construct_array_type()
             try:
-                if is_bool_dtype(cast_type):
+                if isinstance(cast_type, BooleanDtype):
                     # error: Unexpected keyword argument "true_values" for
                     # "_from_sequence_of_strings" of "ExtensionArray"
                     return array_type._from_sequence_of_strings(  # type: ignore[call-arg]  # noqa: E501

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -538,3 +538,17 @@ def test_ea_int_avoid_overflow(all_parsers):
         }
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_dtype_bool_pyarrow(all_parsers):
+    # GH 53390
+    pytest.importorskip("pyarrow")
+    parser = all_parsers
+    data = """col
+True
+False
+True
+"""
+    result = parser.read_csv(StringIO(data), dtype={"col": "bool[pyarrow]"})
+    expected = DataFrame({"col": [True, False, True]}, dtype="bool[pyarrow]")
+    tm.assert_frame_equal(result, expected)