Skip to content

Commit 1a57d15

Browse files
authored
BUG: read_csv with dtype=bool[pyarrow] (#53391)
* BUG: read_csv with dtype=bool[pyarrow]
* Use existing test instead
* Clarify whatsnew
* Move to 2.0.3
1 parent 05ee112 commit 1a57d15

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

doc/source/whatsnew/v2.0.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Fixed regressions
2121

2222
Bug fixes
2323
~~~~~~~~~
24-
-
24+
- Bug in :func:`read_csv` raising ``TypeError`` when specifying ``dtype="bool[pyarrow]"`` with the ``"c"`` and ``"python"`` engines (:issue:`53390`)
2525

2626
.. ---------------------------------------------------------------------------
2727
.. _whatsnew_203.other:

pandas/_libs/parsers.pyx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ from pandas.core.dtypes.dtypes import (
111111
)
112112
from pandas.core.dtypes.inference import is_dict_like
113113

114+
from pandas.core.arrays.boolean import BooleanDtype
115+
114116
cdef:
115117
float64_t INF = <float64_t>np.inf
116118
float64_t NEGINF = -INF
@@ -1194,7 +1196,9 @@ cdef class TextReader:
11941196
array_type = dtype.construct_array_type()
11951197
try:
11961198
# use _from_sequence_of_strings if the class defines it
1197-
if dtype.kind == "b":
1199+
if isinstance(dtype, BooleanDtype):
1200+
# xref GH 47534: BooleanArray._from_sequence_of_strings has extra
1201+
# kwargs
11981202
true_values = [x.decode() for x in self.true_values]
11991203
false_values = [x.decode() for x in self.false_values]
12001204
result = array_type._from_sequence_of_strings(

pandas/io/parsers/base_parser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
FloatingArray,
7676
IntegerArray,
7777
)
78+
from pandas.core.arrays.boolean import BooleanDtype
7879
from pandas.core.indexes.api import (
7980
Index,
8081
MultiIndex,
@@ -809,7 +810,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi
809810
elif isinstance(cast_type, ExtensionDtype):
810811
array_type = cast_type.construct_array_type()
811812
try:
812-
if is_bool_dtype(cast_type):
813+
if isinstance(cast_type, BooleanDtype):
813814
# error: Unexpected keyword argument "true_values" for
814815
# "_from_sequence_of_strings" of "ExtensionArray"
815816
return array_type._from_sequence_of_strings( # type: ignore[call-arg] # noqa: E501

pandas/tests/extension/test_arrow.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
import pandas as pd
4949
import pandas._testing as tm
50+
from pandas.api.extensions import no_default
5051
from pandas.api.types import (
5152
is_bool_dtype,
5253
is_float_dtype,
@@ -723,14 +724,11 @@ def test_setitem_preserves_views(self, data):
723724

724725

725726
class TestBaseParsing(base.BaseParsingTests):
727+
@pytest.mark.parametrize("dtype_backend", ["pyarrow", no_default])
726728
@pytest.mark.parametrize("engine", ["c", "python"])
727-
def test_EA_types(self, engine, data, request):
729+
def test_EA_types(self, engine, data, dtype_backend, request):
728730
pa_dtype = data.dtype.pyarrow_dtype
729-
if pa.types.is_boolean(pa_dtype):
730-
request.node.add_marker(
731-
pytest.mark.xfail(raises=TypeError, reason="GH 47534")
732-
)
733-
elif pa.types.is_decimal(pa_dtype):
731+
if pa.types.is_decimal(pa_dtype):
734732
request.node.add_marker(
735733
pytest.mark.xfail(
736734
raises=NotImplementedError,
@@ -755,7 +753,10 @@ def test_EA_types(self, engine, data, request):
755753
else:
756754
csv_output = StringIO(csv_output)
757755
result = pd.read_csv(
758-
csv_output, dtype={"with_dtype": str(data.dtype)}, engine=engine
756+
csv_output,
757+
dtype={"with_dtype": str(data.dtype)},
758+
engine=engine,
759+
dtype_backend=dtype_backend,
759760
)
760761
expected = df
761762
self.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)