Closed
Description
Code Sample, a copy-pastable example if possible
In [1]: import pandas as pd, numpy as np
# expected behaviour with ordinary dtype
In [2]: pd.Series([True, False], dtype=int)
Out[2]:
0 1
1 0
dtype: int64
# broken
In [3]: pd.Series([True, False], dtype=pd.Int64Dtype())
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure)
694 if is_integer_dtype(dtype):
--> 695 subarr = maybe_cast_to_integer_array(arr, dtype)
696
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in maybe_cast_to_integer_array(arr, dtype, copy)
1304 if not hasattr(arr, "astype"):
-> 1305 casted = np.array(arr, dtype=dtype, copy=copy)
1306 else:
TypeError: data type not understood
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-3-b747cfcdf17f> in <module>
----> 1 pd.Series([True, False], dtype=pd.Int64Dtype())
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
260 else:
261 data = sanitize_array(data, index, dtype, copy,
--> 262 raise_cast_failure=True)
263
264 data = SingleBlockManager(data, index, fastpath=True)
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
605 try:
606 subarr = _try_cast(data, False, dtype, copy,
--> 607 raise_cast_failure)
608 except Exception:
609 if raise_cast_failure: # pragma: no cover
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure)
714 # create an extension array from its dtype
715 array_type = dtype.construct_array_type()._from_sequence
--> 716 subarr = array_type(arr, dtype=dtype, copy=copy)
717 elif dtype is not None and raise_cast_failure:
718 raise
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/arrays/integer.py in _from_sequence(cls, scalars, dtype, copy)
301 @classmethod
302 def _from_sequence(cls, scalars, dtype=None, copy=False):
--> 303 return integer_array(scalars, dtype=dtype, copy=copy)
304
305 @classmethod
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/arrays/integer.py in integer_array(values, dtype, copy)
109 TypeError if incompatible types
110 """
--> 111 values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
112 return IntegerArray(values, mask)
113
/usr/local/anaconda3/lib/python3.7/site-packages/pandas/core/arrays/integer.py in coerce_to_array(values, dtype, mask, copy)
190 elif not (is_integer_dtype(values) or is_float_dtype(values)):
191 raise TypeError("{} cannot be converted to an IntegerDtype".format(
--> 192 values.dtype))
193
194 if mask is None:
TypeError: bool cannot be converted to an IntegerDtype
Problem description
Pandas is unable to convert an array of bools to an IntegerDtype, even though conversion to plain int is supported. Interestingly, if the array contains NaNs, the conversion works as expected.
In [4]: pd.Series([True, False, np.nan], dtype=pd.Int64Dtype())
Out[4]:
0 1
1 0
2 NaN
dtype: Int64
Expected Output
In [5]: pd.Series([True, False], dtype=pd.Int64Dtype())
Out[5]:
0 1
1 0
dtype: Int64
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
pandas: 0.24.1
pytest: 4.2.0
pip: 19.0.1
setuptools: 40.7.3
Cython: 0.29.4
numpy: 1.15.4
scipy: 1.2.0
pyarrow: None
xarray: None
IPython: 7.2.0
sphinx: 1.8.4
patsy: None
dateutil: 2.7.5
pytz: 2018.9
blosc: None
bottleneck: 1.2.1
tables: None
numexpr: None
feather: None
matplotlib: 3.0.2
openpyxl: 2.5.14
xlrd: 1.2.0
xlwt: 1.3.0
xlsxwriter: 1.1.2
lxml.etree: 4.3.0
bs4: 4.7.1
html5lib: 1.0.1
sqlalchemy: 1.2.17
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None