Closed
Description
It seems that pandas' nullable `boolean` dtype has a bug: comparing a `boolean` Series with a string raises a `TypeError`.
Code to reproduce (pandas version 1.3.4):
import pandas as pd
s = pd.Series([True, False, pd.NA], dtype="boolean")
s == "X"
The code above fails with the following error (in the captured traceback the Series is named `a` rather than `s`):
TypeError Traceback (most recent call last)
<ipython-input-41-f42702e3c612> in <module>
----> 1 a == "X"
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/ops/common.py in new_method(self, other)
67 other = item_from_zerodim(other)
68
---> 69 return method(self, other)
70
71 return new_method
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/arraylike.py in __eq__(self, other)
30 @unpack_zerodim_and_defer("__eq__")
31 def __eq__(self, other):
---> 32 return self._cmp_method(other, operator.eq)
33
34 @unpack_zerodim_and_defer("__ne__")
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
5500
5501 with np.errstate(all="ignore"):
-> 5502 res_values = ops.comparison_op(lvalues, rvalues, op)
5503
5504 return self._construct_result(res_values, name=res_name)
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
268 ):
269 # Call the method on lvalues
--> 270 res_values = op(lvalues, rvalues)
271
272 elif is_scalar(rvalues) and isna(rvalues):
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/ops/common.py in new_method(self, other)
67 other = item_from_zerodim(other)
68
---> 69 return method(self, other)
70
71 return new_method
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/arraylike.py in __eq__(self, other)
30 @unpack_zerodim_and_defer("__eq__")
31 def __eq__(self, other):
---> 32 return self._cmp_method(other, operator.eq)
33
34 @unpack_zerodim_and_defer("__ne__")
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/arrays/boolean.py in _cmp_method(self, other, op)
671 mask = self._mask | mask
672
--> 673 return BooleanArray(result, mask, copy=False)
674
675 def _arith_method(self, other, op):
~/virtenvs/mkl/lib/python3.9/site-packages/pandas/core/arrays/boolean.py in __init__(self, values, mask, copy)
297 def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
298 if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
--> 299 raise TypeError(
300 "values should be boolean numpy array. Use "
301 "the 'pd.array' function instead"
TypeError: values should be boolean numpy array. Use the 'pd.array' function instead