-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Implement Kleene logic for BooleanArray #29842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
bb904cb
13c7ea3
fff786f
4067e7f
708c553
c56894e
2e9d547
373aaab
7f78a64
36b171b
747e046
d0a8cca
fe061b0
9f9e44c
0a34257
2ba0034
2d1129a
a24fc22
77dd1fc
7b9002c
c18046b
1237caa
2ecf9b8
87aeb09
969b6dc
1c9ba49
8eec954
cb47b6a
2a946b9
efb6f8b
004238e
5a2c81c
7032318
bbb7f9b
ce763b4
5bc5328
457bd08
31c2bc6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
.. currentmodule:: pandas | ||
|
||
.. _boolean: | ||
|
||
************************** | ||
Nullable Boolean Data Type | ||
************************** | ||
|
||
.. versionadded:: 1.0.0 | ||
|
||
.. _boolean.klean: | ||
|
||
Kleene Logic | ||
------------ | ||
|
||
:class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-valued logic) for | ||
logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). | ||
|
||
Here's a table for ``and``. | ||
|
||
========== =========== ============ | ||
left value right value output value | ||
========== =========== ============ | ||
True True True | ||
True False False | ||
True NA NA | ||
False False False | ||
False NA False | ||
NA NA NA | ||
========== =========== ============ | ||
|
||
|
||
And for ``or`` | ||
|
||
========== =========== ============ | ||
left value right value output value | ||
========== =========== ============ | ||
True True True | ||
True False True | ||
True NA True | ||
False False False | ||
False NA NA | ||
NA NA NA | ||
========== =========== ============ | ||
|
||
And for ``xor`` | ||
|
||
========== =========== ============ | ||
left value right value output value | ||
========== =========== ============ | ||
True True False | ||
True False True | ||
True NA NA | ||
False False False | ||
False NA NA | ||
NA NA NA | ||
========== =========== ============ | ||
|
||
When an ``NA`` is present in an operation, the output value is ``NA`` only if | ||
the result cannot be determined solely based on the other input. For example, | ||
``True | NA`` is ``True``, because both ``True | True`` and ``True | False`` | ||
are ``True``. In that case, we don't actually need to consider the value | ||
of the ``NA``. | ||
|
||
On the other hand, ``True & NA`` is ``NA``. The result depends on whether | ||
the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``, | ||
but ``True & False`` is ``False``, so we can't determine the output. | ||
|
||
|
||
This differs from how ``np.nan`` behaves in logical operations. Pandas treats | ||
``np.nan`` as *always false in the output*. | ||
|
||
In ``or`` | ||
|
||
.. ipython:: python | ||
|
||
pd.Series([True, False, np.nan], dtype="object") | True | ||
pd.Series([True, False, np.nan], dtype="boolean") | True | ||
|
||
In ``and`` | ||
|
||
.. ipython:: python | ||
|
||
pd.Series([True, False, np.nan], dtype="object") & True | ||
pd.Series([True, False, np.nan], dtype="boolean") & True |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -391,13 +391,101 @@ def test_scalar(self, data, all_logical_operators): | |
|
||
def test_array(self, data, all_logical_operators):
    """Compare a logical operator against array-like right-hand operands.

    An all-``True`` operand of the same length as ``data`` is supplied
    three ways: as a boolean extension array, as a NumPy array and as a
    boolean Series. Each is passed to ``self._compare_other``.
    """
    op_name = all_logical_operators
    # Any operator whose name contains "or" is skipped for now.
    if "or" in op_name:
        pytest.skip("confusing")

    all_true = [True] * len(data)
    operands = (
        pd.array(all_true, dtype="boolean"),
        np.array(all_true),
        pd.Series(all_true, dtype="boolean"),
    )
    for other in operands:
        self._compare_other(data, op_name, other)
|
||
def test_kleene_or(self):
    """Check ``|`` over every pairing of True, False and missing.

    ``left`` repeats each value three times and ``right`` cycles through
    the three values, so the nine positions cover the full truth table.
    """
    left = pd.array(
        [True, True, True, False, False, False, None, None, None],
        dtype="boolean",
    )
    right = pd.array(
        [True, False, None, True, False, None, True, False, None],
        dtype="boolean",
    )
    # The result is missing only when neither operand is True and at
    # least one operand is missing.
    expected = pd.array(
        [True, True, True, True, False, None, True, None, None], dtype="boolean"
    )

    forward = left | right
    tm.assert_extension_array_equal(forward, expected)

    # Swapping the operands must give the same answer.
    swapped = right | left
    tm.assert_extension_array_equal(swapped, expected)
|
||
def test_kleene_or_scalar(self):
    """Check ``|`` between a boolean array and a scalar missing value.

    ``True | NA`` is ``True``; the other two positions stay missing.
    """
    arr = pd.array([True, False, None], dtype="boolean")
    expected = pd.array([True, None, None], dtype="boolean")

    forward = arr | np.nan  # TODO: pd.NA
    tm.assert_extension_array_equal(forward, expected)

    # Reflected form: the scalar is on the left.
    reflected = np.nan | arr  # TODO: pd.NA
    tm.assert_extension_array_equal(reflected, expected)
|
||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@pytest.mark.parametrize(
    "left,right,expected",
    [
        ([True, False, None], True, [True, True, True]),
        ([True, False, None], False, [True, False, None]),
        ([True, False, None], np.nan, [True, None, None]),
        # TODO: pd.NA
    ],
)
def test_kleene_or_cases(self, left, right, expected):
    """Check ``|`` for array-versus-scalar cases, in both operand orders.

    Operands given as plain lists are converted to boolean extension
    arrays first; scalar operands are used unchanged.
    """

    def _as_boolean(value):
        # Only lists are wrapped; scalars pass through untouched.
        if isinstance(value, list):
            return pd.array(value, dtype="boolean")
        return value

    left = _as_boolean(left)
    right = _as_boolean(right)
    expected = pd.array(expected, dtype="boolean")

    tm.assert_extension_array_equal(left | right, expected)

    # Reflected form must agree with the forward form.
    tm.assert_extension_array_equal(right | left, expected)
|
||
def test_kleene_and(self):
    """Check ``&`` over every pairing of True, False and missing.

    ``left`` repeats each value three times and ``right`` cycles through
    the three values, so the nine positions cover the full truth table.
    """
    left = pd.array(
        [True, True, True, False, False, False, None, None, None],
        dtype="boolean",
    )
    right = pd.array(
        [True, False, None, True, False, None, True, False, None],
        dtype="boolean",
    )
    # Any False operand forces False; otherwise a missing operand makes
    # the result missing.
    expected = pd.array(
        [True, False, None, False, False, False, None, False, None], dtype="boolean"
    )

    forward = left & right
    tm.assert_extension_array_equal(forward, expected)

    # Swapping the operands must give the same answer.
    swapped = right & left
    tm.assert_extension_array_equal(swapped, expected)
|
||
def test_kleene_and_scalar(self):
    """Check ``&`` between a boolean array and a scalar missing value.

    Under Kleene logic ``False & NA`` is ``False``, because the result is
    ``False`` whatever the missing value stands for. ``True & NA`` and
    ``NA & NA`` cannot be determined and stay missing.
    """
    a = pd.array([True, False, None], dtype="boolean")
    # Only the False element produces a known result.
    expected = pd.array([None, False, None], dtype="boolean")

    result = a & np.nan  # TODO: pd.NA
    tm.assert_extension_array_equal(result, expected)

    # Reflected form: the scalar is on the left.
    result = np.nan & a  # TODO: pd.NA
    tm.assert_extension_array_equal(result, expected)
|
||
def test_kleene_xor(self):
    """Check ``^`` over every pairing of True, False and missing.

    ``left`` repeats each value three times and ``right`` cycles through
    the three values, so the nine positions cover the full truth table.
    """
    left = pd.array(
        [True, True, True, False, False, False, None, None, None],
        dtype="boolean",
    )
    right = pd.array(
        [True, False, None, True, False, None, True, False, None],
        dtype="boolean",
    )
    # A missing operand on either side always makes the result missing.
    expected = pd.array(
        [False, True, None, True, False, None, None, None, None], dtype="boolean"
    )

    forward = left ^ right
    tm.assert_extension_array_equal(forward, expected)

    # Swapping the operands must give the same answer.
    swapped = right ^ left
    tm.assert_extension_array_equal(swapped, expected)
|
||
def test_kleene_scalar(self):
    """Check ``^`` (exclusive-or) between a boolean array and a scalar NA.

    Every position is expected to be missing, in both operand orders.
    """
    arr = pd.array([True, False, None], dtype="boolean")
    expected = pd.array([None, None, None], dtype="boolean")

    forward = arr ^ np.nan  # TODO: pd.NA
    tm.assert_extension_array_equal(forward, expected)

    # Reflected form: the scalar is on the left.
    reflected = np.nan ^ arr  # TODO: pd.NA
    tm.assert_extension_array_equal(reflected, expected)
|
||
|
||
class TestComparisonOps(BaseOpsUtil): | ||
def _compare_other(self, data, op_name, other): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is "ref" here a placeholder?