pandas-dev · jorisvandenbossche · Dec 9, 2019 · Nov 25, 2019 · Nov 26, 2019 · Nov 26, 2019
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
@@ -73,6 +73,7 @@ See the :ref:`overview` for more detail about what's in the library.
   * :doc:`user_guide/missing_data`
   * :doc:`user_guide/categorical`
   * :doc:`user_guide/integer_na`
+  * :doc:`user_guide/boolean`
   * :doc:`user_guide/visualization`
   * :doc:`user_guide/computation`
   * :doc:`user_guide/groupby`

diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst
@@ -0,0 +1,77 @@
+.. currentmodule:: pandas
+
+.. ipython:: python
+   :suppress:
+
+   import pandas as pd
+   import numpy as np
+
+.. _boolean:
+
+**************************
+Nullable Boolean Data Type
+**************************
+
+.. versionadded:: 1.0.0
+
+.. _boolean.kleene:
+
+Kleene Logic
+------------
+
+:class:`arrays.BooleanArray` implements Kleene logic (sometimes called three-value logic) for
+logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or).
+
+This table demonstrates the results for every combination. These operations are symmetrical,
+so flipping the left- and right-hand side makes no difference in the result.
+
+================= =========
+Expression        Result
+================= =========
+``True & True``   ``True``
+``True & False``  ``False``
+``True & NA``     ``NA``
+``False & False`` ``False``
+``False & NA``    ``False``
+``NA & NA``       ``NA``
+``True | True``   ``True``
+``True | False``  ``True``
+``True | NA``     ``True``
+``False | False`` ``False``
+``False | NA``    ``NA``
+``NA | NA``       ``NA``
+``True ^ True``   ``False``
+``True ^ False``  ``True``
+``True ^ NA``     ``NA``
+``False ^ False`` ``False``
+``False ^ NA``    ``NA``
+``NA ^ NA``       ``NA``
+================= =========
+
+When an ``NA`` is present in an operation, the output value is ``NA`` only if
+the result cannot be determined solely based on the other input. For example,
+``True | NA`` is ``True``, because both ``True | True`` and ``True | False``
+are ``True``. In that case, we don't actually need to consider the value
+of the ``NA``.
+
+On the other hand, ``True & NA`` is ``NA``. The result depends on whether
+the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``,
+but ``True & False`` is ``False``, so we can't determine the output.
+
+
+This differs from how ``np.nan`` behaves in logical operations. Pandas treats
+``np.nan`` as *always false in the output*.
+
+In ``or``
+
+.. ipython:: python
+
+   pd.Series([True, False, np.nan], dtype="object") | True
+   pd.Series([True, False, np.nan], dtype="boolean") | True
+
+In ``and``
+
+.. ipython:: python
+
+   pd.Series([True, False, np.nan], dtype="object") & True
+   pd.Series([True, False, np.nan], dtype="boolean") & True
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -1,5 +1,5 @@
 import numbers
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING, Optional, Type, Union
 import warnings
 
 import numpy as np
@@ -184,6 +184,9 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
     represented by 2 numpy arrays: a boolean array with the data and
     a boolean array with the mask (True indicating missing).
 
+    BooleanArray implements Kleene logic (sometimes called three-value
+    logic) for logical operations. See :ref:`boolean.kleene` for more.
+
     To construct an BooleanArray from generic array-like input, use
     :func:`pandas.array` specifying ``dtype="boolean"`` (see examples
     below).
@@ -283,7 +286,7 @@ def __getitem__(self, item):
 
     def _coerce_to_ndarray(self, force_bool: bool = False):
         """
-        Coerce to an ndarary of object dtype or bool dtype (if force_bool=True).
+        Coerce to an ndarray of object dtype or bool dtype (if force_bool=True).
 
         Parameters
         ----------
@@ -559,35 +562,34 @@ def logical_method(self, other):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented
 
+            assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
             other = lib.item_from_zerodim(other)
+            other_is_booleanarray = isinstance(other, BooleanArray)
+            other_is_scalar = lib.is_scalar(other)
             mask = None
 
-            if isinstance(other, BooleanArray):
+            if other_is_booleanarray:
                 other, mask = other._data, other._mask
             elif is_list_like(other):
                 other = np.asarray(other, dtype="bool")
                 if other.ndim > 1:
                     raise NotImplementedError(
                         "can only perform ops with 1-d structures"
                     )
-                if len(self) != len(other):
-                    raise ValueError("Lengths must match to compare")
                 other, mask = coerce_to_array(other, copy=False)
 
-            # numpy will show a DeprecationWarning on invalid elementwise
-            # comparisons, this will raise in the future
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
-                with np.errstate(all="ignore"):
-                    result = op(self._data, other)
-
-            # nans propagate
-            if mask is None:
-                mask = self._mask
-            else:
-                mask = self._mask | mask
+            if not other_is_scalar and len(self) != len(other):
+                raise ValueError("Lengths must match to compare")
 
-            return BooleanArray(result, mask)
+            if op.__name__ in {"or_", "ror_"}:
+                result, mask = kleene_or(self._data, other, self._mask, mask)
+                return BooleanArray(result, mask)
+            elif op.__name__ in {"and_", "rand_"}:
+                result, mask = kleene_and(self._data, other, self._mask, mask)
+                return BooleanArray(result, mask)
+            elif op.__name__ in {"xor", "rxor"}:
+                result, mask = kleene_xor(self._data, other, self._mask, mask)
+                return BooleanArray(result, mask)
 
         name = "__{name}__".format(name=op.__name__)
         return set_function_name(logical_method, name, cls)
@@ -740,6 +742,171 @@ def boolean_arithmetic_method(self, other):
         return set_function_name(boolean_arithmetic_method, name, cls)
 
 
+def kleene_or(
+    left: Union[bool, np.ndarray],
+    right: Union[bool, np.ndarray],
+    left_mask: Optional[np.ndarray],
+    right_mask: Optional[np.ndarray],
+):
+    """
+    Boolean ``or`` using Kleene logic.
+
+    Values are NA where we have ``NA | NA`` or ``NA | False``.
+    ``NA | True`` is considered True.
+
+    Parameters
+    ----------
+    left, right : ndarray, NA, or bool
+        The values of the array.
+    left_mask, right_mask : ndarray, optional
+        The masks (True marks a missing value). ``None`` for a scalar operand.
+
+    Returns
+    -------
+    result, mask: ndarray[bool]
+        The result of the logical or, and the new mask.
+    """
+    # To reduce the number of cases, we ensure that `left` & `left_mask`
+    # always come from an array, not a scalar. This is safe because
+    # A | B == B | A
+    if left_mask is None:
+        return kleene_or(right, left, right_mask, left_mask)
+
+    raise_for_nan(right, method="or")
+
+    mask = left_mask
+
+    if right_mask is not None:
+        mask = mask | right_mask
+    else:
+        mask = mask.copy()
+
+    # Disabled handling for a scalar NA on the right, kept for reference:
+    # if right_is_scalar and right is libmissing.NA:
+    #     result = left.copy()
+    #     mask = left_mask.copy()
+    #     mask[~result] = True
+    #     return result, mask
+
+    result = left | right
+    mask[left & ~left_mask] = False  # a known True on the left decides the result
+    if right_mask is not None:
+        mask[right & ~right_mask] = False  # likewise for a known True on the right
+    elif right is True:
+        mask[:] = False  # scalar True: ``x | True`` is True everywhere, nothing is NA
+
+    # ``result`` holds the values, ``mask`` flags the positions that are still NA
+    return result, mask
+
+
+def kleene_xor(
+    left: Union[bool, np.ndarray],
+    right: Union[bool, np.ndarray],
+    left_mask: Optional[np.ndarray],
+    right_mask: Optional[np.ndarray],
+):
+    """
+    Boolean ``xor`` using Kleene logic.
+
+    This is the same as ``or``, with the following adjustments
+
+    * True, True -> False
+    * True, NA   -> NA
+
+    Parameters
+    ----------
+    left, right : ndarray, NA, or bool
+        The values of the array.
+    left_mask, right_mask : ndarray, optional
+        The masks (True marks a missing value). ``None`` for a scalar operand.
+
+    Returns
+    -------
+    result, mask: ndarray[bool]
+        The result of the logical xor, and the new mask.
+    """
+    if left_mask is None:
+        return kleene_xor(right, left, right_mask, left_mask)
+
+    raise_for_nan(right, method="xor")
+    # Re-use or, and update with adjustments.
+    result, mask = kleene_or(left, right, left_mask, right_mask)
+
+    # # TODO(pd.NA): change to pd.NA
+    # if lib.is_scalar(right) and right is libmissing.NA:
+    #     # True | NA == True
+    #     # True ^ NA == NA
+    #     mask[result] = True
+
+    result[left & right] = False  # True ^ True -> False
+    mask[right & left_mask] = True  # NA ^ True -> NA (``or`` had unmasked these)
+    if right_mask is not None:
+        mask[left & right_mask] = True  # True ^ NA -> NA
+
+    result[mask] = False  # clear the data wherever the output is NA
+    return result, mask
+
+
+def kleene_and(
+    left: Union[bool, np.ndarray],
+    right: Union[bool, np.ndarray],
+    left_mask: Optional[np.ndarray],
+    right_mask: Optional[np.ndarray],
+):
+    """
+    Boolean ``and`` using Kleene logic.
+
+    Values are ``NA`` for ``NA & NA`` or ``True & NA``.
+
+    Parameters
+    ----------
+    left, right : ndarray, NA, or bool
+        The values of the array.
+    left_mask, right_mask : ndarray, optional
+        The masks (True marks a missing value). ``None`` for a scalar operand.
+
+    Returns
+    -------
+    result, mask: ndarray[bool]
+        The result of the logical and, and the new mask.
+    """
+    if left_mask is None:
+        return kleene_and(right, left, right_mask, left_mask)
+
+    raise_for_nan(right, method="and")
+    mask = left_mask
+
+    if right_mask is not None:
+        mask = mask | right_mask
+    else:
+        mask = mask.copy()
+
+    if lib.is_scalar(right):
+        result = left.copy()
+        mask = left_mask.copy()
+        if np.isnan(right):
+            # TODO(pd.NA): change to NA
+            mask[result] = True
+        else:
+            result = result & right  # already copied.
+            if right is False:
+                # unmask everything: ``x & False`` is False whatever x is
+                mask[:] = False
+    else:
+        result = left & right
+        # unmask where either left or right is False
+        mask[~left & ~left_mask] = False
+        mask[~right & ~right_mask] = False  # NOTE(review): assumes right_mask is not None -- confirm
+
+    result[mask] = False  # clear the data wherever the output is NA
+    return result, mask
+
+
+def raise_for_nan(value, method):  # reject a scalar float NaN operand of a logical op
+    if lib.is_scalar(value) and isinstance(value, float) and np.isnan(value):
+        raise ValueError(f"Cannot perform logical '{method}' with NaN")
+
+
 BooleanArray._add_logical_ops()
 BooleanArray._add_comparison_ops()
 BooleanArray._add_arithmetic_ops()