pandas-dev · jreback · Dec 30, 2019 · Dec 2, 2019 · Dec 2, 2019 · Dec 2, 2019
diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
@@ -15,14 +15,16 @@ Nullable integer data type
    IntegerArray is currently experimental. Its API or implementation may
    change without warning.
 
-
 In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent
 missing data. Because ``NaN`` is a float, this forces an array of integers with
 any missing values to become floating point. In some cases, this may not matter
 much. But if your integer column is, say, an identifier, casting to float can
 be problematic. Some integers cannot even be represented as floating point
 numbers.
 
+Construction
+------------
+
 Pandas can represent integer data with possibly missing values using
 :class:`arrays.IntegerArray`. This is an :ref:`extension types <extending.extension-types>`
 implemented within pandas.
@@ -39,6 +41,12 @@ NumPy's ``'int64'`` dtype:
 
    pd.array([1, 2, np.nan], dtype="Int64")
 
+All NA-like values are replaced with :attr:`pandas.NA`.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64")
+
 This array can be stored in a :class:`DataFrame` or :class:`Series` like any
 NumPy array.
 
@@ -78,6 +86,9 @@ with the dtype.
    In the future, we may provide an option for :class:`Series` to infer a
    nullable-integer dtype.
 
+Operations
+----------
+
 Operations involving an integer array will behave similar to NumPy arrays.
 Missing values will be propagated, and the data will be coerced to another
 dtype if needed.
@@ -123,3 +134,15 @@ Reduction and groupby operations such as 'sum' work as well.
 
    df.sum()
    df.groupby('B').A.sum()
+
+Scalar NA Value
+---------------
+
+:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar
+missing value. Selecting a single element that is missing returns
+:attr:`pandas.NA`:
+
+.. ipython:: python
+
+   a = pd.array([1, None], dtype="Int64")
+   a[1]
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -6,6 +6,7 @@
 
 from pandas._libs import lib
 from pandas.compat import set_function_name
+from pandas.compat.numpy import function as nv
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import astype_nansafe
@@ -554,7 +555,6 @@ def _values_for_argsort(self) -> np.ndarray:
     @classmethod
     def _create_logical_method(cls, op):
         def logical_method(self, other):
-
             if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented
@@ -597,8 +597,11 @@ def _create_comparison_method(cls, op):
         op_name = op.__name__
 
         def cmp_method(self, other):
+            from pandas.arrays import IntegerArray
 
-            if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
+            if isinstance(
+                other, (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray)
+            ):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented
 
@@ -663,6 +666,16 @@ def _reduce(self, name, skipna=True, **kwargs):
 
         return result
 
+    def any(self, axis=None, out=None, keepdims=False, skipna=True):
+        # NumPy-style signature (``axis`` is accepted but unused). Needed for
+        # pandas/tests/arrays/test_integer.py::test_preserve_dtypes[sum]
+        nv.validate_any((), dict(out=out, keepdims=keepdims))
+        return self._reduce("any", skipna=skipna)
+
+    def all(self, axis=None, out=None, keepdims=False, skipna=True):
+        nv.validate_all((), dict(out=out, keepdims=keepdims))  # ``axis`` is unused
+        return self._reduce("all", skipna=skipna)
+
     def _maybe_mask_result(self, result, mask, other, op_name):
         """
         Parameters

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -1,10 +1,10 @@
 import numbers
-from typing import Type
+from typing import Any, Tuple, Type
 import warnings
 
 import numpy as np
 
-from pandas._libs import lib
+from pandas._libs import lib, missing as libmissing
 from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
@@ -43,7 +43,7 @@ class _IntegerDtype(ExtensionDtype):
     name: str
     base = None
     type: Type
-    na_value = np.nan
+    na_value = libmissing.NA
 
     def __repr__(self) -> str:
         sign = "U" if self.is_unsigned_integer else ""
@@ -267,6 +267,11 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin):
 
     .. versionadded:: 0.24.0
 
+    .. versionchanged:: 1.0.0
+
+       Now uses :attr:`pandas.NA` as its missing value, rather
+       than :attr:`numpy.nan`.
+
     .. warning::
 
        IntegerArray is currently experimental, and its API or internal
@@ -377,14 +382,28 @@ def __getitem__(self, item):
             return self._data[item]
         return type(self)(self._data[item], self._mask[item])
 
-    def _coerce_to_ndarray(self):
+    def _coerce_to_ndarray(self, dtype=None, na_value=libmissing.NA):
         """
         coerce to an ndarary of object dtype
         """
-
         # TODO(jreback) make this better
-        data = self._data.astype(object)
-        data[self._mask] = self._na_value
+        if dtype is None:
+            dtype = object
+        elif is_float_dtype(dtype) and na_value is libmissing.NA:
+            # XXX: Do we want to implicitly treat NA as NaN here?
+            # We should be deliberate in this decision.
+            na_value = np.nan
+
+        data = self._data.astype(dtype)
+
+        if (
+            is_integer_dtype(dtype)
+            and na_value is libmissing.NA
+            and not self._mask.any()
+        ):
+            return data
+        else:
+            data[self._mask] = na_value
         return data
 
     __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
@@ -394,7 +413,7 @@ def __array__(self, dtype=None):
         the array interface, return my values
         We return an object array here to preserve our scalar values
         """
-        return self._coerce_to_ndarray()
+        return self._coerce_to_ndarray(dtype=dtype)
 
     def __arrow_array__(self, type=None):
         """
@@ -510,7 +529,7 @@ def isna(self):
 
     @property
     def _na_value(self):
-        return np.nan
+        return self.dtype.na_value
 
     @classmethod
     def _concat_same_type(cls, to_concat):
@@ -549,7 +568,7 @@ def astype(self, dtype, copy=True):
             return type(self)(result, mask=self._mask, copy=False)
 
         # coerce
-        data = self._coerce_to_ndarray()
+        data = self._coerce_to_ndarray(dtype=dtype)
         return astype_nansafe(data, dtype, copy=None)
 
     @property
@@ -604,12 +623,17 @@ def value_counts(self, dropna=True):
             # w/o passing the dtype
             array = np.append(array, [self._mask.sum()])
             index = Index(
-                np.concatenate([index.values, np.array([np.nan], dtype=object)]),
+                np.concatenate(
+                    [index.values, np.array([self.dtype.na_value], dtype=object)]
+                ),
                 dtype=object,
             )
 
         return Series(array, index=index)
 
+    def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
+        return self._coerce_to_ndarray(na_value=np.nan), np.nan
+
     def _values_for_argsort(self) -> np.ndarray:
         """Return values for sorting.
 
@@ -629,13 +653,13 @@ def _values_for_argsort(self) -> np.ndarray:
 
     @classmethod
     def _create_comparison_method(cls, op):
-        op_name = op.__name__
-
         @unpack_zerodim_and_defer(op.__name__)
         def cmp_method(self, other):
+            from pandas.arrays import BooleanArray
+
             mask = None
 
-            if isinstance(other, IntegerArray):
+            if isinstance(other, (BooleanArray, IntegerArray)):
                 other, mask = other._data, other._mask
 
             elif is_list_like(other):
@@ -660,8 +684,7 @@ def cmp_method(self, other):
             else:
                 mask = self._mask | mask
 
-            result[mask] = op_name == "ne"
-            return result
+            return BooleanArray(result, mask)
 
         name = "__{name}__".format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)
@@ -673,7 +696,8 @@ def _reduce(self, name, skipna=True, **kwargs):
         # coerce to a nan-aware float if needed
         if mask.any():
             data = self._data.astype("float64")
-            data[mask] = self._na_value
+            # We explicitly use NaN within reductions.
+            data[mask] = np.nan
 
         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
@@ -784,6 +808,12 @@ def integer_arithmetic_method(self, other):
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
 
+.. versionchanged:: 1.0.0
+
+   Now uses :attr:`pandas.NA` as its missing value,
+   rather than :attr:`numpy.nan`.
+
+
 Attributes
 ----------
 None