-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API: Uses pd.NA in IntegerArray #29964
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
1eec965
f5f61ea
c569562
a8261a4
c8ff04f
cddc9df
9488d34
0d5aab8
fa61a6d
de2c6c6
60d7663
a4c4618
0a500be
1c716f3
67c8d51
22a2bc7
34de18e
78944d1
ffbe299
7abf40e
36d403d
f6b4062
945e8cd
04546f3
a493965
8fc8b3a
a49aa65
8ad166d
dd745c3
88fa412
0902eef
721a1ea
c658307
4f9d775
1244ef4
4a34b45
5293d87
39f225a
ea19b2d
fe2d98e
68fe155
f27a5c2
b97450b
5d62af8
2bf57d6
2f4e1cd
021dc7b
197f18b
259b779
c0cfef9
3183d53
4986d84
76806e9
64b4ccc
b39dc60
800158d
e5d6832
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
import numbers | ||
from typing import Type | ||
from typing import Any, Tuple, Type | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import lib | ||
from pandas._libs import lib, missing as libmissing | ||
from pandas.compat import set_function_name | ||
from pandas.util._decorators import cache_readonly | ||
|
||
|
@@ -43,7 +43,7 @@ class _IntegerDtype(ExtensionDtype): | |
name: str | ||
base = None | ||
type: Type | ||
na_value = np.nan | ||
na_value = libmissing.NA | ||
|
||
def __repr__(self) -> str: | ||
sign = "U" if self.is_unsigned_integer else "" | ||
|
@@ -267,6 +267,11 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin): | |
|
||
.. versionadded:: 0.24.0 | ||
|
||
.. versionchanged:: 1.0.0 | ||
|
||
Now uses :attr:`pandas.NA` as its missing value, rather | ||
than :attr:`numpy.nan`. | ||
|
||
.. warning:: | ||
|
||
IntegerArray is currently experimental, and its API or internal | ||
|
@@ -377,14 +382,28 @@ def __getitem__(self, item): | |
return self._data[item] | ||
return type(self)(self._data[item], self._mask[item]) | ||
|
||
def _coerce_to_ndarray(self): | ||
def _coerce_to_ndarray(self, dtype=None, na_value=libmissing.NA): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we'll want to make a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we already have […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Jeff, the discussion about […] Note that […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry |
||
""" | ||
coerce to an ndarray of object dtype | ||
""" | ||
|
||
# TODO(jreback) make this better | ||
data = self._data.astype(object) | ||
data[self._mask] = self._na_value | ||
if dtype is None: | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
dtype = object | ||
elif is_float_dtype(dtype) and na_value is libmissing.NA: | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# XXX: Do we want to implicitly treat NA as NaN here? | ||
# We should be deliberate in this decision. | ||
na_value = np.nan | ||
|
||
data = self._data.astype(dtype) | ||
|
||
if ( | ||
is_integer_dtype(dtype) | ||
and na_value is libmissing.NA | ||
and not self._mask.any() | ||
): | ||
return data | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
data[self._mask] = na_value | ||
return data | ||
|
||
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us | ||
|
@@ -394,7 +413,7 @@ def __array__(self, dtype=None): | |
the array interface, return my values | ||
We return an object array here to preserve our scalar values | ||
""" | ||
return self._coerce_to_ndarray() | ||
return self._coerce_to_ndarray(dtype=dtype) | ||
|
||
def __arrow_array__(self, type=None): | ||
""" | ||
|
@@ -510,7 +529,7 @@ def isna(self): | |
|
||
@property | ||
def _na_value(self): | ||
return np.nan | ||
return self.dtype.na_value | ||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
|
@@ -549,7 +568,7 @@ def astype(self, dtype, copy=True): | |
return type(self)(result, mask=self._mask, copy=False) | ||
|
||
# coerce | ||
data = self._coerce_to_ndarray() | ||
data = self._coerce_to_ndarray(dtype=dtype) | ||
return astype_nansafe(data, dtype, copy=None) | ||
|
||
@property | ||
|
@@ -604,12 +623,17 @@ def value_counts(self, dropna=True): | |
# w/o passing the dtype | ||
array = np.append(array, [self._mask.sum()]) | ||
index = Index( | ||
np.concatenate([index.values, np.array([np.nan], dtype=object)]), | ||
np.concatenate( | ||
[index.values, np.array([self.dtype.na_value], dtype=object)] | ||
), | ||
dtype=object, | ||
) | ||
|
||
return Series(array, index=index) | ||
|
||
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | ||
return self._coerce_to_ndarray(na_value=np.nan), np.nan | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def _values_for_argsort(self) -> np.ndarray: | ||
"""Return values for sorting. | ||
|
||
|
@@ -629,13 +653,13 @@ def _values_for_argsort(self) -> np.ndarray: | |
|
||
@classmethod | ||
def _create_comparison_method(cls, op): | ||
op_name = op.__name__ | ||
|
||
@unpack_zerodim_and_defer(op.__name__) | ||
def cmp_method(self, other): | ||
from pandas.arrays import BooleanArray | ||
|
||
mask = None | ||
|
||
if isinstance(other, IntegerArray): | ||
if isinstance(other, (BooleanArray, IntegerArray)): | ||
other, mask = other._data, other._mask | ||
|
||
elif is_list_like(other): | ||
|
@@ -660,8 +684,7 @@ def cmp_method(self, other): | |
else: | ||
mask = self._mask | mask | ||
|
||
result[mask] = op_name == "ne" | ||
return result | ||
return BooleanArray(result, mask) | ||
|
||
name = "__{name}__".format(name=op.__name__) | ||
return set_function_name(cmp_method, name, cls) | ||
|
@@ -673,7 +696,8 @@ def _reduce(self, name, skipna=True, **kwargs): | |
# coerce to a nan-aware float if needed | ||
if mask.any(): | ||
data = self._data.astype("float64") | ||
data[mask] = self._na_value | ||
# We explicitly use NaN within reductions. | ||
data[mask] = np.nan | ||
|
||
op = getattr(nanops, "nan" + name) | ||
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) | ||
|
@@ -784,6 +808,12 @@ def integer_arithmetic_method(self, other): | |
_dtype_docstring = """ | ||
An ExtensionDtype for {dtype} integer data. | ||
|
||
.. versionchanged:: 1.0.0 | ||
|
||
Now uses :attr:`pandas.NA` as its missing value, | ||
rather than :attr:`numpy.nan`. | ||
|
||
|
||
Attributes | ||
---------- | ||
None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
may want to add a versionchanged tag here (and below)