-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Use new NA scalar in BooleanArray #29961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
import numbers | ||
from typing import TYPE_CHECKING, Type | ||
from typing import TYPE_CHECKING, Any, Tuple, Type | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import lib | ||
from pandas._libs import lib, missing as libmissing | ||
from pandas.compat import set_function_name | ||
|
||
from pandas.core.dtypes.base import ExtensionDtype | ||
|
@@ -61,13 +61,13 @@ class BooleanDtype(ExtensionDtype): | |
@property | ||
def na_value(self) -> "Scalar": | ||
""" | ||
BooleanDtype uses :attr:`numpy.nan` as the missing NA value. | ||
BooleanDtype uses :attr:`pd.NA` as the missing NA value. | ||
|
||
.. warning:: | ||
|
||
`na_value` may change in a future release. | ||
""" | ||
return np.nan | ||
return libmissing.NA | ||
|
||
@property | ||
def type(self) -> Type: | ||
|
@@ -223,7 +223,7 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin): | |
|
||
>>> pd.array([True, False, None], dtype="boolean") | ||
<BooleanArray> | ||
[True, False, NaN] | ||
[True, False, NA] | ||
Length: 3, dtype: boolean | ||
""" | ||
|
||
|
@@ -262,17 +262,17 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): | |
values, mask = coerce_to_array(scalars, copy=copy) | ||
return BooleanArray(values, mask) | ||
|
||
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: | ||
data = self._data.astype("int8") | ||
data[self._mask] = -1 | ||
return data, -1 | ||
|
||
@classmethod | ||
def _from_factorized(cls, values, original: "BooleanArray"): | ||
return cls._from_sequence(values, dtype=original.dtype) | ||
|
||
def _formatter(self, boxed=False): | ||
def fmt(x): | ||
if isna(x): | ||
return "NaN" | ||
return str(x) | ||
|
||
return fmt | ||
return str | ||
|
||
def __getitem__(self, item): | ||
if is_integer(item): | ||
|
@@ -281,7 +281,9 @@ def __getitem__(self, item): | |
return self._data[item] | ||
return type(self)(self._data[item], self._mask[item]) | ||
|
||
def _coerce_to_ndarray(self, force_bool: bool = False): | ||
def _coerce_to_ndarray( | ||
self, force_bool: bool = False, na_value: "Scalar" = lib._no_default | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason to prefer […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, not sure. I was probably sidetracked by the idea that I could not use None as the default, as that is a valid value as well. |
||
): | ||
""" | ||
Coerce to an ndarary of object dtype or bool dtype (if force_bool=True). | ||
|
||
|
@@ -290,6 +292,9 @@ def _coerce_to_ndarray(self, force_bool: bool = False): | |
force_bool : bool, default False | ||
If True, return bool array or raise error if not possible (in | ||
presence of missing values) | ||
na_value : scalar, optional | ||
Scalar missing value indicator to use in numpy array. Defaults | ||
to the native missing value indicator of this array (pd.NA). | ||
""" | ||
if force_bool: | ||
if not self.isna().any(): | ||
|
@@ -298,8 +303,10 @@ def _coerce_to_ndarray(self, force_bool: bool = False): | |
raise ValueError( | ||
"cannot convert to bool numpy array in presence of missing values" | ||
) | ||
if na_value is lib._no_default: | ||
na_value = self._na_value | ||
data = self._data.astype(object) | ||
data[self._mask] = self._na_value | ||
data[self._mask] = na_value | ||
return data | ||
|
||
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us | ||
|
@@ -483,8 +490,17 @@ def astype(self, dtype, copy=True): | |
return IntegerArray( | ||
self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False | ||
) | ||
# for integer, error if there are missing values | ||
if is_integer_dtype(dtype): | ||
if self.isna().any(): | ||
raise ValueError("cannot convert NA to integer") | ||
# for float dtype, ensure we use np.nan before casting (numpy cannot | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pending the discussion in #30038. |
||
# deal with pd.NA) | ||
na_value = lib._no_default | ||
if is_float_dtype(dtype): | ||
na_value = np.nan | ||
# coerce | ||
data = self._coerce_to_ndarray() | ||
data = self._coerce_to_ndarray(na_value=na_value) | ||
return astype_nansafe(data, dtype, copy=None) | ||
|
||
def value_counts(self, dropna=True): | ||
|
@@ -594,8 +610,6 @@ def logical_method(self, other): | |
|
||
@classmethod | ||
def _create_comparison_method(cls, op): | ||
op_name = op.__name__ | ||
|
||
def cmp_method(self, other): | ||
|
||
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): | ||
|
@@ -617,21 +631,26 @@ def cmp_method(self, other): | |
if len(self) != len(other): | ||
raise ValueError("Lengths must match to compare") | ||
|
||
# numpy will show a DeprecationWarning on invalid elementwise | ||
# comparisons, this will raise in the future | ||
with warnings.catch_warnings(): | ||
warnings.filterwarnings("ignore", "elementwise", FutureWarning) | ||
with np.errstate(all="ignore"): | ||
result = op(self._data, other) | ||
|
||
# nans propagate | ||
if mask is None: | ||
mask = self._mask | ||
if other is libmissing.NA: | ||
# numpy does not handle pd.NA well as "other" scalar (it returns | ||
# a scalar False instead of an array) | ||
result = np.zeros_like(self._data) | ||
mask = np.ones_like(self._data) | ||
else: | ||
mask = self._mask | mask | ||
# numpy will show a DeprecationWarning on invalid elementwise | ||
# comparisons, this will raise in the future | ||
with warnings.catch_warnings(): | ||
warnings.filterwarnings("ignore", "elementwise", FutureWarning) | ||
with np.errstate(all="ignore"): | ||
result = op(self._data, other) | ||
|
||
# nans propagate | ||
if mask is None: | ||
mask = self._mask.copy() | ||
else: | ||
mask = self._mask | mask | ||
|
||
result[mask] = op_name == "ne" | ||
return BooleanArray(result, np.zeros(len(result), dtype=bool), copy=False) | ||
return BooleanArray(result, mask, copy=False) | ||
|
||
name = "__{name}__".format(name=op.__name__) | ||
return set_function_name(cmp_method, name, cls) | ||
|
@@ -643,7 +662,7 @@ def _reduce(self, name, skipna=True, **kwargs): | |
# coerce to a nan-aware float if needed | ||
if mask.any(): | ||
data = self._data.astype("float64") | ||
data[mask] = self._na_value | ||
data[mask] = np.nan | ||
|
||
op = getattr(nanops, "nan" + name) | ||
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) | ||
|
Uh oh!
There was an error while loading. Please reload this page.