-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Implement NA.__array_ufunc__ #30245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement NA.__array_ufunc__ #30245
Changes from all commits
bf8680f
0c69bd0
46f2327
075d58a
b72dd1c
0371cf4
878ef70
97af2e9
f175a34
f2ac945
0f4e121
fe04554
cf9ac10
72e2b67
8d90e9d
6a2fc68
db4cc40
7b1585a
b79e07f
567c584
b27470d
c7c9184
a0dbca8
ce209f9
e4ecadb
8d2763d
f68e178
d8c23e9
4c30bb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ from pandas._libs.tslibs.np_datetime cimport ( | |
get_timedelta64_value, get_datetime64_value) | ||
from pandas._libs.tslibs.nattype cimport ( | ||
checknull_with_nat, c_NaT as NaT, is_null_datetimelike) | ||
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op | ||
|
||
from pandas.compat import is_platform_32bit | ||
|
||
|
@@ -290,16 +291,29 @@ cdef inline bint is_null_period(v): | |
# Implementation of NA singleton | ||
|
||
|
||
def _create_binary_propagating_op(name, divmod=False): | ||
def _create_binary_propagating_op(name, is_divmod=False): | ||
|
||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def method(self, other): | ||
if (other is C_NA or isinstance(other, str) | ||
or isinstance(other, (numbers.Number, np.bool_))): | ||
if divmod: | ||
or isinstance(other, (numbers.Number, np.bool_)) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
or isinstance(other, np.ndarray) and not other.shape): | ||
This seems a bit strange, but I suppose it is to follow NumPy's behaviour of 0-dim arrays returning scalars from comparison operations? (If so, maybe add a comment about that.)
It's to handle NumPy scalars. Without this, `np.int64(1) == pd.NA` would raise with:
> out[:] = NA
E IndexError: too many indices for array

A NumPy scalar (which is different from a 0-dim array) is not an ndarray, so it wouldn't pass the `isinstance(other, np.ndarray)` check.

Could we call `lib.item_from_zerodim` at the top?

@TomAugspurger can you check this thread? I still don't understand how this relates to scalars, as they shouldn't pass the `isinstance(other, np.ndarray)` check.

With this diff:
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index 8d4d2c5568..e2bb6448cd 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -295,8 +295,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
def method(self, other):
if (other is C_NA or isinstance(other, str)
- or isinstance(other, (numbers.Number, np.bool_))
- or isinstance(other, np.ndarray) and not other.shape):
+ or isinstance(other, (numbers.Number, np.bool_))):
# Need the other.shape clause to handle NumPy scalars,
# since we do a setitem on `out` below, which
# won't work for NumPy scalars.
I have:
In [3]: np.int64(1) == pd.NA
/Users/taugspurger/.virtualenvs/pandas-dev/bin/ipython:1: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
#!/Users/taugspurger/Envs/pandas-dev/bin/python
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-3-209227154583> in <module>
----> 1 np.int64(1) == pd.NA
~/sandbox/pandas/pandas/_libs/missing.pyx in pandas._libs.missing._create_binary_propagating_op.method()
307 elif isinstance(other, np.ndarray):
308 out = np.empty(other.shape, dtype=object)
--> 309 out[:] = NA
310
311 if is_divmod:
IndexError: too many indices for array
So when we get there, it really does seem like `other` is an `np.ndarray`.

Possibly, no idea. But I trust you that this is indeed needed to get it working.
||
# Need the other.shape clause to handle NumPy scalars, | ||
# since we do a setitem on `out` below, which | ||
# won't work for NumPy scalars. | ||
if is_divmod: | ||
return NA, NA | ||
else: | ||
return NA | ||
|
||
elif isinstance(other, np.ndarray): | ||
out = np.empty(other.shape, dtype=object) | ||
out[:] = NA | ||
|
||
if is_divmod: | ||
return out, out.copy() | ||
else: | ||
return out | ||
|
||
return NotImplemented | ||
|
||
method.__name__ = name | ||
|
@@ -369,8 +383,8 @@ class NAType(C_NAType): | |
__rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") | ||
__mod__ = _create_binary_propagating_op("__mod__") | ||
__rmod__ = _create_binary_propagating_op("__rmod__") | ||
__divmod__ = _create_binary_propagating_op("__divmod__", divmod=True) | ||
__rdivmod__ = _create_binary_propagating_op("__rdivmod__", divmod=True) | ||
__divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True) | ||
__rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True) | ||
# __lshift__ and __rshift__ are not implemented | ||
|
||
__eq__ = _create_binary_propagating_op("__eq__") | ||
|
@@ -397,6 +411,8 @@ class NAType(C_NAType): | |
return type(other)(1) | ||
else: | ||
return NA | ||
elif isinstance(other, np.ndarray): | ||
return np.where(other == 0, other.dtype.type(1), NA) | ||
|
||
return NotImplemented | ||
|
||
|
@@ -408,6 +424,8 @@ class NAType(C_NAType): | |
return other | ||
else: | ||
return NA | ||
elif isinstance(other, np.ndarray): | ||
return np.where((other == 1) | (other == -1), other, NA) | ||
|
||
return NotImplemented | ||
|
||
|
@@ -440,6 +458,31 @@ class NAType(C_NAType): | |
|
||
__rxor__ = __xor__ | ||
|
||
__array_priority__ = 1000 | ||
_HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_) | ||
|
||
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """
    Handle NumPy ufuncs called with NA as one of the inputs.

    Parameters
    ----------
    ufunc : Callable
        The NumPy ufunc being applied.
    method : str
        The ufunc method requested; only ``"__call__"`` is supported.
    *inputs
        Positional inputs of the ufunc call.
    **kwargs
        Keyword arguments of the ufunc call, forwarded to the dispatcher.

    Returns
    -------
    object
        ``NotImplemented`` if any input is not of a handled type.
        Otherwise the result of the matching dunder method when the ufunc
        is dispatched, or an all-NA fallback (scalar, array or tuple).

    Raises
    ------
    ValueError
        If ``method`` is anything other than ``"__call__"``.
    """
    accepted = self._HANDLED_TYPES + (NAType,)
    if not all(isinstance(arg, accepted) for arg in inputs):
        return NotImplemented

    if method != "__call__":
        raise ValueError(f"ufunc method '{method}' not supported for NA")

    dispatched = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    # The ufunc is not one of the dispatched binary ops (e.g. np.logaddexp).
    # Broadcast all inputs against each other and take the view belonging to
    # the first NA input: it has the broadcast shape and holds only NA.
    na_positions = [pos for pos, arg in enumerate(inputs) if arg is NA]
    filled = np.broadcast_arrays(*inputs)[na_positions[0]]

    if ufunc.nout > 1:
        # Multi-output ufuncs get one scalar NA per output.
        return (NA,) * ufunc.nout

    # Unwrap 0-dim results so scalar inputs give back a scalar.
    return filled.item() if filled.ndim == 0 else filled
|
||
|
||
C_NA = NAType() # C-visible | ||
NA = C_NA # Python-visible |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# Short operator names for which maybe_dispatch_ufunc_to_dunder_op forwards
# a ufunc call to the matching ``__{op}__`` / ``__r{op}__`` method.
# Membership is tested *after* the ufunc name has been normalised through
# UFUNC_ALIASES.
DISPATCHED_UFUNCS = {
    "add",
    "sub",
    "mul",
    "pow",
    "mod",
    "floordiv",
    "truediv",
    "divmod",
    "eq",
    "ne",
    "lt",
    "gt",
    "le",
    "ge",
    # NOTE: "remainder" is rewritten to "mod" by UFUNC_ALIASES before the
    # membership test, so this entry is never matched by the dispatcher.
    # It is kept for backward compatibility.
    "remainder",
    "matmul",
    "or",
    "xor",
    "and",
}

# Maps a NumPy ufunc ``__name__`` to the short operator name used to build
# the dunder method name. Every value must be a member of DISPATCHED_UFUNCS,
# otherwise the aliased ufunc can never be dispatched.
UFUNC_ALIASES = {
    "subtract": "sub",
    "multiply": "mul",
    "floor_divide": "floordiv",
    "true_divide": "truediv",
    "power": "pow",
    "remainder": "mod",
    # np.divide is true division. There is no ``__div__`` on Python 3 and
    # "div" is not a dispatched name, so it must alias to "truediv".
    "divide": "truediv",
    "equal": "eq",
    "not_equal": "ne",
    "less": "lt",
    "less_equal": "le",
    "greater": "gt",
    "greater_equal": "ge",
    "bitwise_or": "or",
    "bitwise_and": "and",
    "bitwise_xor": "xor",
}

# For op(., Array) -> Array.__r{op}__
# Comparisons have no ``__r{op}__`` form: when ``self`` is the right-hand
# operand, the mirrored comparison is called instead (``a < self`` becomes
# ``self > a``). ``eq`` and ``ne`` are symmetric and map to themselves.
REVERSED_NAMES = {
    "lt": "__gt__",
    "le": "__ge__",
    "gt": "__lt__",
    "ge": "__le__",
    "eq": "__eq__",
    "ne": "__ne__",
}
|
||
|
||
def maybe_dispatch_ufunc_to_dunder_op(
    object self, object ufunc, str method, *inputs, **kwargs
):
    """
    Dispatch a ufunc to the equivalent dunder method.

    Parameters
    ----------
    self : ArrayLike
        The array whose dunder method we dispatch to
    ufunc : Callable
        A NumPy ufunc
    method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'}
    inputs : ArrayLike
        The input arrays.
    kwargs : Any
        The additional keyword arguments, e.g. ``out``.

    Returns
    -------
    result : Any
        The value returned by the dunder method on ``self``, or
        ``NotImplemented`` when the call is not dispatched (``method`` is
        not ``'__call__'``, the ufunc is not in ``DISPATCHED_UFUNCS``, or
        ``out`` was given) or when ``self`` lacks the dunder method.
    """
    # Translate the NumPy ufunc name (e.g. "subtract") into the short
    # operator name (e.g. "sub") used to build the dunder method name.
    ufunc_name = ufunc.__name__
    op_name = UFUNC_ALIASES.get(ufunc_name, ufunc_name)

    can_dispatch = (
        method == "__call__"
        and op_name in DISPATCHED_UFUNCS
        and kwargs.get("out") is None
    )
    if not can_dispatch:
        return NotImplemented

    def not_implemented(*args, **kwargs):
        # Stand-in used when ``self`` does not define the dunder method.
        return NotImplemented

    if isinstance(inputs[0], type(self)):
        # ``self``'s type is the left operand: use the forward method.
        dunder = f"__{op_name}__"
        operand = inputs[1]
    else:
        # Otherwise use the reflected method, or the mirrored comparison
        # listed in REVERSED_NAMES.
        dunder = REVERSED_NAMES.get(op_name, f"__r{op_name}__")
        operand = inputs[0]

    return getattr(self, dunder, not_implemented)(operand)
Uh oh!
There was an error while loading. Please reload this page.