Skip to content

ENH: implement ExtensionArray.__array_ufunc__ #43899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ Sparse

ExtensionArray
^^^^^^^^^^^^^^
-
- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__``, ``__pos__``, ``__neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
-

Styler
Expand Down
20 changes: 19 additions & 1 deletion pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ def reconstruct(result):
# * len(inputs) > 1 is doable when we know that we have
# aligned blocks / dtypes.
inputs = tuple(np.asarray(x) for x in inputs)
# Note: we can't use default_array_ufunc here bc reindexing means
# that `self` may not be among `inputs`
result = getattr(ufunc, method)(*inputs, **kwargs)
elif self.ndim == 1:
# ufunc(series, ...)
Expand All @@ -387,7 +389,7 @@ def reconstruct(result):
else:
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
# Those can have an axis keyword and thus can't be called block-by-block
result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs)
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)

result = reconstruct(result)
return result
Expand Down Expand Up @@ -452,3 +454,19 @@ def _assign_where(out, result, where) -> None:
out[:] = result
else:
np.putmask(out, where, result)


def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Fallback to the behavior we would get if we did not define __array_ufunc__.

    Every occurrence of `self` in `inputs` is replaced by ``np.asarray(self)``
    and the requested ufunc method is then invoked on the resulting arguments.

    Parameters
    ----------
    self : object
        The array-like whose ``__array_ufunc__`` is being bypassed.
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the ufunc attribute to call, e.g. ``"__call__"`` or ``"reduce"``.
    *inputs
        Positional arguments for the ufunc; must contain `self` (by identity).
    **kwargs
        Keyword arguments forwarded unchanged to the ufunc method.

    Returns
    -------
    object
        Whatever ``getattr(ufunc, method)`` returns for the converted inputs.

    Raises
    ------
    NotImplementedError
        If `self` is not among `inputs`.

    Notes
    -----
    We are assuming that `self` is among `inputs`.
    """
    # Identity (not equality) is checked on purpose: comparing array-likes
    # with ``==`` would be elementwise and could itself dispatch to a ufunc.
    if not any(x is self for x in inputs):
        raise NotImplementedError(
            "default_array_ufunc requires `self` to be one of `inputs`"
        )

    # Only `self` is unwrapped; the remaining inputs are passed through as-is.
    new_inputs = [np.asarray(x) if x is self else x for x in inputs]

    return getattr(ufunc, method)(*new_inputs, **kwargs)
15 changes: 15 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core import (
arraylike,
missing,
ops,
)
Expand Down Expand Up @@ -1366,6 +1367,20 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
)
return result

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to this array.

    Returns ``NotImplemented`` when any input is a Series, Index or DataFrame.
    Otherwise the ufunc is first offered to the matching dunder operation via
    ``arraylike.maybe_dispatch_ufunc_to_dunder_op``; if that declines, the call
    falls back to ``arraylike.default_array_ufunc``.
    """
    # NOTE(review): presumably deferring lets the pandas container handle the
    # ufunc through its own __array_ufunc__ -- confirm against those classes.
    for other in inputs:
        if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
            return NotImplemented

    dunder_result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dunder_result is NotImplemented:
        # No dunder equivalent handled it; operate on the ndarray form instead.
        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

    return dunder_result


class ExtensionOpsMixin:
"""
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,3 +604,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
else:
result[mask] = np.nan
return result

def __abs__(self):
    """
    Return the absolute value of this array as a new object.

    The result is simply a copy of ``self``: ``abs`` acts as the identity on
    these values.
    """
    duplicate = self.copy()
    return duplicate
7 changes: 7 additions & 0 deletions pandas/tests/arrays/boolean/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ def test_invert(self):
{"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
)
tm.assert_frame_equal(result, expected)

def test_abs(self):
    """abs() on a nullable boolean array returns an equal array."""
    # matching numpy behavior, abs is the identity function
    original = pd.array([True, False, None], dtype="boolean")

    tm.assert_extension_array_equal(abs(original), original)
19 changes: 19 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,19 @@ def test_abs(self):
result = abs(arr)
tm.assert_timedelta_array_equal(result, expected)

result2 = np.abs(arr)
tm.assert_timedelta_array_equal(result2, expected)

def test_pos(self):
    """Unary plus and np.positive both return an equal TimedeltaArray."""
    nanos = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
    tda = TimedeltaArray(nanos)

    # operator form
    via_operator = +tda
    tm.assert_timedelta_array_equal(via_operator, tda)

    # ufunc form
    via_ufunc = np.positive(tda)
    tm.assert_timedelta_array_equal(via_ufunc, tda)

def test_neg(self):
vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
arr = TimedeltaArray(vals)
Expand All @@ -100,6 +113,9 @@ def test_neg(self):
result = -arr
tm.assert_timedelta_array_equal(result, expected)

result2 = np.negative(arr)
tm.assert_timedelta_array_equal(result2, expected)

def test_neg_freq(self):
tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
arr = TimedeltaArray(tdi, freq=tdi.freq)
Expand All @@ -108,3 +124,6 @@ def test_neg_freq(self):

result = -arr
tm.assert_timedelta_array_equal(result, expected)

result2 = np.negative(arr)
tm.assert_timedelta_array_equal(result2, expected)
5 changes: 4 additions & 1 deletion pandas/tests/extension/arrow/test_bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ def test_view(self, data):
# __setitem__ does not work, so we only have a smoke-test
data.view()

@pytest.mark.xfail(
    raises=AttributeError,
    reason="__eq__ incorrectly returns bool instead of ndarray[bool]",
)
def test_contains(self, data, data_missing):
    # Expected to fail with AttributeError for the reason given in the xfail
    # mark; the parent-class test is run unchanged.
    super().test_contains(data, data_missing)

Expand Down
32 changes: 28 additions & 4 deletions pandas/tests/extension/base/ops.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import numpy as np
import pytest

import pandas as pd
Expand Down Expand Up @@ -128,11 +129,13 @@ class BaseComparisonOpsTests(BaseOpsUtil):
"""Various Series and DataFrame comparison ops methods."""

def _compare_other(self, s, data, op_name, other):

op = self.get_op_from_name(op_name)
if op_name == "__eq__":
assert not op(s, other).all()
elif op_name == "__ne__":
assert op(s, other).all()
if op_name in ["__eq__", "__ne__"]:
# comparison should match point-wise comparisons
result = op(s, other)
expected = s.combine(other, op)
self.assert_series_equal(result, expected)

else:

Expand Down Expand Up @@ -182,3 +185,24 @@ def test_invert(self, data):
result = ~s
expected = pd.Series(~data, name="name")
self.assert_series_equal(result, expected)

@pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
    """
    Check that a unary ufunc and its dunder counterpart agree.

    The dunder __pos__ works if and only if np.positive works, and likewise
    for __neg__/np.negative and __abs__/np.abs.
    """
    dunder_by_ufunc = {
        np.positive: "__pos__",
        np.negative: "__neg__",
        np.abs: "__abs__",
    }
    dunder_name = dunder_by_ufunc[ufunc]

    try:
        # attribute lookup stays inside the try so a missing dunder is
        # treated the same as one that raises when called
        result = getattr(data, dunder_name)()
    except Exception as err:
        # if the dunder raised, then so should the ufunc
        with pytest.raises((type(err), TypeError)):
            ufunc(data)
    else:
        alt = ufunc(data)
        self.assert_extension_array_equal(result, alt)