From c7f6b81eefae5dc51255d5802b8b65115770f6ee Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 5 Oct 2021 19:15:09 -0700 Subject: [PATCH 1/3] ENH: implement ExtensionArray.__array_ufunc__ --- pandas/core/arraylike.py | 20 +++++++++++++- pandas/core/arrays/base.py | 15 +++++++++++ pandas/core/arrays/boolean.py | 3 +++ pandas/tests/arrays/boolean/test_ops.py | 7 +++++ pandas/tests/extension/arrow/test_bool.py | 5 +++- pandas/tests/extension/base/ops.py | 32 ++++++++++++++++++++--- 6 files changed, 76 insertions(+), 6 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index f114278caf3ee..3d209189d97d8 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -371,6 +371,8 @@ def reconstruct(result): # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. inputs = tuple(np.asarray(x) for x in inputs) + # Note: we can't use default_array_ufunc here bc reindexing means + # that `self` may not be among `inputs` result = getattr(ufunc, method)(*inputs, **kwargs) elif self.ndim == 1: # ufunc(series, ...) @@ -387,7 +389,7 @@ def reconstruct(result): else: # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..)) # Those can have an axis keyword and thus can't be called block-by-block - result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs) + result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) result = reconstruct(result) return result @@ -452,3 +454,19 @@ def _assign_where(out, result, where) -> None: out[:] = result else: np.putmask(out, where, result) + + +def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Fallback to the behavior we would get if we did not define __array_ufunc__. + + Notes + ----- + We are assuming that `self` is among `inputs`. 
+ """ + if not any(x is self for x in inputs): + raise NotImplementedError + + new_inputs = [x if x is not self else np.asarray(x) for x in inputs] + + return getattr(ufunc, method)(*new_inputs, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8ee5a4a2d913a..b17f309e5f9fb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -65,6 +65,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ( + arraylike, missing, ops, ) @@ -1366,6 +1367,20 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype): ) return result + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any( + isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) + class ExtensionOpsMixin: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 69896a389102f..1df7c191bdb68 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -604,3 +604,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str): else: result[mask] = np.nan return result + + def __abs__(self): + return self.copy() diff --git a/pandas/tests/arrays/boolean/test_ops.py b/pandas/tests/arrays/boolean/test_ops.py index 52f602258a049..95ebe8528c2e5 100644 --- a/pandas/tests/arrays/boolean/test_ops.py +++ b/pandas/tests/arrays/boolean/test_ops.py @@ -18,3 +18,10 @@ def test_invert(self): {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] ) tm.assert_frame_equal(result, expected) + + def test_abs(self): + # matching numpy behavior, abs is the identity function + arr = pd.array([True, False, None], dtype="boolean") + result = abs(arr) + + tm.assert_extension_array_equal(result, arr) 
diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 6a16433aa0a32..d262f09182a9c 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -54,7 +54,10 @@ def test_view(self, data): # __setitem__ does not work, so we only have a smoke-test data.view() - @pytest.mark.xfail(raises=AssertionError, reason="Not implemented yet") + @pytest.mark.xfail( + raises=AttributeError, + reason="__eq__ incorrectly returns bool instead of ndarray[bool]", + ) def test_contains(self, data, data_missing): super().test_contains(data, data_missing) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index ca22973d0b4d3..e9ceec3a3d7e6 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pytest import pandas as pd @@ -128,11 +129,13 @@ class BaseComparisonOpsTests(BaseOpsUtil): """Various Series and DataFrame comparison ops methods.""" def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) - if op_name == "__eq__": - assert not op(s, other).all() - elif op_name == "__ne__": - assert op(s, other).all() + if op_name in ["__eq__", "__ne__"]: + # comparison should match point-wise comparisons + result = op(s, other) + expected = s.combine(other, op) + self.assert_series_equal(result, expected) else: @@ -182,3 +185,24 @@ def test_invert(self, data): result = ~s expected = pd.Series(~data, name="name") self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) + def test_unary_ufunc_dunder_equivalence(self, data, ufunc): + # the dunder __pos__ works if and only if np.positive works, + # same for __neg__/np.negative and __abs__/np.abs + attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ + ufunc + ] + + exc = None + try: + result = 
getattr(data, attr)() + except Exception as err: + exc = err + + # if __pos__ raised, then so should the ufunc + with pytest.raises((type(exc), TypeError)): + ufunc(data) + else: + alt = ufunc(data) + self.assert_extension_array_equal(result, alt) From 24eb722838cd2445d19131ae2d7ac18240a793e7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 5 Oct 2021 19:26:15 -0700 Subject: [PATCH 2/3] tests for TimedeltaArray --- pandas/tests/arrays/test_timedeltas.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 9e2b8e0f1603e..98329776242f1 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -90,6 +90,19 @@ def test_abs(self): result = abs(arr) tm.assert_timedelta_array_equal(result, expected) + result2 = np.abs(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_pos(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + result = +arr + tm.assert_timedelta_array_equal(result, arr) + + result2 = np.positive(arr) + tm.assert_timedelta_array_equal(result2, arr) + def test_neg(self): vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") arr = TimedeltaArray(vals) @@ -100,6 +113,9 @@ def test_neg(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) + def test_neg_freq(self): tdi = pd.timedelta_range("2 Days", periods=4, freq="H") arr = TimedeltaArray(tdi, freq=tdi.freq) @@ -108,3 +124,6 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) From facf61eeb472c7cdd6e0dd51d4c0020ac5f65d1c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 08:49:19 -0700 Subject: [PATCH 3/3] whatsnew --- 
doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index daf0d0d000079..22b49c35e0e68 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -522,7 +522,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ -- +- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__``, ``__pos__``, ``__neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`) - Styler