From 7abf40e53796ce52da0b974de2a59690bf3492ef Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 16:14:31 -0600 Subject: [PATCH 1/5] API: Handle pow & rpow special cases Closes https://github.com/pandas-dev/pandas/issues/29997 --- doc/source/reference/arrays.rst | 30 ++++++++++++++++++++++++--- pandas/_libs/missing.pyx | 25 ++++++++++++++++++++-- pandas/tests/scalar/test_na_scalar.py | 24 ++++++++++++++++++++- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index cf14d28772f4c..563d6f5bab833 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -2,9 +2,9 @@ .. _api.arrays: -============= -Pandas arrays -============= +========================= +Pandas arrays and scalars +========================= .. currentmodule:: pandas @@ -28,6 +28,30 @@ Strings :class:`StringDtype` :class:`str` :ref:`api.array Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` =================== ========================= ================== ============================= +As the table shows, each extension type is associated with an array class. Pandas may define +a dedicated scalar for the type (for example, :class:`arrays.IntervalArray` uses :class:`Interval`) +or it may re-use Python's scalars (for example, :class:`StringArray` uses Python's :class:`str`). + +Additionally, pandas defines a singleton scalar missing value :class:`pandas.NA`. This +value is distinct from ``float('nan')``, :attr:`numpy.nan` and Python's :class:`None`. + +.. autosummary:: + :toctree: api/ + + NA + +In binary operations, :class:`NA` is treated as numeric. Generally, ``NA`` propagates, so +the result of ``op(NA, other)`` will be ``NA``. There are a few special cases when the +result is known, even when one of the operands is ``NA``. + +* ``pd.NA ** 0`` is always 0. +* ``1 ** pd.NA`` is always 1. + +In logical operations, :class:`NA` uses Kleene logic. + +Creating Arrays +--------------- + Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). The top-level :meth:`array` method can be used to create a new array, which may be stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFrame`. diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 30832a8e4daab..d0ead37806ae7 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -365,8 +365,6 @@ class NAType(C_NAType): __rmod__ = _create_binary_propagating_op("__rmod__") __divmod__ = _create_binary_propagating_op("__divmod__", divmod=True) __rdivmod__ = _create_binary_propagating_op("__rdivmod__", divmod=True) - __pow__ = _create_binary_propagating_op("__pow__") - __rpow__ = _create_binary_propagating_op("__rpow__") # __lshift__ and __rshift__ are not implemented __eq__ = _create_binary_propagating_op("__eq__") @@ -383,6 +381,29 @@ class NAType(C_NAType): __abs__ = _create_unary_propagating_op("__abs__") __invert__ = _create_unary_propagating_op("__invert__") + # pow has special + def __pow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 0: + return other + else: + return NA + + return NotImplemented + + def __rpow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 1: + return other + else: + return NA + + return NotImplemented + # Logical ops using Kleene logic def __and__(self, other): diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index 586433698a587..3c29fe38b704e 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -38,11 +38,14 @@ def test_arithmetic_ops(all_arithmetic_functions): op = all_arithmetic_functions for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: - if op.__name__ == "rmod" and isinstance(other, str): + if op.__name__ in ("pow", "rpow", "rmod") and isinstance(other, str): continue if op.__name__ in ("divmod", "rdivmod"): assert op(NA, other) is (NA, NA) else: + if op.__name__ == "rpow": + # avoid special case + other += 1 assert op(NA, other) is NA @@ -69,6 +72,25 @@ def test_comparison_ops(): assert (other <= NA) is NA +@pytest.mark.parametrize( + "value", [0, 0.0, False, np.bool_(False), np.int_(0), np.float_(0)] +) +def test_pow_special(value): + result = pd.NA ** value + assert isinstance(result, type(value)) + assert result == 0 + + +@pytest.mark.parametrize( + "value", [1, 1.0, True, np.bool_(True), np.int_(1), np.float_(1)] +) +def test_rpow_special(value): + result = value ** pd.NA + assert result == 1 + if not isinstance(value, (np.float_, np.bool_, np.int_)): + assert isinstance(result, type(value)) + + def test_unary_ops(): assert +NA is NA assert -NA is NA From 36d403dbbfd65c34dab46a719d8205579a1348c4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 06:18:02 -0600 Subject: [PATCH 2/5] move --- doc/source/reference/arrays.rst | 9 --------- doc/source/user_guide/missing_data.rst | 6 ++++++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 563d6f5bab833..b435aba7599dd 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -40,15 +40,6 @@ value is distinct from ``float('nan')``, :attr:`numpy.nan` and Python's :class:` NA -In binary operations, :class:`NA` is treated as numeric. Generally, ``NA`` propagates, so -the result of ``op(NA, other)`` will be ``NA``. There are a few special cases when the -result is known, even when one of the operands is ``NA``. - -* ``pd.NA ** 0`` is always 0. -* ``1 ** pd.NA`` is always 1. - -In logical operations, :class:`NA` uses Kleene logic. - Creating Arrays --------------- diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 1cc485a229123..8b4a3cbed10ff 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -832,6 +832,12 @@ return ``False``. pd.NA == pd.NA pd.NA < 2.5 +There are a few special cases when the result is known, even when one of the +operands is ``NA``. + +* ``pd.NA ** 0`` is always 0. +* ``1 ** pd.NA`` is always 1. + To check if a value is equal to ``pd.NA``, the :func:`isna` function can be used: From 945e8cd9fcd66d268519534b8c62d0f475db22e0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 06:18:26 -0600 Subject: [PATCH 3/5] revert --- doc/source/reference/arrays.rst | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index b435aba7599dd..cf14d28772f4c 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -2,9 +2,9 @@ .. _api.arrays: -========================= -Pandas arrays and scalars -========================= +============= +Pandas arrays +============= .. currentmodule:: pandas @@ -28,21 +28,6 @@ Strings :class:`StringDtype` :class:`str` :ref:`api.array Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` =================== ========================= ================== ============================= -As the table shows, each extension type is associated with an array class. Pandas may define -a dedicated scalar for the type (for example, :class:`arrays.IntervalArray` uses :class:`Interval`) -or it may re-use Python's scalars (for example, :class:`StringArray` uses Python's :class:`str`). - -Additionally, pandas defines a singleton scalar missing value :class:`pandas.NA`. This -value is distinct from ``float('nan')``, :attr:`numpy.nan` and Python's :class:`None`. - -.. autosummary:: - :toctree: api/ - - NA - -Creating Arrays ---------------- - Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). The top-level :meth:`array` method can be used to create a new array, which may be stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFrame`. From 8fc8b3a7b7e60baca4508ee0f47472df5c2f4fa3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 08:59:35 -0600 Subject: [PATCH 4/5] fixup --- doc/source/user_guide/missing_data.rst | 12 ++++++------ pandas/_libs/missing.pyx | 2 +- pandas/tests/scalar/test_na_scalar.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 8b4a3cbed10ff..bbfb143f56b9d 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -822,6 +822,12 @@ For example, ``pd.NA`` propagates in arithmetic operations, similarly to pd.NA + 1 "a" * pd.NA +There are a few special cases when the result is known, even when one of the +operands is ``NA``. + +* ``pd.NA ** 0`` is always 0. +* ``1 ** pd.NA`` is always 1. + In equality and comparison operations, ``pd.NA`` also propagates. This deviates from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always return ``False``. @@ -832,12 +838,6 @@ return ``False``. pd.NA == pd.NA pd.NA < 2.5 -There are a few special cases when the result is known, even when one of the -operands is ``NA``. - -* ``pd.NA ** 0`` is always 0. -* ``1 ** pd.NA`` is always 1. - To check if a value is equal to ``pd.NA``, the :func:`isna` function can be used: diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d0ead37806ae7..976c2a75b635c 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -387,7 +387,7 @@ class NAType(C_NAType): return NA elif isinstance(other, (numbers.Number, np.bool_)): if other == 0: - return other + return type(other)(1) else: return NA diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index 3c29fe38b704e..058e2cd9d962c 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -78,7 +78,7 @@ def test_comparison_ops(): def test_pow_special(value): result = pd.NA ** value assert isinstance(result, type(value)) - assert result == 0 + assert result == 1 @pytest.mark.parametrize( From a49aa654440c2bf75e35e0b84befa78439922ff4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 11:02:14 -0600 Subject: [PATCH 5/5] handle negative --- doc/source/user_guide/missing_data.rst | 10 +++++++-- pandas/_libs/missing.pyx | 3 ++- pandas/tests/scalar/test_na_scalar.py | 30 +++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index bbfb143f56b9d..1bfe196cb2f89 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -825,8 +825,14 @@ For example, ``pd.NA`` propagates in arithmetic operations, similarly to There are a few special cases when the result is known, even when one of the operands is ``NA``. -* ``pd.NA ** 0`` is always 0. -* ``1 ** pd.NA`` is always 1. + +================ ====== +Operation Result +================ ====== +``pd.NA ** 0`` 0 +``1 ** pd.NA`` 1 +``-1 ** pd.NA`` -1 +================ ====== In equality and comparison operations, ``pd.NA`` also propagates. This deviates from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 976c2a75b635c..63aa5501c5250 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -387,6 +387,7 @@ class NAType(C_NAType): return NA elif isinstance(other, (numbers.Number, np.bool_)): if other == 0: + # returning positive is correct for +/- 0. return type(other)(1) else: return NA @@ -397,7 +398,7 @@ class NAType(C_NAType): if other is C_NA: return NA elif isinstance(other, (numbers.Number, np.bool_)): - if other == 1: + if other == 1 or other == -1: return other else: return NA diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index 058e2cd9d962c..40db617c64717 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -73,7 +73,19 @@ def test_comparison_ops(): @pytest.mark.parametrize( - "value", [0, 0.0, False, np.bool_(False), np.int_(0), np.float_(0)] + "value", + [ + 0, + 0.0, + -0, + -0.0, + False, + np.bool_(False), + np.int_(0), + np.float_(0), + np.int_(-0), + np.float_(-0), + ], ) def test_pow_special(value): result = pd.NA ** value @@ -82,11 +94,23 @@ def test_pow_special(value): @pytest.mark.parametrize( - "value", [1, 1.0, True, np.bool_(True), np.int_(1), np.float_(1)] + "value", + [ + 1, + 1.0, + -1, + -1.0, + True, + np.bool_(True), + np.int_(1), + np.float_(1), + np.int_(-1), + np.float_(-1), + ], ) def test_rpow_special(value): result = value ** pd.NA - assert result == 1 + assert result == value if not isinstance(value, (np.float_, np.bool_, np.int_)): assert isinstance(result, type(value))