pandas-dev · jreback · Jun 29, 2018 · May 30, 2018 · May 31, 2018 · May 31, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -14,6 +14,7 @@ Other Enhancements
 ^^^^^^^^^^^^^^^^^^
 - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`)
 - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
+- ``ExtensionArray`` subclasses can now use the ``ExtensionArithmeticMixin`` and ``ExtensionComparisonMixin`` mixins (built on ``ExtensionOpsBase``) to get default operator implementations (:issue:`20659`, :issue:`19577`)
 -
 
 .. _whatsnew_0240.api_breaking:

diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py
@@ -3,5 +3,8 @@
                                   register_index_accessor,
                                   register_series_accessor)
 from pandas.core.algorithms import take  # noqa
-from pandas.core.arrays.base import ExtensionArray  # noqa
+from pandas.core.arrays.base import (ExtensionArray,    # noqa
+                                     ExtensionArithmeticMixin,
+                                     ExtensionComparisonMixin,
+                                     ExtensionOpsBase)
 from pandas.core.dtypes.dtypes import ExtensionDtype  # noqa
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -1,2 +1,5 @@
-from .base import ExtensionArray  # noqa
+from .base import (ExtensionArray,    # noqa
+                   ExtensionArithmeticMixin,
+                   ExtensionComparisonMixin,
+                   ExtensionOpsBase)
 from .categorical import Categorical  # noqa
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -7,8 +7,16 @@
 """
 import numpy as np
 
+import operator
+
 from pandas.errors import AbstractMethodError
 from pandas.compat.numpy import function as nv
+from pandas.compat import set_function_name, PY3
+import pandas.core.common as com
+from pandas.core.dtypes.common import (
+    is_extension_array_dtype,
+    is_list_like)
+from pandas.core import ops
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -610,3 +618,121 @@ def _ndarray_values(self):
         used for interacting with our indexers.
         """
         return np.array(self)
+
+
+class ExtensionOpsBase(object):
+    """
+    Base class for the operator mixins; ``create_method`` can also be used
+    directly to define a method for a single operator.
+    """
+    @classmethod
+    def create_method(cls, op):
+        """
+        Build an element-wise binary method for an ExtensionArray subclass.
+
+        Parameters
+        ----------
+        op : callable
+            A binary operator invoked as ``op(a, b)`` on pairs of elements.
+
+        Returns
+        -------
+        function
+            A function ``(self, other)`` that applies ``op`` pairwise and
+            tries ``self._from_sequence`` on the results, returning them
+            unconverted if that raises TypeError.
+
+        Examples
+        --------
+        ``__add__ = ExtensionOpsBase.create_method(operator.add)``
+        """
+        op_name = ops._get_op_name(op, False)
+
+        def _binop(self, other):
+            def convert_values(parm):
+                if isinstance(parm, ExtensionArray):
+                    ovalues = list(parm)
+                elif is_extension_array_dtype(parm):
+                    ovalues = parm.values
+                elif is_list_like(parm):
+                    ovalues = parm
+                else:  # Assume it's a scalar object; repeat it len(self) times
+                    ovalues = [parm] * len(self)
+                return ovalues
+            lvalues = convert_values(self)
+            rvalues = convert_values(other)
+
+            # NOTE(review): zip() stops at the shorter operand, so a
+            # list-like ``other`` of a different length is silently truncated.
+            try:
+                res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
+            except TypeError:
+                msg = ("ExtensionDtype invalid operation " +
+                       "{opn} between {one} and {two}")
+                raise TypeError(msg.format(opn=op_name,
+                                           one=type(lvalues),
+                                           two=type(rvalues)))
+
+            res_values = com._values_from_object(res)
+
+            try:
+                res_values = self._from_sequence(res_values)
+            except TypeError:
+                pass
+
+            return res_values
+
+        name = '__{name}__'.format(name=op_name)
+        return set_function_name(_binop, name, cls)
+
+
+class ExtensionArithmeticMixin(ExtensionOpsBase):
+    """A mixin for defining the arithmetic operations on an ExtensionArray
+    class, where it is assumed that the underlying objects have the
+    operators already defined.
+
+    Usage
+    -----
+    If you have defined a subclass MyClass(ExtensionArray), then
+    use MyClass(ExtensionArray, ExtensionArithmeticMixin) to
+    get the arithmetic operators
+    """
+
+    __add__ = ExtensionOpsBase.create_method(operator.add)
+    __radd__ = ExtensionOpsBase.create_method(ops.radd)
+    __sub__ = ExtensionOpsBase.create_method(operator.sub)
+    __rsub__ = ExtensionOpsBase.create_method(ops.rsub)
+    __mul__ = ExtensionOpsBase.create_method(operator.mul)
+    __rmul__ = ExtensionOpsBase.create_method(ops.rmul)
+    __pow__ = ExtensionOpsBase.create_method(operator.pow)
+    __rpow__ = ExtensionOpsBase.create_method(ops.rpow)
+    __mod__ = ExtensionOpsBase.create_method(operator.mod)
+    __rmod__ = ExtensionOpsBase.create_method(ops.rmod)
+    __floordiv__ = ExtensionOpsBase.create_method(operator.floordiv)
+    __rfloordiv__ = ExtensionOpsBase.create_method(ops.rfloordiv)
+    __truediv__ = ExtensionOpsBase.create_method(operator.truediv)
+    __rtruediv__ = ExtensionOpsBase.create_method(ops.rtruediv)
+    if not PY3:
+        __div__ = ExtensionOpsBase.create_method(operator.div)
+        __rdiv__ = ExtensionOpsBase.create_method(ops.rdiv)
+
+    __divmod__ = ExtensionOpsBase.create_method(divmod)  # NOTE: no __rdivmod__
+
+
+class ExtensionComparisonMixin(ExtensionOpsBase):
+    """A mixin for defining the comparison operations on an ExtensionArray
+    class, where it is assumed that the underlying objects have the
+    operators already defined.
+
+    Usage
+    -----
+    If you have defined a subclass MyClass(ExtensionArray), then
+    use MyClass(ExtensionArray, ExtensionComparisonMixin) to
+    get the comparison operators
+    """
+    __eq__ = ExtensionOpsBase.create_method(operator.eq)
+    __ne__ = ExtensionOpsBase.create_method(operator.ne)
+    __lt__ = ExtensionOpsBase.create_method(operator.lt)
+    __gt__ = ExtensionOpsBase.create_method(operator.gt)
+    __le__ = ExtensionOpsBase.create_method(operator.le)
+    __ge__ = ExtensionOpsBase.create_method(operator.ge)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2972,16 +2972,20 @@ def get_value(self, series, key):
         # use this, e.g. DatetimeIndex
         s = getattr(series, '_values', None)
         if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
-            # GH 20825
+            # GH 20882, 21257
             # Unify Index and ExtensionArray treatment
             # First try to convert the key to a location
-            # If that fails, see if key is an integer, and
+            # If that fails, raise a KeyError if an integer
+            # index, otherwise, see if key is an integer, and
             # try that
             try:
                 iloc = self.get_loc(key)
                 return s[iloc]
             except KeyError:
-                if is_integer(key):
+                if (len(self) > 0 and
+                        self.inferred_type in ['integer', 'boolean']):
+                    raise
+                elif is_integer(key):
                     return s[key]
 
         s = com._values_from_object(series)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -30,6 +30,7 @@
     is_bool_dtype,
     is_list_like,
     is_scalar,
+    is_extension_array_dtype,
     _ensure_object)
 from pandas.core.dtypes.cast import (
     maybe_upcast_putmask, find_common_type,
@@ -990,6 +991,26 @@ def _construct_divmod_result(left, result, index, name, dtype):
     )
 
 
+def dispatch_to_extension_op(left, right, op_name):
+    """
+    Assume that left is a Series backed by an ExtensionArray. Look up the
+    method named ``op_name`` on that array, call it with ``right`` and wrap
+    the result in a new Series sharing ``left``'s index. A TypeError is
+    raised if the method is missing or returns NotImplemented.
+    """
+    ext_values = left.values
+    method = getattr(ext_values, op_name, None)
+    res_values = NotImplemented if method is None else method(right)
+    if res_values is NotImplemented:
+        msg = "ExtensionArray invalid operation {opn} between {one} and {two}"
+        raise TypeError(msg.format(opn=op_name, one=type(ext_values),
+                                   two=type(right)))
+
+    res_name = get_op_result_name(left, right)
+    return left._constructor(res_values, index=left.index,
+                             name=res_name)
+
+
 def _arith_method_SERIES(cls, op, special):
     """
     Wrapper function for Series arithmetic operations, to avoid
@@ -1058,6 +1079,9 @@ def wrapper(left, right):
             raise TypeError("{typ} cannot perform the operation "
                             "{op}".format(typ=type(left).__name__, op=str_rep))
 
+        elif is_extension_array_dtype(left):
+            return dispatch_to_extension_op(left, right, op_name)
+
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
@@ -1208,6 +1232,9 @@ def wrapper(self, other, axis=None):
             return self._constructor(res_values, index=self.index,
                                      name=res_name)
 
+        elif is_extension_array_dtype(self):
+            return dispatch_to_extension_op(self, other, op_name)
+
         elif isinstance(other, ABCSeries):
             # By this point we have checked that self._indexed_same(other)
             res_values = na_op(self.values, other.values)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2196,23 +2196,22 @@ def _binop(self, other, func, level=None, fill_value=None):
             result.name = None
         return result
 
-    def combine(self, other, func, fill_value=np.nan):
+    def combine(self, other, func, fill_value=None):
         """
         Perform elementwise binary operation on two Series using given function
         with optional fill value when an index is missing from one Series or
         the other
-
         Parameters
         ----------
         other : Series or scalar value
         func : function
             Function that takes two scalars as inputs and return a scalar
         fill_value : scalar value
-
+            The default specifies to use the appropriate NaN value for
+            the underlying dtype of the Series
         Returns
         -------
         result : Series
-
         Examples
         --------
         >>> s1 = Series([1, 2])
@@ -2221,26 +2220,36 @@ def combine(self, other, func, fill_value=np.nan):
         0    0
         1    2
         dtype: int64
-
         See Also
         --------
         Series.combine_first : Combine Series values, choosing the calling
             Series's values first
         """
+        self_is_ext = is_extension_array_dtype(self.values)
+        if fill_value is None:
+            fill_value = na_value_for_dtype(self.dtype, False)
+
         if isinstance(other, Series):
             new_index = self.index.union(other.index)
             new_name = ops.get_op_result_name(self, other)
-            new_values = np.empty(len(new_index), dtype=self.dtype)
-            for i, idx in enumerate(new_index):
+            new_values = []
+            for idx in new_index:
                 lv = self.get(idx, fill_value)
                 rv = other.get(idx, fill_value)
                 with np.errstate(all='ignore'):
-                    new_values[i] = func(lv, rv)
+                    new_values.append(func(lv, rv))
         else:
             new_index = self.index
             with np.errstate(all='ignore'):
-                new_values = func(self._values, other)
+                new_values = [func(lv, other) for lv in self._values]
             new_name = self.name
+
+        if self_is_ext and not is_categorical_dtype(self.values):
+            try:
+                new_values = self._values._from_sequence(new_values)
+            except TypeError:
+                pass
+
         return self._constructor(new_values, index=new_index, name=new_name)
 
     def combine_first(self, other):

diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -130,7 +130,7 @@ def test_get(self, data):
         expected = s.iloc[[0, 1]]
         self.assert_series_equal(result, expected)
 
-        assert s.get(-1) == s.iloc[-1]
+        assert s.get(-1) is None
         assert s.get(s.index.max() + 1) is None
 
         s = pd.Series(data[:6], index=list('abcdef'))

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -6,7 +6,9 @@
 import numpy as np
 
 import pandas as pd
-from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays import (ExtensionArray,
+                                ExtensionArithmeticMixin,
+                                ExtensionComparisonMixin)
 from pandas.core.dtypes.base import ExtensionDtype
 
 
@@ -24,11 +26,14 @@ def construct_from_string(cls, string):
                             "'{}'".format(cls, string))
 
 
-class DecimalArray(ExtensionArray):
+class DecimalArray(ExtensionArray, ExtensionArithmeticMixin,
+                   ExtensionComparisonMixin):
     dtype = DecimalDtype()
 
     def __init__(self, values):
-        assert all(isinstance(v, decimal.Decimal) for v in values)
+        for val in values:
+            if not isinstance(val, self.dtype.type):
+                raise TypeError
         values = np.asarray(values, dtype=object)
 
         self._data = values

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -7,6 +7,9 @@
 
 from pandas.tests.extension import base
 
+from pandas.tests.series.test_operators import TestSeriesOperators
+from pandas.util._decorators import cache_readonly
+
 from .array import DecimalDtype, DecimalArray, make_data
 
 
@@ -183,3 +186,36 @@ def test_dataframe_constructor_with_different_dtype_raises():
     xpr = "Cannot coerce extension array to dtype 'int64'. "
     with tm.assert_raises_regex(ValueError, xpr):
         pd.DataFrame({"A": arr}, dtype='int64')
+
+
+_ts = pd.Series(DecimalArray(make_data()))
+
+
+class TestOperator(BaseDecimal, TestSeriesOperators):
+    """Run the Series operator tests on a Series backed by DecimalArray."""
+    @cache_readonly
+    def ts(self):
+        ts = _ts.copy()
+        ts.name = 'ts'
+        return ts
+
+    def test_operators(self):
+        def absfunc(v):
+            # abs() of a Series is rebuilt element-wise through its array
+            if isinstance(v, pd.Series):
+                vals = v.values
+                return pd.Series(vals._from_sequence([abs(i) for i in vals]))
+            return abs(v)
+        # The default decimal context traps DivisionByZero and
+        # InvalidOperation. Disable them in a local context only, so the
+        # thread's context is restored even if the parent test raises.
+        with decimal.localcontext() as context:
+            context.traps[decimal.DivisionByZero] = 0
+            context.traps[decimal.InvalidOperation] = 0
+            super(TestOperator, self).test_operators(absfunc)
+
+    def test_operators_corner(self):
+        pytest.skip("Cannot add empty Series of float64 to DecimalArray")
+
+    def test_divmod(self):
+        pytest.skip("divmod not appropriate for Decimal type")