REF: ops.missing (#27257)

jbrockmendel · jreback · commit 8794516cf428 · 2019-07-06T13:43:36.000-04:00
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -69,6 +69,7 @@
 from pandas.core.indexes.frozen import FrozenList
 import pandas.core.missing as missing
 from pandas.core.ops import get_op_result_name, make_invalid_op
+from pandas.core.ops.missing import dispatch_missing
 import pandas.core.sorting as sorting
 from pandas.core.strings import StringMethods
 
@@ -154,7 +155,7 @@ def index_arithmetic_method(self, other):
         with np.errstate(all="ignore"):
             result = op(values, other)
 
-        result = missing.dispatch_missing(op, values, other, result)
+        result = dispatch_missing(op, values, other, result)
 
         attrs = self._get_attributes_dict()
         attrs = self._maybe_update_attributes(attrs)
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -1,8 +1,6 @@
 """
 Routines for filling missing data.
 """
-import operator
-
 import numpy as np
 
 from pandas._libs import algos, lib
@@ -13,7 +11,6 @@
     ensure_float64,
     is_datetime64_dtype,
     is_datetime64tz_dtype,
-    is_float_dtype,
     is_integer,
     is_integer_dtype,
     is_numeric_v_string_like,
@@ -578,141 +575,6 @@ def clean_reindex_fill_method(method):
     return clean_fill_method(method, allow_nearest=True)
 
 
-def fill_zeros(result, x, y, name, fill):
-    """
-    If this is a reversed op, then flip x,y
-
-    If we have an integer value (or array in y)
-    and we have 0's, fill them with the fill,
-    return the result.
-
-    Mask the nan's from x.
-    """
-    if fill is None or is_float_dtype(result):
-        return result
-
-    if name.startswith(("r", "__r")):
-        x, y = y, x
-
-    is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
-    is_scalar_type = is_scalar(y)
-
-    if not is_variable_type and not is_scalar_type:
-        return result
-
-    if is_scalar_type:
-        y = np.array(y)
-
-    if is_integer_dtype(y):
-
-        if (y == 0).any():
-
-            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
-            # Raveling and then reshaping makes np.putmask faster
-            mask = ((y == 0) & ~np.isnan(result)).ravel()
-
-            shape = result.shape
-            result = result.astype("float64", copy=False).ravel()
-
-            np.putmask(result, mask, fill)
-
-            # if we have a fill of inf, then sign it correctly
-            # (GH 6178 and PR 9308)
-            if np.isinf(fill):
-                signs = y if name.startswith(("r", "__r")) else x
-                signs = np.sign(signs.astype("float", copy=False))
-                negative_inf_mask = (signs.ravel() < 0) & mask
-                np.putmask(result, negative_inf_mask, -fill)
-
-            if "floordiv" in name:  # (PR 9308)
-                nan_mask = ((y == 0) & (x == 0)).ravel()
-                np.putmask(result, nan_mask, np.nan)
-
-            result = result.reshape(shape)
-
-    return result
-
-
-def mask_zero_div_zero(x, y, result, copy=False):
-    """
-    Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
-    of the numerator or the denominator.
-
-    Parameters
-    ----------
-    x : ndarray
-    y : ndarray
-    result : ndarray
-    copy : bool (default False)
-        Whether to always create a new array or try to fill in the existing
-        array if possible.
-
-    Returns
-    -------
-    filled_result : ndarray
-
-    Examples
-    --------
-    >>> x = np.array([1, 0, -1], dtype=np.int64)
-    >>> y = 0       # int 0; numpy behavior is different with float
-    >>> result = x / y
-    >>> result      # raw numpy result does not fill division by zero
-    array([0, 0, 0])
-    >>> mask_zero_div_zero(x, y, result)
-    array([ inf,  nan, -inf])
-    """
-    if is_scalar(y):
-        y = np.array(y)
-
-    zmask = y == 0
-    if zmask.any():
-        shape = result.shape
-
-        nan_mask = (zmask & (x == 0)).ravel()
-        neginf_mask = (zmask & (x < 0)).ravel()
-        posinf_mask = (zmask & (x > 0)).ravel()
-
-        if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
-            # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
-            result = result.astype("float64", copy=copy).ravel()
-
-            np.putmask(result, nan_mask, np.nan)
-            np.putmask(result, posinf_mask, np.inf)
-            np.putmask(result, neginf_mask, -np.inf)
-
-            result = result.reshape(shape)
-
-    return result
-
-
-def dispatch_missing(op, left, right, result):
-    """
-    Fill nulls caused by division by zero, casting to a different dtype
-    if necessary.
-
-    Parameters
-    ----------
-    op : function (operator.add, operator.div, ...)
-    left : object (Index for non-reversed ops)
-    right : object (Index fof reversed ops)
-    result : ndarray
-
-    Returns
-    -------
-    result : ndarray
-    """
-    opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
-    if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]:
-        result = mask_zero_div_zero(left, right, result)
-    elif op is operator.mod:
-        result = fill_zeros(result, left, right, opstr, np.nan)
-    elif op is divmod:
-        res0 = mask_zero_div_zero(left, right, result[0])
-        res1 = fill_zeros(result[1], left, right, opstr, np.nan)
-        result = (res0, res1)
-    return result
-
-
 def _interp_limit(invalid, fw_limit, bw_limit):
     """
     Get indexers of values that won't be filled
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
@@ -49,8 +49,8 @@
 import pandas as pd
 from pandas._typing import ArrayLike
 import pandas.core.common as com
-import pandas.core.missing as missing
 
+from . import missing
 from .roperator import (  # noqa:F401
     radd,
     rand_,
diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py
@@ -0,0 +1,165 @@
+"""
+Missing data handling for arithmetic operations.
+
+In particular, pandas conventions regarding division by zero differ
+from numpy in the following ways:
+    1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
+       gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
+       the remaining pairs
+       (the remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN).
+
+       pandas convention is to return [-inf, nan, inf] for all dtype
+       combinations.
+
+       Note: the numpy behavior described here is py3-specific.
+
+    2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
+       gives precisely the same results as the // operation.
+
+       pandas convention is to return [nan, nan, nan] for all dtype
+       combinations.
+
+    3) divmod behavior consistent with 1) and 2).
+"""
+import operator
+
+import numpy as np
+
+from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar
+
+
+def fill_zeros(result, x, y, name, fill):
+    """
+    If this is a reversed op, then flip x,y
+
+    If we have an integer value (or array in y)
+    and we have 0's, fill them with the fill,
+    return the result.
+
+    Mask the nan's from x.
+    """
+    if fill is None or is_float_dtype(result):
+        return result
+
+    if name.startswith(("r", "__r")):
+        x, y = y, x
+
+    is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
+    is_scalar_type = is_scalar(y)
+
+    if not is_variable_type and not is_scalar_type:
+        return result
+
+    if is_scalar_type:
+        y = np.array(y)
+
+    if is_integer_dtype(y):
+
+        if (y == 0).any():
+
+            # GH#7325, mask and nans must be broadcastable (also: GH#9308)
+            # Raveling and then reshaping makes np.putmask faster
+            mask = ((y == 0) & ~np.isnan(result)).ravel()
+
+            shape = result.shape
+            result = result.astype("float64", copy=False).ravel()
+
+            np.putmask(result, mask, fill)
+
+            # if we have a fill of inf, then sign it correctly
+            # (GH#6178 and GH#9308)
+            if np.isinf(fill):
+                signs = y if name.startswith(("r", "__r")) else x
+                signs = np.sign(signs.astype("float", copy=False))
+                negative_inf_mask = (signs.ravel() < 0) & mask
+                np.putmask(result, negative_inf_mask, -fill)
+
+            if "floordiv" in name:  # (GH#9308)
+                nan_mask = ((y == 0) & (x == 0)).ravel()
+                np.putmask(result, nan_mask, np.nan)
+
+            result = result.reshape(shape)
+
+    return result
+
+
+def mask_zero_div_zero(x, y, result, copy=False):
+    """
+    Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
+    of the numerator or the denominator.
+
+    Parameters
+    ----------
+    x : ndarray
+    y : ndarray
+    result : ndarray
+    copy : bool (default False)
+        Whether to always create a new array or try to fill in the existing
+        array if possible.
+
+    Returns
+    -------
+    filled_result : ndarray
+
+    Examples
+    --------
+    >>> x = np.array([1, 0, -1], dtype=np.int64)
+    >>> y = 0       # int 0; numpy behavior is different with float
+    >>> result = x // y
+    >>> result      # raw numpy result does not fill division by zero
+    array([0, 0, 0])
+    >>> mask_zero_div_zero(x, y, result)
+    array([ inf,  nan, -inf])
+    """
+    if is_scalar(y):
+        y = np.array(y)
+
+    zmask = y == 0
+    if zmask.any():
+        shape = result.shape
+
+        nan_mask = (zmask & (x == 0)).ravel()
+        neginf_mask = (zmask & (x < 0)).ravel()
+        posinf_mask = (zmask & (x > 0)).ravel()
+
+        if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
+            # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
+            result = result.astype("float64", copy=copy).ravel()
+
+            np.putmask(result, nan_mask, np.nan)
+            np.putmask(result, posinf_mask, np.inf)
+            np.putmask(result, neginf_mask, -np.inf)
+
+            result = result.reshape(shape)
+
+    return result
+
+
+def dispatch_missing(op, left, right, result):
+    """
+    Fill nulls caused by division by zero, casting to a different dtype
+    if necessary.
+
+    Parameters
+    ----------
+    op : function (operator.add, operator.truediv, ...)
+    left : object (Index for non-reversed ops)
+    right : object (Index for reversed ops)
+    result : ndarray
+
+    Returns
+    -------
+    result : ndarray
+    """
+    opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
+    if op is operator.floordiv:
+        # Note: no need to do this for truediv; in py3 numpy behaves the way
+        #  we want.
+        result = mask_zero_div_zero(left, right, result)
+    elif op is operator.mod:
+        result = fill_zeros(result, left, right, opstr, np.nan)
+    elif op is divmod:
+        res0 = mask_zero_div_zero(left, right, result[0])
+        res1 = fill_zeros(result[1], left, right, opstr, np.nan)
+        result = (res0, res1)
+    return result
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -1063,8 +1063,7 @@ def test_replace_series_datetime_tz(self):
 
     # TODO(jreback) commented out to only have a single xfail printed
     @pytest.mark.xfail(
-        reason="different tz, " "currently mask_missing raises SystemError",
-        strict=False,
+        reason="different tz, currently mask_missing raises SystemError", strict=False
     )
     # @pytest.mark.parametrize('how', ['dict', 'series'])
     # @pytest.mark.parametrize('to_key', [

Original file line number	Diff line number	Diff line change
`@@ -1063,8 +1063,7 @@ def test_replace_series_datetime_tz(self):`
`1063`	`1063`
`1064`	`1064`	`# TODO(jreback) commented out to only have a single xfail printed`
`1065`	`1065`	`@pytest.mark.xfail(`
`1066`		`- reason="different tz, " "currently mask_missing raises SystemError",`
`1067`		`- strict=False,`
	`1066`	`+ reason="different tz, currently mask_missing raises SystemError", strict=False`
`1068`	`1067`	`)`
`1069`	`1068`	`# @pytest.mark.parametrize('how', ['dict', 'series'])`
`1070`	`1069`	`# @pytest.mark.parametrize('to_key', [`