From 63faedcc9d894c0bd9d32b980e72eaeade6c6911 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 5 Jul 2019 15:44:10 -0700 Subject: [PATCH 1/3] move ops-masking funcs to ops.masking --- pandas/core/indexes/base.py | 3 +- pandas/core/missing.py | 138 ----------------------- pandas/core/ops/__init__.py | 2 +- pandas/core/ops/missing.py | 147 +++++++++++++++++++++++++ pandas/tests/indexing/test_coercion.py | 2 +- 5 files changed, 151 insertions(+), 141 deletions(-) create mode 100644 pandas/core/ops/missing.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 973a022cfc3f1..d3837617d231a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -69,6 +69,7 @@ from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name, make_invalid_op +from pandas.core.ops.missing import dispatch_missing import pandas.core.sorting as sorting from pandas.core.strings import StringMethods @@ -154,7 +155,7 @@ def index_arithmetic_method(self, other): with np.errstate(all="ignore"): result = op(values, other) - result = missing.dispatch_missing(op, values, other, result) + result = dispatch_missing(op, values, other, result) attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ad4b5e4523806..8f0abc91f7aef 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,8 +1,6 @@ """ Routines for filling missing data. 
""" -import operator - import numpy as np from pandas._libs import algos, lib @@ -13,7 +11,6 @@ ensure_float64, is_datetime64_dtype, is_datetime64tz_dtype, - is_float_dtype, is_integer, is_integer_dtype, is_numeric_v_string_like, @@ -578,141 +575,6 @@ def clean_reindex_fill_method(method): return clean_fill_method(method, allow_nearest=True) -def fill_zeros(result, x, y, name, fill): - """ - If this is a reversed op, then flip x,y - - If we have an integer value (or array in y) - and we have 0's, fill them with the fill, - return the result. - - Mask the nan's from x. - """ - if fill is None or is_float_dtype(result): - return result - - if name.startswith(("r", "__r")): - x, y = y, x - - is_variable_type = hasattr(y, "dtype") or hasattr(y, "type") - is_scalar_type = is_scalar(y) - - if not is_variable_type and not is_scalar_type: - return result - - if is_scalar_type: - y = np.array(y) - - if is_integer_dtype(y): - - if (y == 0).any(): - - # GH 7325, mask and nans must be broadcastable (also: PR 9308) - # Raveling and then reshaping makes np.putmask faster - mask = ((y == 0) & ~np.isnan(result)).ravel() - - shape = result.shape - result = result.astype("float64", copy=False).ravel() - - np.putmask(result, mask, fill) - - # if we have a fill of inf, then sign it correctly - # (GH 6178 and PR 9308) - if np.isinf(fill): - signs = y if name.startswith(("r", "__r")) else x - signs = np.sign(signs.astype("float", copy=False)) - negative_inf_mask = (signs.ravel() < 0) & mask - np.putmask(result, negative_inf_mask, -fill) - - if "floordiv" in name: # (PR 9308) - nan_mask = ((y == 0) & (x == 0)).ravel() - np.putmask(result, nan_mask, np.nan) - - result = result.reshape(shape) - - return result - - -def mask_zero_div_zero(x, y, result, copy=False): - """ - Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes - of the numerator or the denominator. 
- - Parameters - ---------- - x : ndarray - y : ndarray - result : ndarray - copy : bool (default False) - Whether to always create a new array or try to fill in the existing - array if possible. - - Returns - ------- - filled_result : ndarray - - Examples - -------- - >>> x = np.array([1, 0, -1], dtype=np.int64) - >>> y = 0 # int 0; numpy behavior is different with float - >>> result = x / y - >>> result # raw numpy result does not fill division by zero - array([0, 0, 0]) - >>> mask_zero_div_zero(x, y, result) - array([ inf, nan, -inf]) - """ - if is_scalar(y): - y = np.array(y) - - zmask = y == 0 - if zmask.any(): - shape = result.shape - - nan_mask = (zmask & (x == 0)).ravel() - neginf_mask = (zmask & (x < 0)).ravel() - posinf_mask = (zmask & (x > 0)).ravel() - - if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): - # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN - result = result.astype("float64", copy=copy).ravel() - - np.putmask(result, nan_mask, np.nan) - np.putmask(result, posinf_mask, np.inf) - np.putmask(result, neginf_mask, -np.inf) - - result = result.reshape(shape) - - return result - - -def dispatch_missing(op, left, right, result): - """ - Fill nulls caused by division by zero, casting to a different dtype - if necessary. - - Parameters - ---------- - op : function (operator.add, operator.div, ...) 
- left : object (Index for non-reversed ops) - right : object (Index fof reversed ops) - result : ndarray - - Returns - ------- - result : ndarray - """ - opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__") - if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]: - result = mask_zero_div_zero(left, right, result) - elif op is operator.mod: - result = fill_zeros(result, left, right, opstr, np.nan) - elif op is divmod: - res0 = mask_zero_div_zero(left, right, result[0]) - res1 = fill_zeros(result[1], left, right, opstr, np.nan) - result = (res0, res1) - return result - - def _interp_limit(invalid, fw_limit, bw_limit): """ Get indexers of values that won't be filled diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4692ec45df0ad..3ce6da6891a7f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -49,8 +49,8 @@ import pandas as pd from pandas._typing import ArrayLike import pandas.core.common as com -import pandas.core.missing as missing +from . import missing from .roperator import ( # noqa:F401 radd, rand_, diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py new file mode 100644 index 0000000000000..a2aadce8af158 --- /dev/null +++ b/pandas/core/ops/missing.py @@ -0,0 +1,147 @@ +""" +Missing data handling for arithmetic operations. + +In particular, pandas conventions regarding divison by zero differ +from numpy in the following ways: + - FIXME: fill this in +""" +import operator + +import numpy as np + +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar + + +def fill_zeros(result, x, y, name, fill): + """ + If this is a reversed op, then flip x,y + + If we have an integer value (or array in y) + and we have 0's, fill them with the fill, + return the result. + + Mask the nan's from x. 
+ """ + if fill is None or is_float_dtype(result): + return result + + if name.startswith(("r", "__r")): + x, y = y, x + + is_variable_type = hasattr(y, "dtype") or hasattr(y, "type") + is_scalar_type = is_scalar(y) + + if not is_variable_type and not is_scalar_type: + return result + + if is_scalar_type: + y = np.array(y) + + if is_integer_dtype(y): + + if (y == 0).any(): + + # GH 7325, mask and nans must be broadcastable (also: PR 9308) + # Raveling and then reshaping makes np.putmask faster + mask = ((y == 0) & ~np.isnan(result)).ravel() + + shape = result.shape + result = result.astype("float64", copy=False).ravel() + + np.putmask(result, mask, fill) + + # if we have a fill of inf, then sign it correctly + # (GH 6178 and PR 9308) + if np.isinf(fill): + signs = y if name.startswith(("r", "__r")) else x + signs = np.sign(signs.astype("float", copy=False)) + negative_inf_mask = (signs.ravel() < 0) & mask + np.putmask(result, negative_inf_mask, -fill) + + if "floordiv" in name: # (PR 9308) + nan_mask = ((y == 0) & (x == 0)).ravel() + np.putmask(result, nan_mask, np.nan) + + result = result.reshape(shape) + + return result + + +def mask_zero_div_zero(x, y, result, copy=False): + """ + Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator. + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + copy : bool (default False) + Whether to always create a new array or try to fill in the existing + array if possible. 
+ + Returns + ------- + filled_result : ndarray + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x // y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) + """ + if is_scalar(y): + y = np.array(y) + + zmask = y == 0 + if zmask.any(): + shape = result.shape + + nan_mask = (zmask & (x == 0)).ravel() + neginf_mask = (zmask & (x < 0)).ravel() + posinf_mask = (zmask & (x > 0)).ravel() + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN + result = result.astype("float64", copy=copy).ravel() + + np.putmask(result, nan_mask, np.nan) + np.putmask(result, posinf_mask, np.inf) + np.putmask(result, neginf_mask, -np.inf) + + result = result.reshape(shape) + + return result + + +def dispatch_missing(op, left, right, result): + """ + Fill nulls caused by division by zero, casting to a different dtype + if necessary. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) 
+ left : object (Index for non-reversed ops) + right : object (Index for reversed ops) + result : ndarray + + Returns + ------- + result : ndarray + """ + opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__") + if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]: + result = mask_zero_div_zero(left, right, result) + elif op is operator.mod: + result = fill_zeros(result, left, right, opstr, np.nan) + elif op is divmod: + res0 = mask_zero_div_zero(left, right, result[0]) + res1 = fill_zeros(result[1], left, right, opstr, np.nan) + result = (res0, res1) + return result diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index f46fbcdb504e9..078bb39653c9d 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1063,7 +1063,7 @@ def test_replace_series_datetime_tz(self): # TODO(jreback) commented out to only have a single xfail printed @pytest.mark.xfail( - reason="different tz, " "currently mask_missing raises SystemError", + reason="different tz, currently mask_missing raises SystemError", strict=False, ) # @pytest.mark.parametrize('how', ['dict', 'series']) From 4e42cd52ab89e9c744f65e9d382a2f6130d74bd3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 5 Jul 2019 18:47:08 -0700 Subject: [PATCH 2/3] avoid op for truediv, docstring --- pandas/core/ops/missing.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index a2aadce8af158..947dfc68ac7c3 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -3,7 +3,23 @@ In particular, pandas conventions regarding divison by zero differ from numpy in the following ways: - - FIXME: fill this in + 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2) + gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for + the remaining pairs + (the 
remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN). + + pandas convention is to return [-inf, nan, inf] for all dtype + combinations. + + Note: the numpy behavior described here is py3-specific. + + 2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2) + gives precisely the same results as the // operation. + + pandas convention is to return [nan, nan, nan] for all dtype + combinations. + + 3) divmod behavior consistent with 1) and 2). """ import operator @@ -41,7 +57,7 @@ def fill_zeros(result, x, y, name, fill): if (y == 0).any(): - # GH 7325, mask and nans must be broadcastable (also: PR 9308) + # GH#7325, mask and nans must be broadcastable (also: GH#9308) # Raveling and then reshaping makes np.putmask faster mask = ((y == 0) & ~np.isnan(result)).ravel() @@ -51,14 +67,14 @@ def fill_zeros(result, x, y, name, fill): np.putmask(result, mask, fill) # if we have a fill of inf, then sign it correctly - # (GH 6178 and PR 9308) + # (GH#6178 and GH#9308) if np.isinf(fill): signs = y if name.startswith(("r", "__r")) else x signs = np.sign(signs.astype("float", copy=False)) negative_inf_mask = (signs.ravel() < 0) & mask np.putmask(result, negative_inf_mask, -fill) - if "floordiv" in name: # (PR 9308) + if "floordiv" in name: # (GH#9308) nan_mask = ((y == 0) & (x == 0)).ravel() np.putmask(result, nan_mask, np.nan) @@ -136,7 +152,9 @@ def dispatch_missing(op, left, right, result): result : ndarray """ opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__") - if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]: + if op is operator.floordiv: + # Note: no need to do this for truediv; in py3 numpy behaves the way + # we want. 
result = mask_zero_div_zero(left, right, result) elif op is operator.mod: result = fill_zeros(result, left, right, opstr, np.nan) From 662c25bb4f5e787bd790a0c1e969398038e53a8c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 5 Jul 2019 18:50:56 -0700 Subject: [PATCH 3/3] blackify --- pandas/tests/indexing/test_coercion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 078bb39653c9d..a18f8380f80c1 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1063,8 +1063,7 @@ def test_replace_series_datetime_tz(self): # TODO(jreback) commented out to only have a single xfail printed @pytest.mark.xfail( - reason="different tz, currently mask_missing raises SystemError", - strict=False, + reason="different tz, currently mask_missing raises SystemError", strict=False ) # @pytest.mark.parametrize('how', ['dict', 'series']) # @pytest.mark.parametrize('to_key', [