Skip to content

REF: ops.missing #27257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
from pandas.core.indexes.frozen import FrozenList
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
from pandas.core.ops.missing import dispatch_missing
import pandas.core.sorting as sorting
from pandas.core.strings import StringMethods

Expand Down Expand Up @@ -154,7 +155,7 @@ def index_arithmetic_method(self, other):
with np.errstate(all="ignore"):
result = op(values, other)

result = missing.dispatch_missing(op, values, other, result)
result = dispatch_missing(op, values, other, result)

attrs = self._get_attributes_dict()
attrs = self._maybe_update_attributes(attrs)
Expand Down
138 changes: 0 additions & 138 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""
Routines for filling missing data.
"""
import operator

import numpy as np

from pandas._libs import algos, lib
Expand All @@ -13,7 +11,6 @@
ensure_float64,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_float_dtype,
is_integer,
is_integer_dtype,
is_numeric_v_string_like,
Expand Down Expand Up @@ -578,141 +575,6 @@ def clean_reindex_fill_method(method):
return clean_fill_method(method, allow_nearest=True)


def fill_zeros(result, x, y, name, fill):
"""
Replace entries of ``result`` that came from an integer division by zero.

If ``name`` denotes a reversed op (starts with "r" or "__r"), x and y
are swapped first so that y is always treated as the divisor.

If y is an integer scalar or has an integer dtype and contains zeros,
the corresponding non-NaN positions of ``result`` are set to ``fill``
and ``result`` is cast to float64.

Parameters
----------
result : ndarray
    Raw output of the arithmetic operation.
x : object
    Left operand as passed by the caller.
y : object
    Right operand as passed by the caller.
name : str
    Operation name, e.g. "__mod__" or "__rfloordiv__".
fill : scalar or None
    Value written at division-by-zero positions; None disables filling.

Returns
-------
ndarray
    ``result`` unchanged if nothing had to be filled, otherwise a
    float64 array of the same shape.
"""
# Nothing to do without a fill value or when the result is already float
if fill is None or is_float_dtype(result):
return result

if name.startswith(("r", "__r")):
x, y = y, x

is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
is_scalar_type = is_scalar(y)

# The divisor is neither array-like nor a scalar: leave result untouched
if not is_variable_type and not is_scalar_type:
return result

if is_scalar_type:
y = np.array(y)

if is_integer_dtype(y):

if (y == 0).any():

# GH 7325, mask and nans must be broadcastable (also: PR 9308)
# Raveling and then reshaping makes np.putmask faster
mask = ((y == 0) & ~np.isnan(result)).ravel()

shape = result.shape
result = result.astype("float64", copy=False).ravel()

np.putmask(result, mask, fill)

# if we have a fill of inf, then sign it correctly
# (GH 6178 and PR 9308)
if np.isinf(fill):
signs = y if name.startswith(("r", "__r")) else x
signs = np.sign(signs.astype("float", copy=False))
negative_inf_mask = (signs.ravel() < 0) & mask
np.putmask(result, negative_inf_mask, -fill)

# For floor division, zero divided by zero becomes NaN
if "floordiv" in name:  # (PR 9308)
nan_mask = ((y == 0) & (x == 0)).ravel()
np.putmask(result, nan_mask, np.nan)

result = result.reshape(shape)

return result


def mask_zero_div_zero(x, y, result, copy=False):
"""
Fill the results of division by zero: 0 / 0 or 0 // 0 becomes np.nan,
positive / 0 becomes np.inf and negative / 0 becomes -np.inf, regardless
of the dtypes of the numerator or the denominator.

Parameters
----------
x : ndarray
    Numerator.
y : ndarray or scalar
    Denominator; a scalar is wrapped in a 0-dim ndarray.
result : ndarray
    Raw output of the division.
copy : bool (default False)
    Whether to always create a new array or try to fill in the existing
    array if possible.

Returns
-------
filled_result : ndarray

Examples
--------
>>> x = np.array([1, 0, -1], dtype=np.int64)
>>> y = 0 # int 0; numpy behavior is different with float
>>> result = x // y
>>> result # raw numpy result does not fill division by zero
array([0, 0, 0])
>>> mask_zero_div_zero(x, y, result)
array([ inf,  nan, -inf])
"""
if is_scalar(y):
y = np.array(y)

zmask = y == 0
if zmask.any():
shape = result.shape

# Classify each zero-divisor position by the sign of the numerator
nan_mask = (zmask & (x == 0)).ravel()
neginf_mask = (zmask & (x < 0)).ravel()
posinf_mask = (zmask & (x > 0)).ravel()

if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
result = result.astype("float64", copy=copy).ravel()

np.putmask(result, nan_mask, np.nan)
np.putmask(result, posinf_mask, np.inf)
np.putmask(result, neginf_mask, -np.inf)

result = result.reshape(shape)

return result


def dispatch_missing(op, left, right, result):
"""
Fill nulls caused by division by zero, casting to a different dtype
if necessary.

Parameters
----------
op : function (operator.add, operator.div, ...)
left : object (Index for non-reversed ops)
right : object (Index for reversed ops)
result : ndarray
    For ``divmod`` this is indexed as a pair (quotient, remainder).

Returns
-------
result : ndarray
    A 2-tuple of ndarrays when ``op`` is ``divmod``.
"""
# Build a dunder-style name; the replace avoids doubled underscores when
# op.__name__ already carries them
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
# getattr(..., None) tolerates interpreters where operator.div is absent
if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]:
result = mask_zero_div_zero(left, right, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, opstr, np.nan)
elif op is divmod:
# Quotient and remainder follow the floordiv and mod rules respectively
res0 = mask_zero_div_zero(left, right, result[0])
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
result = (res0, res1)
return result


def _interp_limit(invalid, fw_limit, bw_limit):
"""
Get indexers of values that won't be filled
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
import pandas as pd
from pandas._typing import ArrayLike
import pandas.core.common as com
import pandas.core.missing as missing

from . import missing
from .roperator import ( # noqa:F401
radd,
rand_,
Expand Down
165 changes: 165 additions & 0 deletions pandas/core/ops/missing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""
Missing data handling for arithmetic operations.

In particular, pandas conventions regarding division by zero differ
from numpy in the following ways:
1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
the remaining pairs
(the remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN).

pandas convention is to return [-inf, nan, inf] for all dtype
combinations.

Note: the numpy behavior described here is py3-specific.

2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
gives precisely the same results as the // operation.

pandas convention is to return [nan, nan, nan] for all dtype
combinations.

3) divmod behavior consistent with 1) and 2).
"""
import operator

import numpy as np

from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar


def fill_zeros(result, x, y, name, fill):
    """
    Replace entries of ``result`` produced by an integer division by zero.

    For a reversed op (``name`` starting with "r" or "__r") the operands
    are swapped first, so ``y`` is always treated as the divisor. When the
    divisor is an integer scalar or has an integer dtype and contains
    zeros, the matching non-NaN positions of ``result`` are overwritten
    with ``fill`` and the output is cast to float64.

    Parameters
    ----------
    result : ndarray
        Raw output of the arithmetic operation.
    x : object
        Left operand as passed by the caller.
    y : object
        Right operand as passed by the caller.
    name : str
        Operation name, e.g. "__mod__" or "__rfloordiv__".
    fill : scalar or None
        Value written at division-by-zero positions; None disables filling.

    Returns
    -------
    ndarray
        ``result`` itself when nothing needs filling, otherwise a float64
        array with the same shape.
    """
    # No fill requested, or the result is already floating point.
    if fill is None or is_float_dtype(result):
        return result

    reversed_op = name.startswith(("r", "__r"))
    if reversed_op:
        x, y = y, x

    if is_scalar(y):
        y = np.array(y)
    elif not (hasattr(y, "dtype") or hasattr(y, "type")):
        # Divisor is neither a scalar nor array-like: nothing to inspect.
        return result

    if not is_integer_dtype(y):
        return result

    zero_divisor = y == 0
    if not zero_divisor.any():
        return result

    # GH#7325, mask and nans must be broadcastable (also: GH#9308)
    # Working on the raveled array makes np.putmask faster
    fill_mask = (zero_divisor & ~np.isnan(result)).ravel()
    original_shape = result.shape
    flat = result.astype("float64", copy=False).ravel()

    np.putmask(flat, fill_mask, fill)

    # An infinite fill takes its sign from the operand (GH#6178, GH#9308)
    if np.isinf(fill):
        sign_source = y if reversed_op else x
        sign_values = np.sign(sign_source.astype("float", copy=False))
        np.putmask(flat, (sign_values.ravel() < 0) & fill_mask, -fill)

    # Floor division of zero by zero is NaN (GH#9308)
    if "floordiv" in name:
        np.putmask(flat, (zero_divisor & (x == 0)).ravel(), np.nan)

    return flat.reshape(original_shape)


def mask_zero_div_zero(x, y, result, copy=False):
    """
    Fill the results of division by zero, regardless of the dtypes of the
    numerator or the denominator.

    0 / 0 and 0 // 0 become np.nan. A non-zero numerator divided by zero
    becomes +/-np.inf, with the sign taken from the numerator and flipped
    when the divisor is a floating-point negative zero (-0.0).

    Parameters
    ----------
    x : ndarray
        Numerator.
    y : ndarray or scalar
        Denominator; a scalar is wrapped in a 0-dim ndarray.
    result : ndarray
        Raw output of the division.
    copy : bool (default False)
        Whether to always create a new array or try to fill in the existing
        array if possible.

    Returns
    -------
    filled_result : ndarray
        ``result`` itself when no divisor is zero, otherwise a float64
        array of the same shape.

    Examples
    --------
    >>> x = np.array([1, 0, -1], dtype=np.int64)
    >>> y = 0       # int 0; numpy behavior is different with float
    >>> result = x // y
    >>> result      # raw numpy result does not fill division by zero
    array([0, 0, 0])
    >>> mask_zero_div_zero(x, y, result)
    array([ inf,  nan, -inf])
    """
    if is_scalar(y):
        y = np.array(y)

    zmask = y == 0
    if not zmask.any():
        return result

    shape = result.shape

    # -0.0 compares equal to zero but flips the sign of the infinite result.
    # Only floating-point divisors can carry a negative zero.
    if is_float_dtype(y):
        zneg_mask = zmask & np.signbit(np.asarray(y))
    else:
        zneg_mask = np.zeros_like(zmask)
    zpos_mask = zmask & ~zneg_mask

    # Comparing NaN numerators against 0 may emit "invalid value" warnings
    with np.errstate(invalid="ignore"):
        nan_mask = (zmask & (x == 0)).ravel()
        neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
        posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()

    if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
        # Fill 0/0 with NaN and non-zero/0 with a correctly signed infinity.
        # Raveling and then reshaping keeps np.putmask on a flat array.
        result = result.astype("float64", copy=copy).ravel()

        np.putmask(result, nan_mask, np.nan)
        np.putmask(result, posinf_mask, np.inf)
        np.putmask(result, neginf_mask, -np.inf)

        result = result.reshape(shape)

    return result


def dispatch_missing(op, left, right, result):
    """
    Apply the pandas division-by-zero conventions to a raw arithmetic
    result, casting to a different dtype if necessary.

    Parameters
    ----------
    op : function (operator.add, operator.floordiv, ...)
        The operation that produced ``result``. Only ``operator.floordiv``,
        ``operator.mod`` and ``divmod`` are post-processed.
    left : object (Index for non-reversed ops)
    right : object (Index for reversed ops)
    result : ndarray
        For ``divmod`` this is indexed as a pair (quotient, remainder).

    Returns
    -------
    result : ndarray
        A 2-tuple of ndarrays when ``op`` is ``divmod``.
    """
    # Dunder-style name, e.g. "mod" -> "__mod__"; the replace avoids doubled
    # underscores when op.__name__ already carries them.
    opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")

    if op is operator.floordiv:
        # Note: no need to do this for truediv; in py3 numpy behaves the way
        # we want.
        return mask_zero_div_zero(left, right, result)

    if op is operator.mod:
        return fill_zeros(result, left, right, opstr, np.nan)

    if op is divmod:
        # Quotient follows the floordiv rule, remainder follows the mod rule.
        quotient = mask_zero_div_zero(left, right, result[0])
        remainder = fill_zeros(result[1], left, right, opstr, np.nan)
        return (quotient, remainder)

    return result
3 changes: 1 addition & 2 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,8 +1063,7 @@ def test_replace_series_datetime_tz(self):

# TODO(jreback) commented out to only have a single xfail printed
@pytest.mark.xfail(
reason="different tz, " "currently mask_missing raises SystemError",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a bunch of these appear to have been introduced by black

strict=False,
reason="different tz, currently mask_missing raises SystemError", strict=False
)
# @pytest.mark.parametrize('how', ['dict', 'series'])
# @pytest.mark.parametrize('to_key', [
Expand Down