Skip to content

Commit 8794516

Browse files
jbrockmendel authored and jreback committed
REF: ops.missing (#27257)
1 parent 1219b0f commit 8794516

File tree

5 files changed

+169
-142
lines changed

5 files changed

+169
-142
lines changed

pandas/core/indexes/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from pandas.core.indexes.frozen import FrozenList
7070
import pandas.core.missing as missing
7171
from pandas.core.ops import get_op_result_name, make_invalid_op
72+
from pandas.core.ops.missing import dispatch_missing
7273
import pandas.core.sorting as sorting
7374
from pandas.core.strings import StringMethods
7475

@@ -154,7 +155,7 @@ def index_arithmetic_method(self, other):
154155
with np.errstate(all="ignore"):
155156
result = op(values, other)
156157

157-
result = missing.dispatch_missing(op, values, other, result)
158+
result = dispatch_missing(op, values, other, result)
158159

159160
attrs = self._get_attributes_dict()
160161
attrs = self._maybe_update_attributes(attrs)

pandas/core/missing.py

Lines changed: 0 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
"""
22
Routines for filling missing data.
33
"""
4-
import operator
5-
64
import numpy as np
75

86
from pandas._libs import algos, lib
@@ -13,7 +11,6 @@
1311
ensure_float64,
1412
is_datetime64_dtype,
1513
is_datetime64tz_dtype,
16-
is_float_dtype,
1714
is_integer,
1815
is_integer_dtype,
1916
is_numeric_v_string_like,
@@ -578,141 +575,6 @@ def clean_reindex_fill_method(method):
578575
return clean_fill_method(method, allow_nearest=True)
579576

580577

581-
def fill_zeros(result, x, y, name, fill):
582-
"""
583-
If this is a reversed op, then flip x,y
584-
585-
If we have an integer value (or array in y)
586-
and we have 0's, fill them with the fill,
587-
return the result.
588-
589-
Mask the nan's from x.
590-
"""
591-
if fill is None or is_float_dtype(result):
592-
return result
593-
594-
if name.startswith(("r", "__r")):
595-
x, y = y, x
596-
597-
is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
598-
is_scalar_type = is_scalar(y)
599-
600-
if not is_variable_type and not is_scalar_type:
601-
return result
602-
603-
if is_scalar_type:
604-
y = np.array(y)
605-
606-
if is_integer_dtype(y):
607-
608-
if (y == 0).any():
609-
610-
# GH 7325, mask and nans must be broadcastable (also: PR 9308)
611-
# Raveling and then reshaping makes np.putmask faster
612-
mask = ((y == 0) & ~np.isnan(result)).ravel()
613-
614-
shape = result.shape
615-
result = result.astype("float64", copy=False).ravel()
616-
617-
np.putmask(result, mask, fill)
618-
619-
# if we have a fill of inf, then sign it correctly
620-
# (GH 6178 and PR 9308)
621-
if np.isinf(fill):
622-
signs = y if name.startswith(("r", "__r")) else x
623-
signs = np.sign(signs.astype("float", copy=False))
624-
negative_inf_mask = (signs.ravel() < 0) & mask
625-
np.putmask(result, negative_inf_mask, -fill)
626-
627-
if "floordiv" in name: # (PR 9308)
628-
nan_mask = ((y == 0) & (x == 0)).ravel()
629-
np.putmask(result, nan_mask, np.nan)
630-
631-
result = result.reshape(shape)
632-
633-
return result
634-
635-
636-
def mask_zero_div_zero(x, y, result, copy=False):
637-
"""
638-
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
639-
of the numerator or the denominator.
640-
641-
Parameters
642-
----------
643-
x : ndarray
644-
y : ndarray
645-
result : ndarray
646-
copy : bool (default False)
647-
Whether to always create a new array or try to fill in the existing
648-
array if possible.
649-
650-
Returns
651-
-------
652-
filled_result : ndarray
653-
654-
Examples
655-
--------
656-
>>> x = np.array([1, 0, -1], dtype=np.int64)
657-
>>> y = 0 # int 0; numpy behavior is different with float
658-
>>> result = x / y
659-
>>> result # raw numpy result does not fill division by zero
660-
array([0, 0, 0])
661-
>>> mask_zero_div_zero(x, y, result)
662-
array([ inf, nan, -inf])
663-
"""
664-
if is_scalar(y):
665-
y = np.array(y)
666-
667-
zmask = y == 0
668-
if zmask.any():
669-
shape = result.shape
670-
671-
nan_mask = (zmask & (x == 0)).ravel()
672-
neginf_mask = (zmask & (x < 0)).ravel()
673-
posinf_mask = (zmask & (x > 0)).ravel()
674-
675-
if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
676-
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
677-
result = result.astype("float64", copy=copy).ravel()
678-
679-
np.putmask(result, nan_mask, np.nan)
680-
np.putmask(result, posinf_mask, np.inf)
681-
np.putmask(result, neginf_mask, -np.inf)
682-
683-
result = result.reshape(shape)
684-
685-
return result
686-
687-
688-
def dispatch_missing(op, left, right, result):
689-
"""
690-
Fill nulls caused by division by zero, casting to a different dtype
691-
if necessary.
692-
693-
Parameters
694-
----------
695-
op : function (operator.add, operator.div, ...)
696-
left : object (Index for non-reversed ops)
697-
right : object (Index fof reversed ops)
698-
result : ndarray
699-
700-
Returns
701-
-------
702-
result : ndarray
703-
"""
704-
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
705-
if op in [operator.truediv, operator.floordiv, getattr(operator, "div", None)]:
706-
result = mask_zero_div_zero(left, right, result)
707-
elif op is operator.mod:
708-
result = fill_zeros(result, left, right, opstr, np.nan)
709-
elif op is divmod:
710-
res0 = mask_zero_div_zero(left, right, result[0])
711-
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
712-
result = (res0, res1)
713-
return result
714-
715-
716578
def _interp_limit(invalid, fw_limit, bw_limit):
717579
"""
718580
Get indexers of values that won't be filled

pandas/core/ops/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
import pandas as pd
5050
from pandas._typing import ArrayLike
5151
import pandas.core.common as com
52-
import pandas.core.missing as missing
5352

53+
from . import missing
5454
from .roperator import ( # noqa:F401
5555
radd,
5656
rand_,

pandas/core/ops/missing.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""
2+
Missing data handling for arithmetic operations.
3+
4+
In particular, pandas conventions regarding division by zero differ
5+
from numpy in the following ways:
6+
1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
7+
gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
8+
the remaining pairs
9+
(the remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN).
10+
11+
pandas convention is to return [-inf, nan, inf] for all dtype
12+
combinations.
13+
14+
Note: the numpy behavior described here is py3-specific.
15+
16+
2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
17+
gives precisely the same results as the // operation.
18+
19+
pandas convention is to return [nan, nan, nan] for all dtype
20+
combinations.
21+
22+
3) divmod behavior consistent with 1) and 2).
23+
"""
24+
import operator
25+
26+
import numpy as np
27+
28+
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar
29+
30+
31+
def fill_zeros(result, x, y, name, fill):
32+
"""
33+
If this is a reversed op, then flip x,y
34+
35+
If we have an integer value (or array in y)
36+
and we have 0's, fill them with the fill,
37+
return the result.
38+
39+
Mask the nan's from x.
40+
"""
41+
if fill is None or is_float_dtype(result):
42+
return result
43+
44+
if name.startswith(("r", "__r")):
45+
x, y = y, x
46+
47+
is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
48+
is_scalar_type = is_scalar(y)
49+
50+
if not is_variable_type and not is_scalar_type:
51+
return result
52+
53+
if is_scalar_type:
54+
y = np.array(y)
55+
56+
if is_integer_dtype(y):
57+
58+
if (y == 0).any():
59+
60+
# GH#7325, mask and nans must be broadcastable (also: GH#9308)
61+
# Raveling and then reshaping makes np.putmask faster
62+
mask = ((y == 0) & ~np.isnan(result)).ravel()
63+
64+
shape = result.shape
65+
result = result.astype("float64", copy=False).ravel()
66+
67+
np.putmask(result, mask, fill)
68+
69+
# if we have a fill of inf, then sign it correctly
70+
# (GH#6178 and GH#9308)
71+
if np.isinf(fill):
72+
signs = y if name.startswith(("r", "__r")) else x
73+
signs = np.sign(signs.astype("float", copy=False))
74+
negative_inf_mask = (signs.ravel() < 0) & mask
75+
np.putmask(result, negative_inf_mask, -fill)
76+
77+
if "floordiv" in name: # (GH#9308)
78+
nan_mask = ((y == 0) & (x == 0)).ravel()
79+
np.putmask(result, nan_mask, np.nan)
80+
81+
result = result.reshape(shape)
82+
83+
return result
84+
85+
86+
def mask_zero_div_zero(x, y, result, copy=False):
87+
"""
88+
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
89+
of the numerator or the denominator.
90+
91+
Parameters
92+
----------
93+
x : ndarray
94+
y : ndarray
95+
result : ndarray
96+
copy : bool (default False)
97+
Whether to always create a new array or try to fill in the existing
98+
array if possible.
99+
100+
Returns
101+
-------
102+
filled_result : ndarray
103+
104+
Examples
105+
--------
106+
>>> x = np.array([1, 0, -1], dtype=np.int64)
107+
>>> y = 0 # int 0; numpy behavior is different with float
108+
>>> result = x // y
109+
>>> result # raw numpy result does not fill division by zero
110+
array([0, 0, 0])
111+
>>> mask_zero_div_zero(x, y, result)
112+
array([ inf, nan, -inf])
113+
"""
114+
if is_scalar(y):
115+
y = np.array(y)
116+
117+
zmask = y == 0
118+
if zmask.any():
119+
shape = result.shape
120+
121+
nan_mask = (zmask & (x == 0)).ravel()
122+
neginf_mask = (zmask & (x < 0)).ravel()
123+
posinf_mask = (zmask & (x > 0)).ravel()
124+
125+
if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
126+
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
127+
result = result.astype("float64", copy=copy).ravel()
128+
129+
np.putmask(result, nan_mask, np.nan)
130+
np.putmask(result, posinf_mask, np.inf)
131+
np.putmask(result, neginf_mask, -np.inf)
132+
133+
result = result.reshape(shape)
134+
135+
return result
136+
137+
138+
def dispatch_missing(op, left, right, result):
139+
"""
140+
Fill nulls caused by division by zero, casting to a different dtype
141+
if necessary.
142+
143+
Parameters
144+
----------
145+
op : function (operator.add, operator.div, ...)
146+
left : object (Index for non-reversed ops)
147+
right : object (Index for reversed ops)
148+
result : ndarray
149+
150+
Returns
151+
-------
152+
result : ndarray
153+
"""
154+
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
155+
if op is operator.floordiv:
156+
# Note: no need to do this for truediv; in py3 numpy behaves the way
157+
# we want.
158+
result = mask_zero_div_zero(left, right, result)
159+
elif op is operator.mod:
160+
result = fill_zeros(result, left, right, opstr, np.nan)
161+
elif op is divmod:
162+
res0 = mask_zero_div_zero(left, right, result[0])
163+
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
164+
result = (res0, res1)
165+
return result

pandas/tests/indexing/test_coercion.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,8 +1063,7 @@ def test_replace_series_datetime_tz(self):
10631063

10641064
# TODO(jreback) commented out to only have a single xfail printed
10651065
@pytest.mark.xfail(
1066-
reason="different tz, " "currently mask_missing raises SystemError",
1067-
strict=False,
1066+
reason="different tz, currently mask_missing raises SystemError", strict=False
10681067
)
10691068
# @pytest.mark.parametrize('how', ['dict', 'series'])
10701069
# @pytest.mark.parametrize('to_key', [

0 commit comments

Comments
 (0)