Skip to content

REF: get str_rep in numexpr code #34325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 40 additions & 7 deletions pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Offer fast expression evaluation through numexpr

"""

import operator
import warnings

import numpy as np
Expand All @@ -15,6 +15,7 @@
from pandas.core.dtypes.generic import ABCDataFrame

from pandas.core.computation.check import _NUMEXPR_INSTALLED
from pandas.core.ops import roperator

if _NUMEXPR_INSTALLED:
import numexpr as ne
Expand Down Expand Up @@ -120,6 +121,38 @@ def _evaluate_numexpr(op, op_str, a, b):
return result


# Lookup table from a binary operator function to the operator string that is
# passed along to the numexpr evaluation path.  The forward and the reflected
# (``roperator``) variant of an operator share the same string.
_op_str_mapping = {
    # arithmetic operators
    operator.add: "+",
    roperator.radd: "+",
    operator.mul: "*",
    roperator.rmul: "*",
    operator.sub: "-",
    roperator.rsub: "-",
    operator.truediv: "/",
    roperator.rtruediv: "/",
    operator.floordiv: "//",
    roperator.rfloordiv: "//",
    operator.mod: "%",
    roperator.rmod: "%",
    operator.pow: "**",
    roperator.rpow: "**",
    # comparison operators (only the forward functions are listed)
    operator.eq: "==",
    operator.ne: "!=",
    operator.le: "<=",
    operator.lt: "<",
    operator.ge: ">=",
    operator.gt: ">",
    # logical operators
    operator.and_: "&",
    roperator.rand_: "&",
    operator.or_: "|",
    roperator.ror_: "|",
    operator.xor: "^",
    roperator.rxor: "^",
    # no string form: a None value makes `evaluate` bypass numexpr and use
    # the standard evaluation instead
    divmod: None,
    roperator.rdivmod: None,
}


def _where_standard(cond, a, b):
# Caller is responsible for extracting ndarray if necessary
return np.where(cond, a, b)
Expand Down Expand Up @@ -178,23 +211,23 @@ def _bool_arith_check(
return True


def evaluate(op, op_str, a, b, use_numexpr=True):
def evaluate(op, a, b, use_numexpr: bool = True):
"""
Evaluate and return the expression of the op on a and b.

Parameters
----------
op : the actual operator
op_str : str
The string version of the op.
a : left operand
b : right operand
use_numexpr : bool, default True
Whether to try to use numexpr.
"""
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
if use_numexpr:
return _evaluate(op, op_str, a, b)
op_str = _op_str_mapping.get(op, None)
if op_str is not None:
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
if use_numexpr:
return _evaluate(op, op_str, a, b) # type: ignore
return _evaluate_standard(op, op_str, a, b)


Expand Down
88 changes: 16 additions & 72 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,51 +179,6 @@ def _get_frame_op_default_axis(name):
return "columns"


def _get_opstr(op):
    """
    Look up the operation string, if there is one, that numexpr should be
    given for a binary operator.

    Parameters
    ----------
    op : binary operator

    Returns
    -------
    op_str : string or None
        None for ``divmod`` and ``rdivmod``, whose table entries are None.

    Raises
    ------
    KeyError
        If ``op`` is not one of the operators listed in the table below.
    """
    # Forward and reflected variants of an operator map to the same string.
    opstr_by_operator = {
        operator.add: "+",
        radd: "+",
        operator.mul: "*",
        rmul: "*",
        operator.sub: "-",
        rsub: "-",
        operator.truediv: "/",
        rtruediv: "/",
        operator.floordiv: "//",
        rfloordiv: "//",
        operator.mod: "%",
        rmod: "%",
        operator.pow: "**",
        rpow: "**",
        operator.eq: "==",
        operator.ne: "!=",
        operator.le: "<=",
        operator.lt: "<",
        operator.ge: ">=",
        operator.gt: ">",
        operator.and_: "&",
        rand_: "&",
        operator.or_: "|",
        ror_: "|",
        operator.xor: "^",
        rxor: "^",
        divmod: None,
        rdivmod: None,
    }
    op_str = opstr_by_operator[op]
    return op_str


def _get_op_name(op, special: bool) -> str:
"""
Find the name to attach to this method according to conventions
Expand Down Expand Up @@ -293,7 +248,7 @@ def fill_binop(left, right, fill_value):
# Dispatch logic


def dispatch_to_series(left, right, func, str_rep=None, axis=None):
def dispatch_to_series(left, right, func, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.
Expand All @@ -303,7 +258,6 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
Expand All @@ -318,14 +272,14 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
if lib.is_scalar(right) or np.ndim(right) == 0:

# Get the appropriate array-op to apply to each block's values.
array_op = get_array_op(func, str_rep=str_rep)
array_op = get_array_op(func)
bm = left._mgr.apply(array_op, right=right)
return type(left)(bm)

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

array_op = get_array_op(func, str_rep=str_rep)
array_op = get_array_op(func)
bm = left._mgr.operate_blockwise(right._mgr, array_op)
return type(left)(bm)

Expand Down Expand Up @@ -358,7 +312,7 @@ def column_op(a, b):
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)
new_data = expressions.evaluate(column_op, left, right)
return new_data


Expand Down Expand Up @@ -391,7 +345,6 @@ def _arith_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

@unpack_zerodim_and_defer(op_name)
Expand All @@ -402,7 +355,7 @@ def wrapper(left, right):

lvalues = extract_array(left, extract_numpy=True)
rvalues = extract_array(right, extract_numpy=True)
result = arithmetic_op(lvalues, rvalues, op, str_rep)
result = arithmetic_op(lvalues, rvalues, op)

return left._construct_result(result, name=res_name)

Expand All @@ -415,7 +368,6 @@ def _comp_method_SERIES(cls, op, special):
Wrapper function for Series comparison operations, to avoid
code duplication.
"""
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

@unpack_zerodim_and_defer(op_name)
Expand All @@ -429,7 +381,7 @@ def wrapper(self, other):
lvalues = extract_array(self, extract_numpy=True)
rvalues = extract_array(other, extract_numpy=True)

res_values = comparison_op(lvalues, rvalues, op, str_rep)
res_values = comparison_op(lvalues, rvalues, op)

return self._construct_result(res_values, name=res_name)

Expand Down Expand Up @@ -490,7 +442,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# DataFrame


def _combine_series_frame(left, right, func, axis: int, str_rep: str):
def _combine_series_frame(left, right, func, axis: int):
"""
Apply binary operator `func` to self, other using alignment and fill
conventions determined by the axis argument.
Expand All @@ -501,7 +453,6 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):
right : Series
func : binary operator
axis : {0, 1}
str_rep : str

Returns
-------
Expand All @@ -520,7 +471,7 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):

rvalues = np.broadcast_to(rvalues, left.shape)

array_op = get_array_op(func, str_rep=str_rep)
array_op = get_array_op(func)
bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"])
return type(left)(bm)

Expand Down Expand Up @@ -679,12 +630,11 @@ def _frame_arith_method_with_reindex(


def _arith_method_FRAME(cls, op, special):
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
default_axis = _get_frame_op_default_axis(op_name)

na_op = define_na_arithmetic_op(op, str_rep)
is_logical = str_rep in ["&", "|", "^"]
na_op = define_na_arithmetic_op(op)
is_logical = op.__name__.strip("_").lstrip("_") in ["and", "or", "xor"]

if op_name in _op_descriptions:
# i.e. include "add" but not "__add__"
Expand Down Expand Up @@ -719,15 +669,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
raise NotImplementedError(f"fill_value {fill_value} not supported.")

axis = self._get_axis_number(axis) if axis is not None else 1
new_data = _combine_series_frame(
self, other, pass_op, axis=axis, str_rep=str_rep
)
new_data = _combine_series_frame(self, other, pass_op, axis=axis)
else:
# in this case we always have `np.ndim(other) == 0`
if fill_value is not None:
self = self.fillna(fill_value)

new_data = dispatch_to_series(self, other, op, str_rep)
new_data = dispatch_to_series(self, other, op)

return self._construct_result(new_data)

Expand All @@ -737,7 +685,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):


def _flex_comp_method_FRAME(cls, op, special):
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
default_axis = _get_frame_op_default_axis(op_name)

Expand All @@ -752,16 +699,14 @@ def f(self, other, axis=default_axis, level=None):

if isinstance(other, ABCDataFrame):
# Another DataFrame
new_data = dispatch_to_series(self, other, op, str_rep)
new_data = dispatch_to_series(self, other, op)

elif isinstance(other, ABCSeries):
axis = self._get_axis_number(axis) if axis is not None else 1
new_data = _combine_series_frame(
self, other, op, axis=axis, str_rep=str_rep
)
new_data = _combine_series_frame(self, other, op, axis=axis)
else:
# in this case we always have `np.ndim(other) == 0`
new_data = dispatch_to_series(self, other, op, str_rep)
new_data = dispatch_to_series(self, other, op)

return self._construct_result(new_data)

Expand All @@ -771,7 +716,6 @@ def f(self, other, axis=default_axis, level=None):


def _comp_method_FRAME(cls, op, special):
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)

@Appender(f"Wrapper for comparison method {op_name}")
Expand All @@ -783,7 +727,7 @@ def f(self, other):

axis = "columns" # only relevant for Series other case
# See GH#4537 for discussion of scalar op behavior
new_data = dispatch_to_series(self, other, op, str_rep, axis=axis)
new_data = dispatch_to_series(self, other, op, axis=axis)
return self._construct_result(new_data)

f.__name__ = op_name
Expand Down
Loading