diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index d9cd2c7be0093..68d08fb476950 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -5,7 +5,7 @@ Offer fast expression evaluation through numexpr """ - +import operator import warnings import numpy as np @@ -15,6 +15,7 @@ from pandas.core.dtypes.generic import ABCDataFrame from pandas.core.computation.check import _NUMEXPR_INSTALLED +from pandas.core.ops import roperator if _NUMEXPR_INSTALLED: import numexpr as ne @@ -120,6 +121,38 @@ def _evaluate_numexpr(op, op_str, a, b): return result +_op_str_mapping = { + operator.add: "+", + roperator.radd: "+", + operator.mul: "*", + roperator.rmul: "*", + operator.sub: "-", + roperator.rsub: "-", + operator.truediv: "/", + roperator.rtruediv: "/", + operator.floordiv: "//", + roperator.rfloordiv: "//", + operator.mod: "%", + roperator.rmod: "%", + operator.pow: "**", + roperator.rpow: "**", + operator.eq: "==", + operator.ne: "!=", + operator.le: "<=", + operator.lt: "<", + operator.ge: ">=", + operator.gt: ">", + operator.and_: "&", + roperator.rand_: "&", + operator.or_: "|", + roperator.ror_: "|", + operator.xor: "^", + roperator.rxor: "^", + divmod: None, + roperator.rdivmod: None, +} + + def _where_standard(cond, a, b): # Caller is responsible for extracting ndarray if necessary return np.where(cond, a, b) @@ -178,23 +211,23 @@ def _bool_arith_check( return True -def evaluate(op, op_str, a, b, use_numexpr=True): +def evaluate(op, a, b, use_numexpr: bool = True): """ Evaluate and return the expression of the op on a and b. Parameters ---------- op : the actual operand - op_str : str - The string version of the op. a : left operand b : right operand use_numexpr : bool, default True Whether to try to use numexpr. """ - use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) - if use_numexpr: - return _evaluate(op, op_str, a, b) + op_str = _op_str_mapping.get(op, None) + if op_str is not None: + use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) + if use_numexpr: + return _evaluate(op, op_str, a, b) # type: ignore return _evaluate_standard(op, op_str, a, b) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 3cbfa280a4e30..d248d8d8298a7 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -179,51 +179,6 @@ def _get_frame_op_default_axis(name): return "columns" -def _get_opstr(op): - """ - Find the operation string, if any, to pass to numexpr for this - operation. - - Parameters - ---------- - op : binary operator - - Returns - ------- - op_str : string or None - """ - return { - operator.add: "+", - radd: "+", - operator.mul: "*", - rmul: "*", - operator.sub: "-", - rsub: "-", - operator.truediv: "/", - rtruediv: "/", - operator.floordiv: "//", - rfloordiv: "//", - operator.mod: "%", - rmod: "%", - operator.pow: "**", - rpow: "**", - operator.eq: "==", - operator.ne: "!=", - operator.le: "<=", - operator.lt: "<", - operator.ge: ">=", - operator.gt: ">", - operator.and_: "&", - rand_: "&", - operator.or_: "|", - ror_: "|", - operator.xor: "^", - rxor: "^", - divmod: None, - rdivmod: None, - }[op] - - def _get_op_name(op, special: bool) -> str: """ Find the name to attach to this method according to conventions @@ -293,7 +248,7 @@ def fill_binop(left, right, fill_value): # Dispatch logic -def dispatch_to_series(left, right, func, str_rep=None, axis=None): +def dispatch_to_series(left, right, func, axis=None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -303,7 +258,6 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): left : DataFrame right : scalar or DataFrame func : arithmetic or comparison operator - str_rep : str or None, default None axis : {None, 0, 1, "index", "columns"} Returns @@ -318,14 +272,14 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): if lib.is_scalar(right) or np.ndim(right) == 0: # Get the appropriate array-op to apply to each block's values. - array_op = get_array_op(func, str_rep=str_rep) + array_op = get_array_op(func) bm = left._mgr.apply(array_op, right=right) return type(left)(bm) elif isinstance(right, ABCDataFrame): assert right._indexed_same(left) - array_op = get_array_op(func, str_rep=str_rep) + array_op = get_array_op(func) bm = left._mgr.operate_blockwise(right._mgr, array_op) return type(left)(bm) @@ -358,7 +312,7 @@ def column_op(a, b): # Remaining cases have less-obvious dispatch rules raise NotImplementedError(right) - new_data = expressions.evaluate(column_op, str_rep, left, right) + new_data = expressions.evaluate(column_op, left, right) return new_data @@ -391,7 +345,6 @@ def _arith_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ - str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @unpack_zerodim_and_defer(op_name) @@ -402,7 +355,7 @@ def wrapper(left, right): lvalues = extract_array(left, extract_numpy=True) rvalues = extract_array(right, extract_numpy=True) - result = arithmetic_op(lvalues, rvalues, op, str_rep) + result = arithmetic_op(lvalues, rvalues, op) return left._construct_result(result, name=res_name) @@ -415,7 +368,6 @@ def _comp_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ - str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @unpack_zerodim_and_defer(op_name) @@ -429,7 +381,7 @@ def wrapper(self, other): lvalues = extract_array(self, extract_numpy=True) rvalues = extract_array(other, extract_numpy=True) - res_values = comparison_op(lvalues, rvalues, op, str_rep) + res_values = comparison_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) @@ -490,7 +442,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # DataFrame -def _combine_series_frame(left, right, func, axis: int, str_rep: str): +def _combine_series_frame(left, right, func, axis: int): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the axis argument. @@ -501,7 +453,6 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): right : Series func : binary operator axis : {0, 1} - str_rep : str Returns ------- @@ -520,7 +471,7 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): rvalues = np.broadcast_to(rvalues, left.shape) - array_op = get_array_op(func, str_rep=str_rep) + array_op = get_array_op(func) bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) return type(left)(bm) @@ -679,12 +630,11 @@ def _frame_arith_method_with_reindex( def _arith_method_FRAME(cls, op, special): - str_rep = _get_opstr(op) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) - na_op = define_na_arithmetic_op(op, str_rep) - is_logical = str_rep in ["&", "|", "^"] + na_op = define_na_arithmetic_op(op) + is_logical = op.__name__.strip("_").lstrip("_") in ["and", "or", "xor"] if op_name in _op_descriptions: # i.e. include "add" but not "__add__" @@ -719,15 +669,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): raise NotImplementedError(f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 - new_data = _combine_series_frame( - self, other, pass_op, axis=axis, str_rep=str_rep - ) + new_data = _combine_series_frame(self, other, pass_op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) - new_data = dispatch_to_series(self, other, op, str_rep) + new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) @@ -737,7 +685,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): def _flex_comp_method_FRAME(cls, op, special): - str_rep = _get_opstr(op) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) @@ -752,16 +699,14 @@ def f(self, other, axis=default_axis, level=None): if isinstance(other, ABCDataFrame): # Another DataFrame - new_data = dispatch_to_series(self, other, op, str_rep) + new_data = dispatch_to_series(self, other, op) elif isinstance(other, ABCSeries): axis = self._get_axis_number(axis) if axis is not None else 1 - new_data = _combine_series_frame( - self, other, op, axis=axis, str_rep=str_rep - ) + new_data = _combine_series_frame(self, other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` - new_data = dispatch_to_series(self, other, op, str_rep) + new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) @@ -771,7 +716,6 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(cls, op, special): - str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @Appender(f"Wrapper for comparison method {op_name}") @@ -783,7 +727,7 @@ def f(self, other): axis = "columns" # only relevant for Series other case # See GH#4537 for discussion of scalar op behavior - new_data = dispatch_to_series(self, other, op, str_rep, axis=axis) + new_data = dispatch_to_series(self, other, op, axis=axis) return self._construct_result(new_data) f.__name__ = op_name diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index eef42592d2b30..f1d33e1ae61cb 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -5,7 +5,7 @@ from datetime import timedelta from functools import partial import operator -from typing import Any, Optional, Tuple +from typing import Any, Tuple import warnings import numpy as np @@ -121,14 +121,14 @@ def masked_arith_op(x: np.ndarray, y, op): return result -def define_na_arithmetic_op(op, str_rep: Optional[str]): +def define_na_arithmetic_op(op): def na_op(x, y): - return na_arithmetic_op(x, y, op, str_rep) + return na_arithmetic_op(x, y, op) return na_op -def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False): +def na_arithmetic_op(left, right, op, is_cmp: bool = False): """ Return the result of evaluating op on the passed in values. @@ -138,7 +138,6 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal ---------- left : np.ndarray right : np.ndarray or scalar - str_rep : str or None is_cmp : bool, default False If this a comparison operation. @@ -153,7 +152,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, left, right) + result = expressions.evaluate(op, left, right) except TypeError: if is_cmp: # numexpr failed on comparison op, e.g. ndarray[float] > datetime @@ -170,7 +169,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal return missing.dispatch_fill_zeros(op, left, right, result) -def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): +def arithmetic_op(left: ArrayLike, right: Any, op): """ Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... @@ -181,7 +180,6 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): Cannot be a DataFrame or Index. Series is *not* excluded. op : {operator.add, operator.sub, ...} Or one of the reversed variants from roperator. - str_rep : str Returns ------- @@ -201,14 +199,12 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): else: with np.errstate(all="ignore"): - res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep) + res_values = na_arithmetic_op(lvalues, rvalues, op) return res_values -def comparison_op( - left: ArrayLike, right: Any, op, str_rep: Optional[str] = None, -) -> ArrayLike: +def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. @@ -260,9 +256,7 @@ def comparison_op( # suppress warnings from numpy about element-wise comparison warnings.simplefilter("ignore", DeprecationWarning) with np.errstate(all="ignore"): - res_values = na_arithmetic_op( - lvalues, rvalues, op, str_rep, is_cmp=True - ) + res_values = na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) return res_values @@ -373,7 +367,7 @@ def fill_bool(x, left=None): return res_values -def get_array_op(op, str_rep: Optional[str] = None): +def get_array_op(op): """ Return a binary array operation corresponding to the given operator op. @@ -381,8 +375,6 @@ def get_array_op(op, str_rep: Optional[str] = None): ---------- op : function Binary operator from operator or roperator module. - str_rep : str or None, default None - str_rep to pass to arithmetic_op Returns ------- @@ -390,11 +382,11 @@ def get_array_op(op, str_rep: Optional[str] = None): """ op_name = op.__name__.strip("_") if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: - return partial(comparison_op, op=op, str_rep=str_rep) + return partial(comparison_op, op=op) elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: return partial(logical_op, op=op) else: - return partial(arithmetic_op, op=op, str_rep=str_rep) + return partial(arithmetic_op, op=op) def maybe_upcast_datetimelike_array(obj: ArrayLike) -> ArrayLike: diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index b0a369ea65c94..2368e93ddc256 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -178,8 +178,8 @@ def testit(): result = expr._can_use_numexpr(op, op_str, left, left, "evaluate") assert result != left._is_mixed_type - result = expr.evaluate(op, op_str, left, left, use_numexpr=True) - expected = expr.evaluate(op, op_str, left, left, use_numexpr=False) + result = expr.evaluate(op, left, left, use_numexpr=True) + expected = expr.evaluate(op, left, left, use_numexpr=False) if isinstance(result, DataFrame): tm.assert_frame_equal(result, expected) @@ -219,8 +219,8 @@ def testit(): result = expr._can_use_numexpr(op, op_str, left, f12, "evaluate") assert result != left._is_mixed_type - result = expr.evaluate(op, op_str, left, f12, use_numexpr=True) - expected = expr.evaluate(op, op_str, left, f12, use_numexpr=False) + result = expr.evaluate(op, left, f12, use_numexpr=True) + expected = expr.evaluate(op, left, f12, use_numexpr=False) if isinstance(result, DataFrame): tm.assert_frame_equal(result, expected) else: