From d51233617b1cad5d7218c43e68c7c7f2885cfa51 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 15 Aug 2019 08:42:03 -0700 Subject: [PATCH] implement array_ops --- pandas/core/arrays/datetimes.py | 4 +- pandas/core/indexes/base.py | 2 +- pandas/core/ops/__init__.py | 123 ++----------------------------- pandas/core/ops/array_ops.py | 127 ++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 121 deletions(-) create mode 100644 pandas/core/ops/array_ops.py diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1aad130d9a3f5..093334a815938 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -195,11 +195,11 @@ def wrapper(self, other): return invalid_comparison(self, other, op) if is_object_dtype(other): - # We have to use _comp_method_OBJECT_ARRAY instead of numpy + # We have to use comp_method_OBJECT_ARRAY instead of numpy # comparison otherwise it would fail to raise when # comparing tz-aware and tz-naive with np.errstate(all="ignore"): - result = ops._comp_method_OBJECT_ARRAY( + result = ops.comp_method_OBJECT_ARRAY( op, self.astype(object), other ) o_mask = isna(other) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5f17dde01d2c4..b983117478c61 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -109,7 +109,7 @@ def cmp_method(self, other): elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all="ignore"): - result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) + result = ops.comp_method_OBJECT_ARRAY(op, self.values, other) else: with np.errstate(all="ignore"): diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 26672593f98fb..dbcf09a401f27 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -13,11 +13,7 @@ from pandas.errors import NullFrequencyError from pandas.util._decorators import Appender -from 
pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike, - find_common_type, - maybe_upcast_putmask, -) +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( ensure_object, is_bool_dtype, @@ -29,7 +25,6 @@ is_integer_dtype, is_list_like, is_object_dtype, - is_period_dtype, is_scalar, is_timedelta64_dtype, ) @@ -37,7 +32,6 @@ ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, - ABCIndex, ABCIndexClass, ABCSeries, ABCSparseSeries, @@ -47,7 +41,7 @@ import pandas as pd from pandas._typing import ArrayLike from pandas.core.construction import array, extract_array -from pandas.core.ops import missing +from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY, define_na_arithmetic_op from pandas.core.ops.docstrings import ( _arith_doc_FRAME, _flex_comp_doc_FRAME, @@ -398,63 +392,6 @@ def mask_cmp_op(x, y, op): return result -def masked_arith_op(x, y, op): - """ - If the given arithmetic operation fails, attempt it again on - only the non-null elements of the input array(s). - - Parameters - ---------- - x : np.ndarray - y : np.ndarray, Series, Index - op : binary operator - """ - # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes - # the logic valid for both Series and DataFrame ops. - xrav = x.ravel() - assert isinstance(x, np.ndarray), type(x) - if isinstance(y, np.ndarray): - dtype = find_common_type([x.dtype, y.dtype]) - result = np.empty(x.size, dtype=dtype) - - # PeriodIndex.ravel() returns int64 dtype, so we have - # to work around that case. 
See GH#19956 - yrav = y if is_period_dtype(y) else y.ravel() - mask = notna(xrav) & notna(yrav) - - if yrav.shape != mask.shape: - # FIXME: GH#5284, GH#5035, GH#19448 - # Without specifically raising here we get mismatched - # errors in Py3 (TypeError) vs Py2 (ValueError) - # Note: Only = an issue in DataFrame case - raise ValueError("Cannot broadcast operands together.") - - if mask.any(): - with np.errstate(all="ignore"): - result[mask] = op(xrav[mask], yrav[mask]) - - else: - assert is_scalar(y), type(y) - assert isinstance(x, np.ndarray), type(x) - # mask is only meaningful for x - result = np.empty(x.size, dtype=x.dtype) - mask = notna(xrav) - - # 1 ** np.nan is 1. So we have to unmask those. - if op == pow: - mask = np.where(x == 1, False, mask) - elif op == rpow: - mask = np.where(y == 1, False, mask) - - if mask.any(): - with np.errstate(all="ignore"): - result[mask] = op(xrav[mask], y) - - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) - result = result.reshape(x.shape) # 2D compat - return result - - # ----------------------------------------------------------------------------- # Dispatch logic @@ -673,33 +610,7 @@ def _arith_method_SERIES(cls, op, special): _construct_divmod_result if op in [divmod, rdivmod] else _construct_result ) - def na_op(x, y): - """ - Return the result of evaluating op on the passed in values. - - If native types are not compatible, try coersion to object dtype. 
- - Parameters - ---------- - x : array-like - y : array-like or scalar - - Returns - ------- - array-like - - Raises - ------ - TypeError : invalid operation - """ - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) - except TypeError: - result = masked_arith_op(x, y, op) - - return missing.dispatch_fill_zeros(op, x, y, result) + na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs) def wrapper(left, right): if isinstance(right, ABCDataFrame): @@ -735,22 +646,6 @@ def wrapper(left, right): return wrapper -def _comp_method_OBJECT_ARRAY(op, x, y): - if isinstance(y, list): - y = construct_1d_object_array_from_listlike(y) - if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): - if not is_object_dtype(y.dtype): - y = y.astype(np.object_) - - if isinstance(y, (ABCSeries, ABCIndex)): - y = y.values - - result = libops.vec_compare(x, y, op) - else: - result = libops.scalar_compare(x, y, op) - return result - - def _comp_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid @@ -764,7 +659,7 @@ def na_op(x, y): # Extension Dtypes are not called here if is_object_dtype(x.dtype): - result = _comp_method_OBJECT_ARRAY(op, x, y) + result = comp_method_OBJECT_ARRAY(op, x, y) elif is_datetimelike_v_numeric(x, y): return invalid_comparison(x, y, op) @@ -1091,15 +986,7 @@ def _arith_method_FRAME(cls, op, special): eval_kwargs = _gen_eval_kwargs(op_name) default_axis = _get_frame_op_default_axis(op_name) - def na_op(x, y): - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) - except TypeError: - result = masked_arith_op(x, y, op) - - return missing.dispatch_fill_zeros(op, x, y, result) + na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs) if op_name in _op_descriptions: # i.e. 
include "add" but not "__add__" diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py new file mode 100644 index 0000000000000..a3bfb2e10bb66 --- /dev/null +++ b/pandas/core/ops/array_ops.py @@ -0,0 +1,127 @@ +""" +Functions for arithmetic and comparison operations on NumPy arrays and +ExtensionArrays. +""" +import numpy as np + +from pandas._libs import ops as libops + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + find_common_type, + maybe_upcast_putmask, +) +from pandas.core.dtypes.common import is_object_dtype, is_period_dtype, is_scalar +from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.missing import notna + +from pandas.core.ops import missing +from pandas.core.ops.roperator import rpow + + +def comp_method_OBJECT_ARRAY(op, x, y): + if isinstance(y, list): + y = construct_1d_object_array_from_listlike(y) + + # TODO: Should the checks below be ABCIndexClass? + if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + # TODO: should this be ABCIndexClass?? + if not is_object_dtype(y.dtype): + y = y.astype(np.object_) + + if isinstance(y, (ABCSeries, ABCIndex)): + y = y.values + + result = libops.vec_compare(x, y, op) + else: + result = libops.scalar_compare(x, y, op) + return result + + +def masked_arith_op(x, y, op): + """ + If the given arithmetic operation fails, attempt it again on + only the non-null elements of the input array(s). + + Parameters + ---------- + x : np.ndarray + y : np.ndarray, Series, Index + op : binary operator + """ + # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes + # the logic valid for both Series and DataFrame ops. + xrav = x.ravel() + assert isinstance(x, np.ndarray), type(x) + if isinstance(y, np.ndarray): + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + + # PeriodIndex.ravel() returns int64 dtype, so we have + # to work around that case. 
See GH#19956 + yrav = y if is_period_dtype(y) else y.ravel() + mask = notna(xrav) & notna(yrav) + + if yrav.shape != mask.shape: + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + # Note: Only an issue in DataFrame case + raise ValueError("Cannot broadcast operands together.") + + if mask.any(): + with np.errstate(all="ignore"): + result[mask] = op(xrav[mask], yrav[mask]) + + else: + assert is_scalar(y), type(y) + assert isinstance(x, np.ndarray), type(x) + # mask is only meaningful for x + result = np.empty(x.size, dtype=x.dtype) + mask = notna(xrav) + + # 1 ** np.nan is 1. So we have to unmask those. + if op == pow: + mask = np.where(x == 1, False, mask) + elif op == rpow: + mask = np.where(y == 1, False, mask) + + if mask.any(): + with np.errstate(all="ignore"): + result[mask] = op(xrav[mask], y) + + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) + result = result.reshape(x.shape) # 2D compat + return result + + +def define_na_arithmetic_op(op, str_rep, eval_kwargs): + def na_op(x, y): + """ + Return the result of evaluating op on the passed in values. + + If native types are not compatible, try coercion to object dtype. + + Parameters + ---------- + x : array-like + y : array-like or scalar + + Returns + ------- + array-like + + Raises + ------ + TypeError : invalid operation + """ + import pandas.core.computation.expressions as expressions + + try: + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) + except TypeError: + result = masked_arith_op(x, y, op) + + return missing.dispatch_fill_zeros(op, x, y, result) + + return na_op