From 226fd426c622c3a2a1eed83e5662e659877df7b9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 15 Apr 2020 07:57:01 -0700 Subject: [PATCH] REF: simplify broadcasting code --- pandas/core/internals/managers.py | 16 +++++---- pandas/core/ops/__init__.py | 4 ++- pandas/core/ops/array_ops.py | 56 +------------------------------ 3 files changed, 13 insertions(+), 63 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e693341d10a55..04089e81db331 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -390,11 +390,7 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: if f == "where": align_copy = True - aligned_args = { - k: kwargs[k] - for k in align_keys - if isinstance(kwargs[k], (ABCSeries, ABCDataFrame)) - } + aligned_args = {k: kwargs[k] for k in align_keys} for b in self.blocks: @@ -402,8 +398,14 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: b_items = self.items[b.mgr_locs.indexer] for k, obj in aligned_args.items(): - axis = obj._info_axis_number - kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)._values + if isinstance(obj, (ABCSeries, ABCDataFrame)): + axis = obj._info_axis_number + kwargs[k] = obj.reindex( + b_items, axis=axis, copy=align_copy + )._values + else: + # otherwise we have an ndarray + kwargs[k] = obj[b.mgr_locs.indexer] if callable(f): applied = b.apply(f, **kwargs) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index c14c4a311d66c..9a7c9fdadf90d 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -510,11 +510,13 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): if axis == 0: values = right._values if isinstance(values, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs # We can operate block-wise values = values.reshape(-1, 1) + values = np.broadcast_to(values, left.shape) array_op = get_array_op(func, str_rep=str_rep) - bm = left._mgr.apply(array_op, right=values.T) + bm = left._mgr.apply(array_op, right=values.T, align_keys=["right"]) return type(left)(bm) new_data = dispatch_to_series(left, right, func) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 5dd7af454cbd1..a1d853e38e757 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -75,14 +75,7 @@ def masked_arith_op(x: np.ndarray, y, op): result = np.empty(x.size, dtype=dtype) if len(x) != len(y): - if not _can_broadcast(x, y): - raise ValueError(x.shape, y.shape) - - # Call notna on pre-broadcasted y for performance - ymask = notna(y) - y = np.broadcast_to(y, x.shape) - ymask = np.broadcast_to(ymask, x.shape) - + raise ValueError(x.shape, y.shape) else: ymask = notna(y) @@ -211,51 +204,6 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): return res_values -def _broadcast_comparison_op(lvalues, rvalues, op) -> np.ndarray: - """ - Broadcast a comparison operation between two 2D arrays. - - Parameters - ---------- - lvalues : np.ndarray or ExtensionArray - rvalues : np.ndarray or ExtensionArray - - Returns - ------- - np.ndarray[bool] - """ - if isinstance(rvalues, np.ndarray): - rvalues = np.broadcast_to(rvalues, lvalues.shape) - result = comparison_op(lvalues, rvalues, op) - else: - result = np.empty(lvalues.shape, dtype=bool) - for i in range(len(lvalues)): - result[i, :] = comparison_op(lvalues[i], rvalues[:, 0], op) - return result - - -def _can_broadcast(lvalues, rvalues) -> bool: - """ - Check if we can broadcast rvalues to match the shape of lvalues. - - Parameters - ---------- - lvalues : np.ndarray or ExtensionArray - rvalues : np.ndarray or ExtensionArray - - Returns - ------- - bool - """ - # We assume that lengths dont match - if lvalues.ndim == rvalues.ndim == 2: - # See if we can broadcast unambiguously - if lvalues.shape[1] == rvalues.shape[-1]: - if rvalues.shape[0] == 1: - return True - return False - - def comparison_op( left: ArrayLike, right: Any, op, str_rep: Optional[str] = None, ) -> ArrayLike: @@ -287,8 +235,6 @@ def comparison_op( # We are not catching all listlikes here (e.g. frozenset, tuple) # The ambiguous case is object-dtype. See GH#27803 if len(lvalues) != len(rvalues): - if _can_broadcast(lvalues, rvalues): - return _broadcast_comparison_op(lvalues, rvalues, op) raise ValueError( "Lengths must match to compare", lvalues.shape, rvalues.shape )