From c454025af9d96882609209a73f04be9a8fa82134 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Feb 2021 11:45:23 +0100 Subject: [PATCH 1/5] REF: move actual lookup and dispatch to array_op from frame into internals --- pandas/core/frame.py | 38 ++++++++----------------------- pandas/core/internals/managers.py | 32 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 63d238da12101..3c7b8c7f9db44 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6091,14 +6091,10 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): ------- DataFrame """ - # Get the appropriate array-op to apply to each column/block's values. - array_op = ops.get_array_op(func) - right = lib.item_from_zerodim(right) if not is_list_like(right): # i.e. scalar, faster than checking np.ndim(right) == 0 - bm = self._mgr.apply(array_op, right=right) - return type(self)(bm) + bm = self._mgr.operate_scalar(right, func) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -6108,37 +6104,23 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): # _frame_arith_method_with_reindex # TODO operate_blockwise expects a manager of the same type - bm = self._mgr.operate_blockwise( - right._mgr, array_op # type: ignore[arg-type] - ) - return type(self)(bm) - - elif isinstance(right, Series) and axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(self.columns) - - right = right._values - # maybe_align_as_frame ensures we do not have an ndarray here - assert not isinstance(right, np.ndarray) - - arrays = [ - array_op(_left, _right) - for _left, _right in zip(self._iter_column_arrays(), right) - ] + bm = self._mgr.operate_manager(right._mgr, func) # type: ignore[arg-type] elif isinstance(right, Series): - assert right.index.equals(self.index) # Handle other cases later - right = right._values + if axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + else: + assert right.index.equals(self.index) # Handle other cases later - arrays = [array_op(left, right) for left in self._iter_column_arrays()] + right = right._values + bm = self._mgr.operate_array(right, func, axis) else: # Remaining cases have less-obvious dispatch rules raise NotImplementedError(right) - return type(self)._from_arrays( - arrays, self.columns, self.index, verify_integrity=False - ) + return type(self)(bm) def _combine_frame(self, other: DataFrame, func, fill_value=None): # at this point we have `self._indexed_same(other)` diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1c45b39ba990a..4108ea0a07e05 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -35,6 +35,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries from pandas.core.dtypes.missing import array_equals, isna +from pandas.core import ops import pandas.core.algorithms as algos from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import extract_array @@ -366,6 +367,37 @@ def reduce( new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) return new_mgr, indexer + def operate_scalar(self, other, op) -> BlockManager: + """ + TODO fill in + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(op) + return self.apply(array_op, right=other) + + def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager: + """ + TODO fill in + """ + array_op = ops.get_array_op(op) + if axis == 1: + arrays = [ + array_op(self.iget_values(i), _right) for i, _right in enumerate(other) + ] + else: + arrays = [ + array_op(self.iget_values(i), other) for i in range(len(self.items)) + ] + + return create_block_manager_from_arrays(arrays, self.axes[0], self.axes) + + def operate_manager(self, other: BlockManager, op) -> BlockManager: + """ + TODO fill in + """ + array_op = ops.get_array_op(op) + return self.operate_blockwise(other, array_op) + def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: """ Apply array_op blockwise with another (aligned) BlockManager. From c6ed35789daca7be6e7fdc177cfefb672c1be25f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 15:21:37 +0100 Subject: [PATCH 2/5] update comments and docstrings --- pandas/core/frame.py | 4 +-- pandas/core/internals/managers.py | 54 ++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3c7b8c7f9db44..67053b4ef96eb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6103,7 +6103,7 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): # fails in cases with empty columns reached via # _frame_arith_method_with_reindex - # TODO operate_blockwise expects a manager of the same type + # TODO operate_manager expects a manager of the same type bm = self._mgr.operate_manager(right._mgr, func) # type: ignore[arg-type] elif isinstance(right, Series): @@ -6111,7 +6111,7 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): # axis=1 means we want to operate row-by-row assert right.index.equals(self.columns) else: - assert right.index.equals(self.index) # Handle other cases later + assert right.index.equals(self.index) right = right._values bm = self._mgr.operate_array(right, func, axis) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 4108ea0a07e05..17bcb7ed546bc 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -20,7 +20,7 @@ import numpy as np from pandas._libs import internals as libinternals, lib -from pandas._typing import ArrayLike, Dtype, DtypeObj, Shape +from pandas._typing import ArrayLike, Dtype, DtypeObj, Scalar, Shape from pandas.errors import PerformanceWarning from pandas.util._validators import validate_bool_kwarg @@ -367,9 +367,18 @@ def reduce( new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) return new_mgr, indexer - def operate_scalar(self, other, op) -> BlockManager: + def operate_scalar(self, other: Scalar, op) -> BlockManager: """ - TODO fill in + Element-wise (arithmetic/comparison/logical) operation with other scalar. + + Parameters + ---------- + other : scalar + op : operator function (eg ``operator.add``) + + Returns + ------- + BlockManager """ # Get the appropriate array-op to apply to each column/block's values. array_op = ops.get_array_op(op) @@ -377,14 +386,32 @@ def operate_scalar(self, other, op) -> BlockManager: def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager: """ - TODO fill in + Element-wise (arithmetic/comparison/logical) operation with other array. + + The array is already checked to be of the correct length. + + Parameters + ---------- + other : np.ndarray or ExtensionArray + op : operator function (eg ``operator.add``) + axis : int + Whether to match the array on the index and broadcast along the + columns (axis=0) or match the array on the columns and broadcast + along the rows (axis=1). + + Returns + ------- + BlockManager """ array_op = ops.get_array_op(op) if axis == 1: + # match on the columns -> operate on each column array with single + # element from other array arrays = [ array_op(self.iget_values(i), _right) for i, _right in enumerate(other) ] else: + # match on the rows -> operate for each column array with full other array arrays = [ array_op(self.iget_values(i), other) for i in range(len(self.items)) ] @@ -393,15 +420,20 @@ def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager: def operate_manager(self, other: BlockManager, op) -> BlockManager: """ - TODO fill in - """ - array_op = ops.get_array_op(op) - return self.operate_blockwise(other, array_op) + Element-wise (arithmetic/comparison/logical) operation with other BlockManager. - def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: - """ - Apply array_op blockwise with another (aligned) BlockManager. + The other BlockManager is already aligned with `self`. + + Parameters + ---------- + other : BlockManager + op : operator function (eg ``operator.add``) + + Returns + ------- + BlockManager """ + array_op = ops.get_array_op(op) return operate_blockwise(self, other, array_op) def apply( From 1f8622e0d7f0657e9efd27d8e04637a1d4088c97 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 15:42:38 +0100 Subject: [PATCH 3/5] add ArrayManager versions --- pandas/core/internals/array_manager.py | 69 ++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index a8493e647f39a..e97464a473c3d 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import algos as libalgos, lib -from pandas._typing import ArrayLike, DtypeObj, Hashable +from pandas._typing import ArrayLike, DtypeObj, Hashable, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar @@ -22,6 +22,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import array_equals, isna +from pandas.core import ops import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype @@ -187,11 +188,73 @@ def reduce( indexer = np.arange(self.shape[0]) return new_mgr, indexer - def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: + def operate_scalar(self, other: Scalar, op) -> ArrayManager: """ - Apply array_op blockwise with another (aligned) BlockManager. + Element-wise (arithmetic/comparison/logical) operation with other scalar. + + Parameters + ---------- + other : scalar + op : operator function (eg ``operator.add``) + + Returns + ------- + ArrayManager + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(op) + result_arrays = [array_op(left, other) for left in self.arrays] + return type(self)(result_arrays, self._axes) + + def operate_array(self, other: ArrayLike, op, axis: int) -> ArrayManager: + """ + Element-wise (arithmetic/comparison/logical) operation with other array. + + The array is already checked to be of the correct length. + + Parameters + ---------- + other : np.ndarray or ExtensionArray + op : operator function (eg ``operator.add``) + axis : int + Whether to match the array on the index and broadcast along the + columns (axis=0) or match the array on the columns and broadcast + along the rows (axis=1). + + Returns + ------- + ArrayManager + """ + array_op = ops.get_array_op(op) + if axis == 1: + # match on the columns -> operate on each column array with single + # element from other array + result_arrays = [ + array_op(left, right_scalar) + for left, right_scalar in zip(self.arrays, other) + ] + else: + # match on the rows -> operate for each column array with full other array + result_arrays = [array_op(left, other) for left in self.arrays] + return type(self)(result_arrays, self._axes) + + def operate_manager(self, other: ArrayManager, op) -> ArrayManager: + """ + Element-wise (arithmetic/comparison/logical) operation with other ArrayManager. + + The other ArrayManager is already aligned with `self`. + + Parameters + ---------- + other : ArrayManager + op : operator function (eg ``operator.add``) + + Returns + ------- + ArrayManager """ # TODO what if `other` is BlockManager ? + array_op = ops.get_array_op(op) left_arrays = self.arrays right_arrays = other.arrays result_arrays = [ From a487f0fcf92000a8a04658c58e4bea5315e24b72 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 18:57:31 +0100 Subject: [PATCH 4/5] single index assert for axis=0/1 --- pandas/core/frame.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67053b4ef96eb..84da36258aadc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6076,7 +6076,7 @@ def _arith_method(self, other, op): _logical_method = _arith_method - def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): + def _dispatch_frame_op(self, right, func, axis: int = 0): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -6107,12 +6107,7 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): bm = self._mgr.operate_manager(right._mgr, func) # type: ignore[arg-type] elif isinstance(right, Series): - if axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(self.columns) - else: - assert right.index.equals(self.index) - + assert right.index.equals(self._get_axis(axis)) right = right._values bm = self._mgr.operate_array(right, func, axis) From 9fe7f447765c93fc0b575806a94907b7a1e549c5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 19:02:02 +0100 Subject: [PATCH 5/5] update docstring --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 84da36258aadc..d15c73479273a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6085,7 +6085,8 @@ def _dispatch_frame_op(self, right, func, axis: int = 0): ---------- right : scalar, Series, or DataFrame func : arithmetic or comparison operator - axis : {None, 0, 1} + axis : {0, 1} + Only relevant if `right` is a Series. Returns -------