From 8309f1330fa8925c891863cfb61116fed2d1f64d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 6 Mar 2021 17:10:36 -0800 Subject: [PATCH 1/2] REF: Block.putmask dont use split_and_operate --- pandas/core/array_algos/putmask.py | 2 +- pandas/core/internals/blocks.py | 61 +++++++++++------------------- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index cfa1f59f3d4ca..93046f476c6ba 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -81,7 +81,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray: # n should be the length of the mask or a scalar here if not is_list_like(new): - new = np.repeat(new, len(mask)) + new = np.broadcast_to(new, mask.shape) # see if we are only masking values that if putted # will work in the current dtype diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f2b8499a316b7..458c923ac46eb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1061,21 +1061,17 @@ def putmask(self, mask, new) -> List[Block]: ------- List[Block] """ - transpose = self.ndim == 2 + orig_mask = mask mask, noop = validate_putmask(self.values.T, mask) assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame)) - new_values = self.values # delay copy if possible. # if we are passed a scalar None, convert it here - if not is_list_like(new) and isna(new) and not self.is_object: - # FIXME: make sure we have compatible NA + if not self.is_object and is_valid_na_for_dtype(new, self.dtype): new = self.fill_value if self._can_hold_element(new): - if transpose: - new_values = new_values.T - putmask_without_repeat(new_values, mask, new) + putmask_without_repeat(self.values.T, mask, new) return [self] elif noop: @@ -1083,42 +1079,31 @@ def putmask(self, mask, new) -> List[Block]: dtype, _ = infer_dtype_from(new) if dtype.kind in ["m", "M"]: - # using putmask with object dtype will incorrect cast to object + # using putmask with object dtype will incorrectly cast to object # Having excluded self._can_hold_element, we know we cannot operate # in-place, so we are safe using `where` return self.where(new, ~mask) - else: - # may need to upcast - if transpose: - mask = mask.T - if isinstance(new, np.ndarray): - new = new.T - - # operate column-by-column - def f(mask, val, idx): - - if idx is None: - # ndim==1 case. - n = new - else: - - if isinstance(new, np.ndarray): - n = np.squeeze(new[idx % new.shape[0]]) - else: - n = np.array(new) - - # type of the new block - dtype = find_common_type([n.dtype, val.dtype]) - - # we need to explicitly astype here to make a copy - n = n.astype(dtype) - - nv = putmask_smart(val, mask, n) - return nv + elif self.ndim == 1 or self.shape[0] == 1: + # no need to split columns + nv = putmask_smart(self.values.T, mask, new).T + return [self.make_block(nv)] - new_blocks = self.split_and_operate(mask, f, True) - return new_blocks + else: + is_array = isinstance(new, np.ndarray) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = new + if is_array: + # we have a different value per-column + n = new.T[[i]].T + + submask = orig_mask.T[[i]].T + rbs = nb.putmask(submask, n) + res_blocks.extend(rbs) + return res_blocks @final def coerce_to_target_dtype(self, other) -> Block: From 18d304f6adcf7054ee1169de0e2202e09f235b38 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 6 Mar 2021 17:31:41 -0800 Subject: [PATCH 2/2] simplify --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 458c923ac46eb..e0af81d22c940 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1098,9 +1098,9 @@ def putmask(self, mask, new) -> List[Block]: n = new if is_array: # we have a different value per-column - n = new.T[[i]].T + n = new[:, i : i + 1] - submask = orig_mask.T[[i]].T + submask = orig_mask[:, i : i + 1] rbs = nb.putmask(submask, n) res_blocks.extend(rbs) return res_blocks