From 8dcad7b11ed90e98289bc496c24d363440082d76 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Sep 2020 15:28:31 -0700 Subject: [PATCH] REF: separate out helpers from iLoc._setitem_with_indexer --- pandas/core/indexing.py | 141 ++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 64 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 51031d9ab1153..64da27a6574a6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -34,7 +34,7 @@ from pandas.core.indexes.api import Index if TYPE_CHECKING: - from pandas import DataFrame # noqa:F401 + from pandas import DataFrame, Series # noqa:F401 # "null slice" _NS = slice(None, None) @@ -1543,13 +1543,10 @@ def _setitem_with_indexer(self, indexer, value): since it goes from positional indexers back to labels when calling BlockManager methods, see GH#12991, GH#22046, GH#15686. """ - - # also has the side effect of consolidating in-place - from pandas import Series - info_axis = self.obj._info_axis_number # maybe partial set + # _is_mixed_type has the side effect of consolidating in-place take_split_path = self.obj._is_mixed_type # if there is only one block/type, still have to take split path @@ -1642,6 +1639,8 @@ def _setitem_with_indexer(self, indexer, value): # align and set the values if take_split_path: + # We have to operate column-wise + # Above we only set take_split_path to True for 2D cases assert self.ndim == 2 assert info_axis == 1 @@ -1682,29 +1681,6 @@ def _setitem_with_indexer(self, indexer, value): pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer - def isetter(loc, v): - # positional setting on column loc - ser = self.obj._ixs(loc, axis=1) - - # perform the equivalent of a setitem on the info axis - # as we have a null slice or a slice with full bounds - # which means essentially reassign to the columns of a - # multi-dim object - # GH6149 (null slice), GH10408 (full bounds) - if isinstance(pi, tuple) and all( - com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) - for idx in pi - ): - ser = v - else: - # set the item, possibly having a dtype change - ser = ser.copy() - ser._mgr = ser._mgr.setitem(indexer=pi, value=v) - ser._maybe_update_cacher(clear=True) - - # reset the sliced object if unique - self.obj._iset_item(loc, ser) - # we need an iterable, with a ndim of at least 1 # eg. don't pass through np.array(0) if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: @@ -1725,7 +1701,7 @@ def isetter(loc, v): else: v = np.nan - isetter(loc, v) + self._setitem_single_column(loc, v, pi) # we have an equal len ndarray/convertible to our labels # hasattr first, to avoid coercing to ndarray without reason. @@ -1744,7 +1720,7 @@ def isetter(loc, v): for i, loc in enumerate(ilocs): # setting with a list, re-coerces - isetter(loc, value[:, i].tolist()) + self._setitem_single_column(loc, value[:, i].tolist(), pi) elif ( len(labels) == 1 @@ -1753,7 +1729,7 @@ def isetter(loc, v): ): # we have an equal len list/ndarray # We only get here with len(labels) == len(ilocs) == 1 - isetter(ilocs[0], value) + self._setitem_single_column(ilocs[0], value, pi) elif lplane_indexer == 0 and len(value) == len(self.obj.index): # We get here in one case via .loc with a all-False mask @@ -1768,50 +1744,87 @@ def isetter(loc, v): ) for loc, v in zip(ilocs, value): - isetter(loc, v) + self._setitem_single_column(loc, v, pi) else: # scalar value for loc in ilocs: - isetter(loc, value) + self._setitem_single_column(loc, value, pi) else: - if isinstance(indexer, tuple): + self._setitem_single_block_inplace(indexer, value) + + def _setitem_single_column(self, loc: int, value, plane_indexer): + # positional setting on column loc + pi = plane_indexer + + ser = self.obj._ixs(loc, axis=1) + + # perform the equivalent of a setitem on the info axis + # as we have a null slice or a slice with full bounds + # which means essentially reassign to the columns of a + # multi-dim object + # GH#6149 (null slice), GH#10408 (full bounds) + if isinstance(pi, tuple) and all( + com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) + for idx in pi + ): + ser = value + else: + # set the item, possibly having a dtype change + ser = ser.copy() + ser._mgr = ser._mgr.setitem(indexer=pi, value=value) + ser._maybe_update_cacher(clear=True) - # if we are setting on the info axis ONLY - # set using those methods to avoid block-splitting - # logic here - if ( - len(indexer) > info_axis - and is_integer(indexer[info_axis]) - and all( - com.is_null_slice(idx) - for i, idx in enumerate(indexer) - if i != info_axis - ) - and item_labels.is_unique - ): - self.obj[item_labels[indexer[info_axis]]] = value - return + # reset the sliced object if unique + self.obj._iset_item(loc, ser) - indexer = maybe_convert_ix(*indexer) + def _setitem_single_block_inplace(self, indexer, value): + """ + _setitem_with_indexer for the case when we have a single Block + and the value can be set into it without casting. + """ + from pandas import Series - if isinstance(value, (ABCSeries, dict)): - # TODO(EA): ExtensionBlock.setitem this causes issues with - # setting for extensionarrays that store dicts. Need to decide - # if it's worth supporting that. - value = self._align_series(indexer, Series(value)) + info_axis = self.obj._info_axis_number + item_labels = self.obj._get_axis(info_axis) - elif isinstance(value, ABCDataFrame): - value = self._align_frame(indexer, value) + if isinstance(indexer, tuple): - # check for chained assignment - self.obj._check_is_chained_assignment_possible() + # if we are setting on the info axis ONLY + # set using those methods to avoid block-splitting + # logic here + if ( + len(indexer) > info_axis + and is_integer(indexer[info_axis]) + and all( + com.is_null_slice(idx) + for i, idx in enumerate(indexer) + if i != info_axis + ) + and item_labels.is_unique + ): + self.obj[item_labels[indexer[info_axis]]] = value + return - # actually do the set - self.obj._consolidate_inplace() - self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) - self.obj._maybe_update_cacher(clear=True) + indexer = maybe_convert_ix(*indexer) + + if isinstance(value, (ABCSeries, dict)): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) + + elif isinstance(value, ABCDataFrame): + value = self._align_frame(indexer, value) + + # check for chained assignment + self.obj._check_is_chained_assignment_possible() + + # actually do the set + self.obj._consolidate_inplace() + self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) + self.obj._maybe_update_cacher(clear=True) def _setitem_with_indexer_missing(self, indexer, value): """ @@ -1873,7 +1886,7 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = self.obj.append(value)._mgr self.obj._maybe_update_cacher(clear=True) - def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False): + def _align_series(self, indexer, ser: "Series", multiindex_indexer: bool = False): """ Parameters ----------