From 695e2a82f444aee1a9fe95bc07cba6d0f5994291 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Mar 2020 10:48:37 -0700 Subject: [PATCH 1/2] TYP: require _update_inplace gets BlockManager --- pandas/core/base.py | 15 ++------------- pandas/core/frame.py | 4 ++-- pandas/core/generic.py | 13 ++++++++----- pandas/core/indexes/base.py | 4 ---- pandas/core/series.py | 20 +++++++++++--------- 5 files changed, 23 insertions(+), 33 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 9ff0d60b9cd6a..547d9aa47cd93 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,6 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc -from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -1505,18 +1504,14 @@ def factorize(self, sort=False, na_sentinel=-1): def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) - def drop_duplicates(self, keep="first", inplace=False): - inplace = validate_bool_kwarg(inplace, "inplace") + def drop_duplicates(self, keep="first"): if isinstance(self, ABCIndexClass): if self.is_unique: return self._shallow_copy() duplicated = self.duplicated(keep=keep) result = self[np.logical_not(duplicated)] - if inplace: - return self._update_inplace(result) - else: - return result + return result def duplicated(self, keep="first"): if isinstance(self, ABCIndexClass): @@ -1527,9 +1522,3 @@ def duplicated(self, keep="first"): return self._constructor( duplicated(self, keep=keep), index=self.index ).__finalize__(self) - - # ---------------------------------------------------------------------- - # abstracts - - def _update_inplace(self, result, verify_is_copy=True, **kwargs): - raise AbstractMethodError(self) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fbe171a1dade..260c1210f93c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3049,7 +3049,7 @@ def query(self, expr, inplace=False, **kwargs): new_data = self[res] if inplace: - self._update_inplace(new_data) + self._update_inplace(new_data._data) else: return new_data @@ -4629,7 +4629,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): result = self.loc(axis=axis)[mask] if inplace: - self._update_inplace(result) + self._update_inplace(result._data) else: return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fef292306e741..30f051374fe66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3892,7 +3892,7 @@ def drop( obj = obj._drop_axis(labels, axis, level=level, errors=errors) if inplace: - self._update_inplace(obj) + self._update_inplace(obj._data) else: return obj @@ -3951,12 +3951,15 @@ def _drop_axis( return result - def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + def _update_inplace( + self, result: BlockManager, verify_is_copy: bool_t = True + ) -> None: """ Replace self internals with result. Parameters ---------- + result : BlockManager verify_is_copy : bool, default True Provide is_copy checks. """ @@ -3965,7 +3968,7 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: self._reset_cache() self._clear_item_cache() - self._data = getattr(result, "_data", result) + self._data = result self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: @@ -6107,7 +6110,7 @@ def fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) elif isinstance(value, ABCDataFrame) and self.ndim == 2: - new_data = self.where(self.notna(), value) + new_data = self.where(self.notna(), value)._data else: raise ValueError(f"invalid fill value with a {type(value)}") @@ -7244,7 +7247,7 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): result[mask] = np.nan if inplace: - self._update_inplace(result) + self._update_inplace(result._data) else: return result diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f6a422180b0df..e1f659ad268ba 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -534,10 +534,6 @@ def _shallow_copy_with_infer(self, values, **kwargs): attributes.pop("tz", None) return Index(values, **attributes) - def _update_inplace(self, result, **kwargs): - # guard when called from IndexOpsMixin - raise TypeError("Index can't be updated inplace") - def is_(self, other) -> bool: """ More flexible, faster check like ``is`` but that works through views. diff --git a/pandas/core/series.py b/pandas/core/series.py index 39e1178a3a5c3..e5dbfd46bff5e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -418,10 +418,6 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None: # The ensure_index call aabove ensures we have an Index object self._data.set_axis(axis, labels) - def _update_inplace(self, result, **kwargs): - # we want to call the generic version and not the IndexOpsMixin - return generic.NDFrame._update_inplace(self, result, **kwargs) - # ndarray compatibility @property def dtype(self) -> DtypeObj: @@ -1809,7 +1805,7 @@ def unique(self): result = super().unique() return result - def drop_duplicates(self, keep="first", inplace=False) -> "Series": + def drop_duplicates(self, keep="first", inplace=False) -> Optional["Series"]: """ Return Series with duplicate values removed. @@ -1884,7 +1880,13 @@ def drop_duplicates(self, keep="first", inplace=False) -> "Series": 5 hippo Name: animal, dtype: object """ - return super().drop_duplicates(keep=keep, inplace=inplace) + inplace = validate_bool_kwarg(inplace, "inplace") + result = super().drop_duplicates(keep=keep) + if inplace: + self._update_inplace(result._data) + return None + else: + return result def duplicated(self, keep="first") -> "Series": """ @@ -3001,7 +3003,7 @@ def _try_kind_sort(arr): result.index = ibase.default_index(len(sorted_index)) if inplace: - self._update_inplace(result) + self._update_inplace(result._data) else: return result.__finalize__(self) @@ -3179,7 +3181,7 @@ def sort_index( result.index = ibase.default_index(len(result)) if inplace: - self._update_inplace(result) + self._update_inplace(result._data) else: return result.__finalize__(self) @@ -4509,7 +4511,7 @@ def dropna(self, axis=0, inplace=False, how=None): if self._can_hold_na: result = remove_na_arraylike(self) if inplace: - self._update_inplace(result) + self._update_inplace(result._data) else: return result else: From 1f11fa8bc1ec5d69dbc144785994bf17e1805d74 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Mar 2020 14:38:33 -0700 Subject: [PATCH 2/2] standardize on requiring Series/DataFrame, never BlockManager --- pandas/core/frame.py | 38 ++++++++++++++---------------- pandas/core/generic.py | 47 ++++++++++++++++++-------------------- pandas/core/ops/methods.py | 2 +- pandas/core/series.py | 8 +++---- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 260c1210f93c0..e80e085bc9f7c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3042,16 +3042,16 @@ def query(self, expr, inplace=False, **kwargs): res = self.eval(expr, **kwargs) try: - new_data = self.loc[res] + result = self.loc[res] except ValueError: # when res is multi-dimensional loc raises, but this is sometimes a # valid query - new_data = self[res] + result = self[res] if inplace: - self._update_inplace(new_data._data) + self._update_inplace(result) else: - return new_data + return result def eval(self, expr, inplace=False, **kwargs): """ @@ -4629,7 +4629,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): result = self.loc(axis=axis)[mask] if inplace: - self._update_inplace(result._data) + self._update_inplace(result) else: return result @@ -4678,22 +4678,16 @@ def drop_duplicates( inplace = validate_bool_kwarg(inplace, "inplace") duplicated = self.duplicated(subset, keep=keep) - if inplace: - (inds,) = np.asarray(-duplicated).nonzero() - new_data = self._data.take(inds) + result = self[-duplicated] + if ignore_index: + result.index = range(len(result)) - if ignore_index: - new_data.axes[1] = ibase.default_index(len(inds)) - self._update_inplace(new_data) + if inplace: + self._update_inplace(result) + return None else: - result = self[-duplicated] - - if ignore_index: - result.index = ibase.default_index(len(result)) return result - return None - def duplicated( self, subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, @@ -4808,10 +4802,11 @@ def sort_values( if ignore_index: new_data.axes[1] = ibase.default_index(len(indexer)) + result = self._constructor(new_data) if inplace: - return self._update_inplace(new_data) + return self._update_inplace(result) else: - return self._constructor(new_data).__finalize__(self) + return result.__finalize__(self) def sort_index( self, @@ -4943,10 +4938,11 @@ def sort_index( if ignore_index: new_data.axes[1] = ibase.default_index(len(indexer)) + result = self._constructor(new_data) if inplace: - return self._update_inplace(new_data) + return self._update_inplace(result) else: - return self._constructor(new_data).__finalize__(self) + return result.__finalize__(self) def value_counts( self, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 30f051374fe66..63b9bfed00baf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -147,7 +147,7 @@ def _single_replace(self, to_replace, method, inplace, limit): result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self) if inplace: - self._update_inplace(result._data) + self._update_inplace(result) return return result @@ -988,7 +988,7 @@ def rename( result._clear_item_cache() if inplace: - self._update_inplace(result._data) + self._update_inplace(result) return None else: return result.__finalize__(self) @@ -3892,7 +3892,7 @@ def drop( obj = obj._drop_axis(labels, axis, level=level, errors=errors) if inplace: - self._update_inplace(obj._data) + self._update_inplace(obj) else: return obj @@ -3951,24 +3951,21 @@ def _drop_axis( return result - def _update_inplace( - self, result: BlockManager, verify_is_copy: bool_t = True - ) -> None: + def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: """ Replace self internals with result. Parameters ---------- - result : BlockManager + result : same type as self verify_is_copy : bool, default True Provide is_copy checks. """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. - self._reset_cache() self._clear_item_cache() - self._data = result + self._data = result._data self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: @@ -6114,11 +6111,11 @@ def fillna( else: raise ValueError(f"invalid fill value with a {type(value)}") + result = self._constructor(new_data) if inplace: - self._update_inplace(new_data) - return None + return self._update_inplace(result) else: - return self._constructor(new_data).__finalize__(self) + return result.__finalize__(self) def ffill( self: FrameOrSeries, @@ -6630,10 +6627,11 @@ def replace( f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) + result = self._constructor(new_data) if inplace: - self._update_inplace(new_data) + return self._update_inplace(result) else: - return self._constructor(new_data).__finalize__(self) + return result.__finalize__(self) _shared_docs[ "interpolate" @@ -6904,15 +6902,13 @@ def interpolate( **kwargs, ) + result = self._constructor(new_data) + if axis == 1: + result = result.T if inplace: - if axis == 1: - new_data = self._constructor(new_data).T._data - self._update_inplace(new_data) + return self._update_inplace(result) else: - res = self._constructor(new_data).__finalize__(self) - if axis == 1: - res = res.T - return res + return result.__finalize__(self) # ---------------------------------------------------------------------- # Timeseries methods Methods @@ -7247,7 +7243,7 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): result[mask] = np.nan if inplace: - self._update_inplace(result._data) + return self._update_inplace(result) else: return result @@ -8657,7 +8653,8 @@ def _where( new_data = self._data.putmask( mask=cond, new=other, align=align, axis=block_axis, ) - self._update_inplace(new_data) + result = self._constructor(new_data) + return self._update_inplace(result) else: new_data = self._data.where( @@ -8668,8 +8665,8 @@ def _where( try_cast=try_cast, axis=block_axis, ) - - return self._constructor(new_data).__finalize__(self) + result = self._constructor(new_data) + return result.__finalize__(self) _shared_docs[ "where" diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 0cf1ac4d107f6..7c63bc43de67e 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -98,7 +98,7 @@ def f(self, other): # this makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace( - result.reindex_like(self, copy=False)._data, verify_is_copy=False + result.reindex_like(self, copy=False), verify_is_copy=False ) return self diff --git a/pandas/core/series.py b/pandas/core/series.py index e5dbfd46bff5e..bb849fd91395c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1883,7 +1883,7 @@ def drop_duplicates(self, keep="first", inplace=False) -> Optional["Series"]: inplace = validate_bool_kwarg(inplace, "inplace") result = super().drop_duplicates(keep=keep) if inplace: - self._update_inplace(result._data) + self._update_inplace(result) return None else: return result @@ -3003,7 +3003,7 @@ def _try_kind_sort(arr): result.index = ibase.default_index(len(sorted_index)) if inplace: - self._update_inplace(result._data) + self._update_inplace(result) else: return result.__finalize__(self) @@ -3181,7 +3181,7 @@ def sort_index( result.index = ibase.default_index(len(result)) if inplace: - self._update_inplace(result._data) + self._update_inplace(result) else: return result.__finalize__(self) @@ -4511,7 +4511,7 @@ def dropna(self, axis=0, inplace=False, how=None): if self._can_hold_na: result = remove_na_arraylike(self) if inplace: - self._update_inplace(result._data) + self._update_inplace(result) else: return result else: