TYP: require _update_inplace gets Frame/Series, never BlockManager #33074

Merged: 3 commits, Apr 1, 2020
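As a quick illustration of the contract this PR enforces: callers of _update_inplace now pass the resulting Frame/Series itself, and _update_inplace unwraps the underlying BlockManager via ._data, rather than callers passing result._data. A minimal, hypothetical sketch (FakeNDFrame is not a pandas class; it only mimics the shape of the new contract):

# Hypothetical sketch; FakeNDFrame only mimics the NDFrame._update_inplace contract.
class FakeNDFrame:
    def __init__(self, data):
        self._data = data  # stands in for the BlockManager

    def _update_inplace(self, result: "FakeNDFrame") -> None:
        # result is the same type as self (a Frame/Series), never a bare manager
        self._data = result._data

obj = FakeNDFrame([1, 1, 2])
deduped = FakeNDFrame([1, 2])
# old style, no longer allowed: obj._update_inplace(deduped._data)
obj._update_inplace(deduped)  # new style: pass the object itself
assert obj._data == [1, 2]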
15 changes: 2 additions & 13 deletions pandas/core/base.py
@@ -13,7 +13,6 @@
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly, doc
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import (
@@ -1505,18 +1504,14 @@ def factorize(self, sort=False, na_sentinel=-1):
def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
return algorithms.searchsorted(self._values, value, side=side, sorter=sorter)

def drop_duplicates(self, keep="first", inplace=False):
inplace = validate_bool_kwarg(inplace, "inplace")
def drop_duplicates(self, keep="first"):
Contributor:

this is only used in Index I think?

Member Author:

It exists on Series too

if isinstance(self, ABCIndexClass):
if self.is_unique:
return self._shallow_copy()

duplicated = self.duplicated(keep=keep)
result = self[np.logical_not(duplicated)]
if inplace:
return self._update_inplace(result)
else:
return result
return result

def duplicated(self, keep="first"):
if isinstance(self, ABCIndexClass):
@@ -1527,9 +1522,3 @@ def duplicated(self, keep="first"):
return self._constructor(
duplicated(self, keep=keep), index=self.index
).__finalize__(self)

# ----------------------------------------------------------------------
# abstracts

def _update_inplace(self, result, verify_is_copy=True, **kwargs):
raise AbstractMethodError(self)
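With the inplace branch removed here, the base-class drop_duplicates only covers the non-inplace path shared by Index and Series (per the discussion above); the inplace handling moves to Series. A quick check of that shared behavior, assuming a standard pandas install:

import pandas as pd

# Both Index and Series use the shared non-inplace path of drop_duplicates.
idx = pd.Index([1, 1, 2, 3, 3])
assert idx.drop_duplicates().tolist() == [1, 2, 3]

s = pd.Series([1, 1, 2, 3, 3])
assert s.drop_duplicates().tolist() == [1, 2, 3]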
20 changes: 11 additions & 9 deletions pandas/core/frame.py
@@ -3042,16 +3042,16 @@ def query(self, expr, inplace=False, **kwargs):
res = self.eval(expr, **kwargs)

try:
new_data = self.loc[res]
result = self.loc[res]
except ValueError:
# when res is multi-dimensional loc raises, but this is sometimes a
# valid query
new_data = self[res]
result = self[res]

if inplace:
self._update_inplace(new_data)
self._update_inplace(result)
else:
return new_data
return result

def eval(self, expr, inplace=False, **kwargs):
"""
@@ -4683,7 +4683,7 @@ def drop_duplicates(
result.index = ibase.default_index(len(result))

if inplace:
self._update_inplace(result._data)
self._update_inplace(result)
return None
else:
return result
@@ -4802,10 +4802,11 @@ def sort_values(
if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))

result = self._constructor(new_data)
if inplace:
return self._update_inplace(new_data)
return self._update_inplace(result)
else:
return self._constructor(new_data).__finalize__(self)
return result.__finalize__(self)

def sort_index(
self,
@@ -4937,10 +4938,11 @@ def sort_index(
if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))

result = self._constructor(new_data)
if inplace:
return self._update_inplace(new_data)
return self._update_inplace(result)
else:
return self._constructor(new_data).__finalize__(self)
return result.__finalize__(self)

def value_counts(
self,
30 changes: 16 additions & 14 deletions pandas/core/generic.py
@@ -147,7 +147,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self)

if inplace:
self._update_inplace(result._data)
self._update_inplace(result)
return

return result
@@ -988,7 +988,7 @@ def rename(
result._clear_item_cache()

if inplace:
self._update_inplace(result._data)
self._update_inplace(result)
return None
else:
return result.__finalize__(self)
@@ -3957,15 +3957,15 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:

Parameters
----------
result : same type as self
verify_is_copy : bool, default True
Provide is_copy checks.
"""
# NOTE: This does *not* call __finalize__ and that's an explicit
# decision that we may revisit in the future.

self._reset_cache()
self._clear_item_cache()
self._data = getattr(result, "_data", result)
self._data = result._data
self._maybe_update_cacher(verify_is_copy=verify_is_copy)

def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries:
@@ -6107,15 +6107,15 @@ def fillna(
value=value, limit=limit, inplace=inplace, downcast=downcast
)
elif isinstance(value, ABCDataFrame) and self.ndim == 2:
new_data = self.where(self.notna(), value)
new_data = self.where(self.notna(), value)._data
else:
raise ValueError(f"invalid fill value with a {type(value)}")

result = self._constructor(new_data)
if inplace:
self._update_inplace(new_data)
return None
return self._update_inplace(result)
else:
return self._constructor(new_data).__finalize__(self)
return result.__finalize__(self)

def ffill(
self: FrameOrSeries,
@@ -6627,10 +6627,11 @@ def replace(
f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
)

result = self._constructor(new_data)
if inplace:
self._update_inplace(new_data)
return self._update_inplace(result)
else:
return self._constructor(new_data).__finalize__(self)
return result.__finalize__(self)

_shared_docs[
"interpolate"
@@ -7231,7 +7232,7 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
result[mask] = np.nan

if inplace:
self._update_inplace(result)
return self._update_inplace(result)
else:
return result

@@ -8641,7 +8642,8 @@ def _where(
new_data = self._data.putmask(
mask=cond, new=other, align=align, axis=block_axis,
)
self._update_inplace(new_data)
result = self._constructor(new_data)
return self._update_inplace(result)

else:
new_data = self._data.where(
@@ -8652,8 +8654,8 @@
try_cast=try_cast,
axis=block_axis,
)

return self._constructor(new_data).__finalize__(self)
result = self._constructor(new_data)
return result.__finalize__(self)

_shared_docs[
"where"
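User-visible behavior of the generic.py methods above is unchanged by this refactor: with inplace=True they mutate the object and return None, otherwise they return a new, finalized object. For example, assuming a standard pandas install:

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan, 3.0]})

out = df.fillna(0, inplace=True)
assert out is None                           # inplace path returns None
assert df["a"].tolist() == [1.0, 0.0, 3.0]

out = df.replace(0.0, -1.0)                  # non-inplace path returns a new frame
assert out["a"].tolist() == [1.0, -1.0, 3.0]
assert df["a"].tolist() == [1.0, 0.0, 3.0]   # original is untouched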
4 changes: 0 additions & 4 deletions pandas/core/indexes/base.py
@@ -534,10 +534,6 @@ def _shallow_copy_with_infer(self, values, **kwargs):
attributes.pop("tz", None)
return Index(values, **attributes)

def _update_inplace(self, result, **kwargs):
# guard when called from IndexOpsMixin
raise TypeError("Index can't be updated inplace")

def is_(self, other) -> bool:
"""
More flexible, faster check like ``is`` but that works through views.
2 changes: 1 addition & 1 deletion pandas/core/ops/methods.py
@@ -98,7 +98,7 @@ def f(self, other):
# this makes sure that we are aligned like the input
# we are updating inplace so we want to ignore is_copy
self._update_inplace(
result.reindex_like(self, copy=False)._data, verify_is_copy=False
result.reindex_like(self, copy=False), verify_is_copy=False
)

return self
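This wrapper backs the inplace arithmetic dunders (e.g. __iadd__): the result is realigned to self and written back through _update_inplace, and the same object is returned. A small illustration of the user-visible effect, assuming a standard pandas install:

import pandas as pd

s = pd.Series([1, 2, 3])
original = s
s += 1                        # routed through the inplace wrapper above
assert s is original          # same object, updated in place
assert s.tolist() == [2, 3, 4]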
14 changes: 8 additions & 6 deletions pandas/core/series.py
@@ -418,10 +418,6 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
# The ensure_index call above ensures we have an Index object
self._data.set_axis(axis, labels)

def _update_inplace(self, result, **kwargs):
# we want to call the generic version and not the IndexOpsMixin
return generic.NDFrame._update_inplace(self, result, **kwargs)

# ndarray compatibility
@property
def dtype(self) -> DtypeObj:
@@ -1802,7 +1798,7 @@ def unique(self):
result = super().unique()
return result

def drop_duplicates(self, keep="first", inplace=False) -> "Series":
def drop_duplicates(self, keep="first", inplace=False) -> Optional["Series"]:
Contributor:

are we actually testing the return value of inplace ops for drop_duplicates / duplicated? IIRC these are actually returning an object (at least they were).

Member Author:

are we actually testing the return value of inplace ops for drop_duplicates / duplicated?

Yes, editing this to return self instead of None breaks a test in tests.frame.test_api
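For reference, the behavior under discussion, assuming a standard pandas install:

import pandas as pd

s = pd.Series([1, 1, 2], name="x")
out = s.drop_duplicates(inplace=True)
assert out is None            # inplace=True returns None, not self
assert s.tolist() == [1, 2]   # the Series itself was deduplicated in place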

"""
Return Series with duplicate values removed.

@@ -1877,7 +1873,13 @@ def drop_duplicates(self, keep="first", inplace=False) -> "Series":
5 hippo
Name: animal, dtype: object
"""
return super().drop_duplicates(keep=keep, inplace=inplace)
inplace = validate_bool_kwarg(inplace, "inplace")
result = super().drop_duplicates(keep=keep)
if inplace:
self._update_inplace(result)
return None
else:
return result

def duplicated(self, keep="first") -> "Series":
"""