Skip to content

ENH: Support inplace clip (#15388) #16462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Other Enhancements
- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)

.. _whatsnew_0210.api_breaking:

Expand Down
51 changes: 37 additions & 14 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4120,8 +4120,7 @@ def isnull(self):
def notnull(self):
return notnull(self).__finalize__(self)

def _clip_with_scalar(self, lower, upper):

def _clip_with_scalar(self, lower, upper, inplace=False):
if ((lower is not None and np.any(isnull(lower))) or
(upper is not None and np.any(isnull(upper)))):
raise ValueError("Cannot use an NA value as a clip threshold")
Expand All @@ -4137,10 +4136,16 @@ def _clip_with_scalar(self, lower, upper):
if np.any(mask):
result[mask] = np.nan

return self._constructor(
result, **self._construct_axes_dict()).__finalize__(self)
axes_dict = self._construct_axes_dict()
result = self._constructor(result, **axes_dict).__finalize__(self)

if inplace:
self._update_inplace(result)
else:
return result

def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
def clip(self, lower=None, upper=None, axis=None, inplace=False,
*args, **kwargs):
"""
Trim values at input threshold(s).

Expand All @@ -4150,6 +4155,9 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
upper : float or array_like, default None
axis : int or string axis name, optional
Align object with lower and upper along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs a versionadded tag.

.. versionadded:: 0.21.0

Returns
-------
Expand Down Expand Up @@ -4192,6 +4200,8 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
if isinstance(self, ABCPanel):
raise NotImplementedError("clip is not supported yet for panels")

inplace = validate_bool_kwarg(inplace, 'inplace')

axis = nv.validate_clip_with_axis(axis, args, kwargs)

# GH 2747 (arguments were reversed)
Expand All @@ -4202,17 +4212,20 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
# fast-path for scalars
if ((lower is None or (is_scalar(lower) and is_number(lower))) and
(upper is None or (is_scalar(upper) and is_number(upper)))):
return self._clip_with_scalar(lower, upper)
return self._clip_with_scalar(lower, upper, inplace=inplace)

result = self
if lower is not None:
result = result.clip_lower(lower, axis)
result = result.clip_lower(lower, axis, inplace=inplace)
if upper is not None:
result = result.clip_upper(upper, axis)
if inplace:
result = self

result = result.clip_upper(upper, axis, inplace=inplace)

return result

def clip_upper(self, threshold, axis=None):
def clip_upper(self, threshold, axis=None, inplace=False):
"""
Return copy of input with values above given value(s) truncated.

Expand All @@ -4221,6 +4234,9 @@ def clip_upper(self, threshold, axis=None):
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

.. versionadded:: 0.21.0

See Also
--------
Expand All @@ -4234,12 +4250,14 @@ def clip_upper(self, threshold, axis=None):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(None, threshold)
return self._clip_with_scalar(None, threshold, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.le(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def clip_lower(self, threshold, axis=None):
def clip_lower(self, threshold, axis=None, inplace=False):
"""
Return copy of the input with values below given value(s) truncated.

Expand All @@ -4248,6 +4266,9 @@ def clip_lower(self, threshold, axis=None):
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

.. versionadded:: 0.21.0

See Also
--------
Expand All @@ -4261,10 +4282,12 @@ def clip_lower(self, threshold, axis=None):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(threshold, None)
return self._clip_with_scalar(threshold, None, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.ge(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False, **kwargs):
Expand Down
33 changes: 29 additions & 4 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,7 @@ def test_built_in_round(self):

def test_clip(self):
median = self.frame.median().median()
original = self.frame.copy()

capped = self.frame.clip_upper(median)
assert not (capped.values > median).any()
Expand All @@ -1817,6 +1818,25 @@ def test_clip(self):
double = self.frame.clip(upper=median, lower=median)
assert not (double.values != median).any()

# Verify that self.frame was not changed inplace
assert (self.frame.values == original.values).all()

def test_inplace_clip(self):
# GH #15388
median = self.frame.median().median()
frame_copy = self.frame.copy()

frame_copy.clip_upper(median, inplace=True)
assert not (frame_copy.values > median).any()
frame_copy = self.frame.copy()

frame_copy.clip_lower(median, inplace=True)
assert not (frame_copy.values < median).any()
frame_copy = self.frame.copy()

frame_copy.clip(upper=median, lower=median, inplace=True)
assert not (frame_copy.values != median).any()

def test_dataframe_clip(self):
# GH #2747
df = DataFrame(np.random.randn(1000, 2))
Expand All @@ -1843,18 +1863,23 @@ def test_clip_mixed_numeric(self):
'B': [1., np.nan, 2.]})
tm.assert_frame_equal(result, expected, check_like=True)

def test_clip_against_series(self):
@pytest.mark.parametrize("inplace", [True, False])
def test_clip_against_series(self, inplace):
# GH #6966

df = DataFrame(np.random.randn(1000, 2))
lb = Series(np.random.randn(1000))
ub = lb + 1

clipped_df = df.clip(lb, ub, axis=0)
original = df.copy()
clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)

if inplace:
clipped_df = df

for i in range(2):
lb_mask = df.iloc[:, i] <= lb
ub_mask = df.iloc[:, i] >= ub
lb_mask = original.iloc[:, i] <= lb
ub_mask = original.iloc[:, i] >= ub
mask = ~lb_mask & ~ub_mask

result = clipped_df.loc[lb_mask, i]
Expand Down