From 62ee43b71a06d30a8865e352d14156abe667c39f Mon Sep 17 00:00:00 2001 From: Guy Goldberg Date: Tue, 23 May 2017 12:11:36 -0700 Subject: [PATCH] ENH: Support inplace clip (#15388) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 51 ++++++++++++++++++++-------- pandas/tests/frame/test_analytics.py | 33 +++++++++++++++--- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f1289c490e4fb..b4ca3f011a81d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -36,6 +36,7 @@ Other Enhancements - :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ - :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`) - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) +- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b19a0751ea2e4..f8da6851d18bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4120,8 +4120,7 @@ def isnull(self): def notnull(self): return notnull(self).__finalize__(self) - def _clip_with_scalar(self, lower, upper): - + def _clip_with_scalar(self, lower, upper, inplace=False): if ((lower is not None and np.any(isnull(lower))) or (upper is not None and np.any(isnull(upper)))): raise ValueError("Cannot use an NA value as a clip threshold") @@ -4137,10 +4136,16 @@ def _clip_with_scalar(self, lower, upper): if np.any(mask): result[mask] = np.nan - return self._constructor( - result, **self._construct_axes_dict()).__finalize__(self) + axes_dict = self._construct_axes_dict() + result = self._constructor(result, **axes_dict).__finalize__(self) + + if inplace: + self._update_inplace(result) + else: + return result - def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): + def clip(self, lower=None, upper=None, axis=None, inplace=False, + *args, **kwargs): """ Trim values at input threshold(s). @@ -4150,6 +4155,9 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): upper : float or array_like, default None axis : int or string axis name, optional Align object with lower and upper along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. 
versionadded:: 0.21.0 Returns ------- @@ -4192,6 +4200,8 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") + inplace = validate_bool_kwarg(inplace, 'inplace') + axis = nv.validate_clip_with_axis(axis, args, kwargs) # GH 2747 (arguments were reversed) @@ -4202,17 +4212,20 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): # fast-path for scalars if ((lower is None or (is_scalar(lower) and is_number(lower))) and (upper is None or (is_scalar(upper) and is_number(upper)))): - return self._clip_with_scalar(lower, upper) + return self._clip_with_scalar(lower, upper, inplace=inplace) result = self if lower is not None: - result = result.clip_lower(lower, axis) + result = result.clip_lower(lower, axis, inplace=inplace) if upper is not None: - result = result.clip_upper(upper, axis) + if inplace: + result = self + + result = result.clip_upper(upper, axis, inplace=inplace) return result - def clip_upper(self, threshold, axis=None): + def clip_upper(self, threshold, axis=None, inplace=False): """ Return copy of input with values above given value(s) truncated. @@ -4221,6 +4234,9 @@ def clip_upper(self, threshold, axis=None): threshold : float or array_like axis : int or string axis name, optional Align object with threshold along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. 
versionadded:: 0.21.0 See Also -------- @@ -4234,12 +4250,14 @@ def clip_upper(self, threshold, axis=None): raise ValueError("Cannot use an NA value as a clip threshold") if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(None, threshold) + return self._clip_with_scalar(None, threshold, inplace=inplace) + + inplace = validate_bool_kwarg(inplace, 'inplace') subset = self.le(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) - def clip_lower(self, threshold, axis=None): + def clip_lower(self, threshold, axis=None, inplace=False): """ Return copy of the input with values below given value(s) truncated. @@ -4248,6 +4266,9 @@ def clip_lower(self, threshold, axis=None): threshold : float or array_like axis : int or string axis name, optional Align object with threshold along the given axis. + inplace : boolean, default False + Whether to perform the operation in place on the data + .. 
versionadded:: 0.21.0 See Also -------- @@ -4261,10 +4282,12 @@ def clip_lower(self, threshold, axis=None): raise ValueError("Cannot use an NA value as a clip threshold") if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(threshold, None) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + inplace = validate_bool_kwarg(inplace, 'inplace') subset = self.ge(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 73c6c0e7279a6..943a93b27a78a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1807,6 +1807,7 @@ def test_built_in_round(self): def test_clip(self): median = self.frame.median().median() + original = self.frame.copy() capped = self.frame.clip_upper(median) assert not (capped.values > median).any() @@ -1817,6 +1818,25 @@ def test_clip(self): double = self.frame.clip(upper=median, lower=median) assert not (double.values != median).any() + # Verify that self.frame was not changed inplace + assert (self.frame.values == original.values).all() + + def test_inplace_clip(self): + # GH #15388 + median = self.frame.median().median() + frame_copy = self.frame.copy() + + frame_copy.clip_upper(median, inplace=True) + assert not (frame_copy.values > median).any() + frame_copy = self.frame.copy() + + frame_copy.clip_lower(median, inplace=True) + assert not (frame_copy.values < median).any() + frame_copy = self.frame.copy() + + frame_copy.clip(upper=median, lower=median, inplace=True) + assert not (frame_copy.values != median).any() + def test_dataframe_clip(self): # GH #2747 df = DataFrame(np.random.randn(1000, 2)) @@ -1843,18 +1863,23 @@ def test_clip_mixed_numeric(self): 
'B': [1., np.nan, 2.]}) tm.assert_frame_equal(result, expected, check_like=True) - def test_clip_against_series(self): + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): # GH #6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) ub = lb + 1 - clipped_df = df.clip(lb, ub, axis=0) + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df for i in range(2): - lb_mask = df.iloc[:, i] <= lb - ub_mask = df.iloc[:, i] >= ub + lb_mask = original.iloc[:, i] <= lb + ub_mask = original.iloc[:, i] >= ub mask = ~lb_mask & ~ub_mask result = clipped_df.loc[lb_mask, i]