From 9852ec4011a4d650fea30e8eaa5106a531163273 Mon Sep 17 00:00:00 2001 From: WBare Date: Fri, 26 May 2017 09:41:47 -0400 Subject: [PATCH 1/8] ENH limit_area added to interpolate1d --- doc/source/missing_data.rst | 47 +++++++++++++----- pandas/core/generic.py | 13 +++-- pandas/core/internals.py | 10 ++-- pandas/core/missing.py | 76 +++++++++++++++++------------ pandas/core/resample.py | 4 +- pandas/tests/series/test_missing.py | 39 +++++++++++++++ 6 files changed, 136 insertions(+), 53 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 37930775885e3..056c2909c43c2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -330,6 +330,10 @@ Interpolation The ``limit_direction`` keyword argument was added. +.. versionadded:: 0.21.0 + + The ``limit_area`` keyword argument was added. + Both Series and Dataframe objects have an ``interpolate`` method that, by default, performs linear interpolation at missing datapoints. @@ -458,29 +462,48 @@ Interpolation Limits ^^^^^^^^^^^^^^^^^^^^ Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword -argument. Use this argument to limit the number of consecutive interpolations, -keeping ``NaN`` values for interpolations that are too far from the last valid -observation: +argument. Use this argument to limit the number of consecutive ``NaN`` values +filled since the last valid observation: .. ipython:: python - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) - ser.interpolate(limit=2) + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) -By default, ``limit`` applies in a forward direction, so that only ``NaN`` -values after a non-``NaN`` value can be filled. 
If you provide ``'backward'`` or -``'both'`` for the ``limit_direction`` keyword argument, you can fill ``NaN`` -values before non-``NaN`` values, or both before and after non-``NaN`` values, -respectively: + # fill all consecutive values in a forward direction + ser.interpolate() -.. ipython:: python + # fill one consecutive value in a forward direction + ser.interpolate(limit=1) - ser.interpolate(limit=1) # limit_direction == 'forward' +By default, ``NaN`` values are filled in a ``forward`` direction. Use +``limit_direction`` parameter to fill ``backward`` or from ``both`` directions. +.. ipython:: python + + # fill one consecutive value backwards ser.interpolate(limit=1, limit_direction='backward') + # fill one consecutive value in both directions ser.interpolate(limit=1, limit_direction='both') + # fill all consecutive values in both directions + ser.interpolate(limit_direction='both') + +By default, ``NaN`` values are filled whether they are inside (surrounded by) +existing valid values, or outside existing valid values. Introduced in v0.21 +the ``limit_area`` parameter restricts filling to either inside or outside values. + +.. ipython:: python + + # fill one consecutive inside value in both directions + ser.interpolate(limit=1, limit_area='inside', limit_direction='both') + + # fill all consecutive outside values backward + ser.interpolate(limit_direction='backward', limit_area='outside') + + # fill all consecutive outside values in both directions + ser.interpolate(limit_direction='both', limit_area='outside') + .. _missing_data.replace: Replacing Generic Values diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8da6851d18bc..dffe39bb2b1b6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3883,10 +3883,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, limit : int, default None. Maximum number of consecutive NaNs to fill. Must be greater than 0. 
limit_direction : {'forward', 'backward', 'both'}, default 'forward' - If limit is specified, consecutive NaNs will be filled in this - direction. - + Consecutive NaNs will be filled in this direction. .. versionadded:: 0.17.0 + limit_area : {'inside', 'outside'}, default None + * 'inside' Only fill NaNs surrounded by valid values (interpolate). + * 'outside' Only fill NaNs outside valid values (extrapolate). + * None: default fill inside and outside + .. versionadded:: 0.21.0 inplace : bool, default False Update the NDFrame in place if possible. @@ -3919,7 +3922,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs) def interpolate(self, method='linear', axis=0, limit=None, inplace=False, - limit_direction='forward', downcast=None, **kwargs): + limit_direction='forward', limit_area=None, + downcast=None, **kwargs): """ Interpolate values according to different methods. """ @@ -3968,6 +3972,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, new_data = data.interpolate(method=method, axis=ax, index=index, values=_maybe_transposed_self, limit=limit, limit_direction=limit_direction, + limit_area=limit_area, inplace=inplace, downcast=downcast, **kwargs) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 15851a17274ca..bedd700356d14 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -907,8 +907,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', - fill_value=None, coerce=False, downcast=None, mgr=None, - **kwargs): + limit_area=None, fill_value=None, coerce=False, + downcast=None, mgr=None, **kwargs): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -949,6 +949,7 @@ def check_int_bool(self, inplace): return self._interpolate(method=m, index=index, values=values, axis=axis, 
limit=limit, limit_direction=limit_direction, + limit_area=limit_area, fill_value=fill_value, inplace=inplace, downcast=downcast, mgr=mgr, **kwargs) @@ -983,8 +984,8 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, - limit_direction='forward', inplace=False, downcast=None, - mgr=None, **kwargs): + limit_direction='forward', limit_area=None, + inplace=False, downcast=None, mgr=None, **kwargs): """ interpolate using scipy wrappers """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1012,6 +1013,7 @@ def func(x): # i.e. not an arg to missing.interpolate_1d return missing.interpolate_1d(index, x, method=method, limit=limit, limit_direction=limit_direction, + limit_area=limit_area, fill_value=fill_value, bounds_error=False, **kwargs) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 51778684d68f5..acc32d2c1bc83 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -111,7 +111,7 @@ def clean_interp_method(method, **kwargs): def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', fill_value=None, + limit_direction='forward', limit_area=None, fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs @@ -155,28 +155,12 @@ def _interp_limit(invalid, fw_limit, bw_limit): raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' % (valid_limit_directions, limit_direction)) - from pandas import Series - ys = Series(yvalues) - start_nans = set(range(ys.first_valid_index())) - end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - - # violate_limit is a list of the indexes in the series whose yvalue is - # currently NaN, and should still be NaN after the interpolation. 
- # Specifically: - # - # If limit_direction='forward' or None then the list will contain NaNs at - # the beginning of the series, and NaNs that are more than 'limit' away - # from the prior non-NaN. - # - # If limit_direction='backward' then the list will contain NaNs at - # the end of the series, and NaNs that are more than 'limit' away - # from the subsequent non-NaN. - # - # If limit_direction='both' then the list will contain NaNs that - # are more than 'limit' away from any non-NaN. - # - # If limit=None, then use default behavior of filling an unlimited number - # of NaNs in the direction specified by limit_direction + if not limit_area is None: + valid_limit_areas = ['inside', 'outside'] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError('Invalid limit_area: expecting one of %r, got %r.' + % (valid_limit_areas, limit_area)) # default limit is unlimited GH #16282 if limit is None: @@ -186,15 +170,43 @@ def _interp_limit(invalid, fw_limit, bw_limit): elif limit < 1: raise ValueError('Limit must be greater than 0') - # each possible limit_direction + from pandas import Series + ys = Series(yvalues) + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + start_nans = set(range(ys.first_valid_index())) + end_nans = set(range(1 + ys.last_valid_index(), len(valid))) + mid_nans = all_nans - start_nans - end_nans + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than 'limit' away from the prior non-NaN.
+ + # set preserve_nans based on direction using _interp_limit if limit_direction == 'forward': - violate_limit = sorted(start_nans | - set(_interp_limit(invalid, limit, 0))) + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == 'backward': - violate_limit = sorted(end_nans | - set(_interp_limit(invalid, 0, limit))) - elif limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + else: + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == 'inside': + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == 'outside': + # preserve NaNs on the inside + preserve_nans |= mid_nans + + # sort preserve_nans and convert to list + preserve_nans = sorted(preserve_nans) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) @@ -211,7 +223,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) - result[violate_limit] = np.nan + result[preserve_nans] = np.nan return result sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', @@ -230,7 +242,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) - result[violate_limit] = np.nan + result[preserve_nans] = np.nan return result diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 631b91c3aad11..00d18aedbe5f0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -487,7 +487,8 @@ def fillna(self, method, limit=None): @Appender(_shared_docs['interpolate'] % _shared_docs_kwargs) def interpolate(self, method='linear', axis=0, limit=None, inplace=False, -
limit_direction='forward', downcast=None, **kwargs): + limit_direction='forward', limit_area=None, + downcast=None, **kwargs): """ Interpolate values according to different methods. @@ -497,6 +498,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, return result.interpolate(method=method, axis=axis, limit=limit, inplace=inplace, limit_direction=limit_direction, + limit_area=limit_area, downcast=downcast, **kwargs) def asfreq(self, fill_value=None): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8e73c17684a16..bedd4d9b578d8 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -959,6 +959,45 @@ def test_interp_limit_bad_direction(self): pytest.raises(ValueError, s.interpolate, method='linear', limit_direction='abc') + # limit_area introduced GH #16284 + def test_interp_limit_area(self): + # These tests are for issue #9218 -- fill NaNs in both directions. + s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan]) + + expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside') + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside', + limit=1) + + expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside', + limit_direction='both', limit=1) + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.]) + result = s.interpolate(method='linear', limit_area='outside') + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan]) + result = s.interpolate(method='linear', limit_area='outside', + limit=1) + + expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan]) + result = s.interpolate(method='linear', limit_area='outside', + 
limit_direction='both', limit=1) + assert_series_equal(result, expected) + + expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='outside', + direction='backward') + + # raises an error even if limit type is wrong. + pytest.raises(ValueError, s.interpolate, method='linear', + limit_area='abc') + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 4bacc45f8a534d8800d27f7e08113625ae314fa7 Mon Sep 17 00:00:00 2001 From: WBare Date: Fri, 26 May 2017 10:08:24 -0400 Subject: [PATCH 2/8] DOC: Added limit_area to whatsnew --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b4ca3f011a81d..98203eabc4f98 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -24,6 +24,7 @@ New features `_ on most readers and writers (:issue:`13823`) - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- Added `limit_area` parameter to `DataFrame.interpolate()` method allowing further control of which NaNs are replaced (:issue:`16284`) .. _whatsnew_0210.enhancements.other: From 80d67b7bbe1742cc65578328e393769214b1d35b Mon Sep 17 00:00:00 2001 From: WBare Date: Fri, 26 May 2017 11:32:56 -0400 Subject: [PATCH 3/8] Fix code style - is not --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index acc32d2c1bc83..ca3f89a0af766 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -155,7 +155,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' 
% (valid_limit_directions, limit_direction)) - if not limit_area is None: + if limit_area is not None: valid_limit_areas = ['inside', 'outside'] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: From d83246c50c55033258de605585ecf6271d627a85 Mon Sep 17 00:00:00 2001 From: WBare Date: Wed, 31 May 2017 14:12:51 -0400 Subject: [PATCH 4/8] requested doc changes --- doc/source/missing_data.rst | 2 ++ doc/source/whatsnew/v0.21.0.txt | 4 +++- pandas/core/generic.py | 4 +++- pandas/core/missing.py | 8 ++++---- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 056c2909c43c2..d325f197af77f 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -458,6 +458,8 @@ at the new values. .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation .. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _missing_data.interp_limits: + Interpolation Limits ^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 98203eabc4f98..87b88b03b3842 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -24,7 +24,9 @@ New features `_ on most readers and writers (:issue:`13823`) - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) -- Added `limit_area` parameter to `DataFrame.interpolate()` method allowing further control of which NaNs are replaced (:issue:`16284`) +- Added `limit_area` parameter to `DataFrame.interpolate()` method allowing further control of which NaNs are replaced. + Use `limit-area='inside'` to fill only NaNs surrounded by valid values or use `limit-area='outside'` to fill only NaNs outside the existing valid values while preserving those inside. 
(:issue:`16284`) + Full documentation and examples are :ref:`here `. .. _whatsnew_0210.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dffe39bb2b1b6..c01fd0826302a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3884,11 +3884,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Maximum number of consecutive NaNs to fill. Must be greater than 0. limit_direction : {'forward', 'backward', 'both'}, default 'forward' Consecutive NaNs will be filled in this direction. + .. versionadded:: 0.17.0 + limit_area : {'inside', 'outside'}, default None + * None: (default) no fill restriction * 'inside' Only fill NaNs surrounded by valid values (interpolate). * 'outside' Only fill NaNs outside valid values (extrapolate). - * None: default fill inside and outside .. versionadded:: 0.21.0 inplace : bool, default False diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ca3f89a0af766..f2eb3b097255b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -152,15 +152,15 @@ def _interp_limit(invalid, fw_limit, bw_limit): valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: - raise ValueError('Invalid limit_direction: expecting one of %r, got ' - '%r.' % (valid_limit_directions, limit_direction)) + raise ValueError('Invalid limit_direction: expecting one of {}, got ' + '{}.'.format(valid_limit_directions, limit_direction)) if limit_area is not None: valid_limit_areas = ['inside', 'outside'] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: - raise ValueError('Invalid limit_area: expecting one of %r, got %r.' 
- % (valid_limit_areas, limit_area)) + raise ValueError('Invalid limit_area: expecting one of {}, got ' + '{}.'.format(valid_limit_areas, limit_area)) # default limit is unlimited GH #16282 if limit is None: From b24e488aa88047ca7aaf006bdc132e862649f0b7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Jun 2017 07:15:59 -0500 Subject: [PATCH 5/8] Lint and doc fix --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 87b88b03b3842..fa3f715a3aed1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -25,7 +25,7 @@ New features - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) - Added `limit_area` parameter to `DataFrame.interpolate()` method allowing further control of which NaNs are replaced. - Use `limit-area='inside'` to fill only NaNs surrounded by valid values or use `limit-area='outside'` to fill only NaNs outside the existing valid values while preserving those inside. (:issue:`16284`) + Use `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only NaNs outside the existing valid values while preserving those inside. (:issue:`16284`) Full documentation and examples are :ref:`here `. .. _whatsnew_0210.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c01fd0826302a..51e2ad249e66f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3884,7 +3884,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Maximum number of consecutive NaNs to fill. Must be greater than 0. limit_direction : {'forward', 'backward', 'both'}, default 'forward' Consecutive NaNs will be filled in this direction. - + .. 
versionadded:: 0.17.0 limit_area : {'inside', 'outside'}, default None From 41af8e3c47cc0d9db55a4fbd0af738662112e57d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 21 Jan 2018 16:48:59 -0500 Subject: [PATCH 6/8] whatsnew --- doc/source/whatsnew/v0.21.0.txt | 10 ---------- doc/source/whatsnew/v0.23.0.txt | 4 +++- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0b42f066fcfdc..3e673bd4cbc28 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -30,15 +30,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ -<<<<<<< HEAD -- Support for `PEP 519 -- Adding a file system path protocol - `_ on most readers and writers (:issue:`13823`) -- Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, - and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) -- Added `limit_area` parameter to `DataFrame.interpolate()` method allowing further control of which NaNs are replaced. - Use `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only NaNs outside the existing valid values while preserving those inside. (:issue:`16284`) - Full documentation and examples are :ref:`here `. -======= .. _whatsnew_0210.enhancements.parquet: Integration with Apache Parquet file format @@ -295,7 +286,6 @@ as in :meth:`DataFrame.rename`. [0, 0, 1] Categories (2, int64): [0, 1] ->>>>>>> master .. _whatsnew_0210.enhancements.other: diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fe5342c520196..1d6a7e87bae98 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -208,7 +208,9 @@ Other Enhancements to register custom accessors like ``.cat`` on pandas objects. See :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
- +- :meth:`DataFrame.interpolate` gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. Use + `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only ``NaN`` s + outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) .. _whatsnew_0230.api_breaking: From 7c53e7815b09cbe1cb2eedd042da51ee6b673503 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 21 Jan 2018 17:07:29 -0500 Subject: [PATCH 7/8] cleanup --- pandas/core/missing.py | 47 ++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3aa1ef6baef3a..2eccc5777bca6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -651,8 +651,24 @@ def fill_zeros(result, x, y, name, fill): def _interp_limit(invalid, fw_limit, bw_limit): - """Get idx of values that won't be filled b/c they exceed the limits. + """ + Get indexers of values that won't be filled + because they exceed the limits. + + Parameters + ---------- + invalid : boolean ndarray + fw_limit : int or None + forward limit to index + bw_limit : int or None + backward limit to index + + Returns + ------- + set of indexers + Notes + ----- This is equivalent to the more readable, but slower .. code-block:: python @@ -665,6 +681,8 @@ def _interp_limit(invalid, fw_limit, bw_limit): # 1. operate on the reversed array # 2. 
subtract the returned indicies from N - 1 N = len(invalid) + f_idx = set() + b_idx = set() def inner(invalid, limit): limit = min(limit, N) @@ -673,18 +691,25 @@ def inner(invalid, limit): set(np.where((~invalid[:limit + 1]).cumsum() == 0)[0])) return idx - if fw_limit == 0: - f_idx = set(np.where(invalid)[0]) - else: - f_idx = inner(invalid, fw_limit) + if fw_limit is not None: - if bw_limit == 0: - # then we don't even need to care about backwards, just use forwards - return f_idx - else: - b_idx = set(N - 1 - np.asarray(list(inner(invalid[::-1], bw_limit)))) if fw_limit == 0: - return b_idx + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit is not None: + + if bw_limit == 0: + # then we don't even need to care about backwards + # just use forwards + return f_idx + else: + b_idx = list(inner(invalid[::-1], bw_limit)) + b_idx = set(N - 1 - np.asarray(b_idx)) + if fw_limit == 0: + return b_idx + return f_idx & b_idx From 596f14537d3cc89dbb2593fd4148a8c1d1bd4582 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 24 Jan 2018 06:38:43 -0500 Subject: [PATCH 8/8] more docs --- doc/source/missing_data.rst | 4 ++-- doc/source/whatsnew/v0.23.0.txt | 34 +++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index b3532afb5b092..ee0e2c7462f66 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -515,13 +515,13 @@ By default, ``NaN`` values are filled in a ``forward`` direction. Use ser.interpolate(limit_direction='both') By default, ``NaN`` values are filled whether they are inside (surrounded by) -existing valid values, or outside existing valid values. Introduced in v0.21 +existing valid values, or outside existing valid values. Introduced in v0.23 the ``limit_area`` parameter restricts filling to either inside or outside values. .. 
ipython:: python # fill one consecutive inside value in both directions - ser.interpolate(limit=1, limit_area='inside', limit_direction='both') + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) # fill all consecutive outside values backward ser.interpolate(limit_direction='backward', limit_area='outside') diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b988b8a5972d0..5caf54472c4a4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -13,10 +13,35 @@ version. New features ~~~~~~~~~~~~ -- -- -- +``DataFrame.interpolate`` has gained the ``limit_area`` kwarg +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +:meth:`DataFrame.interpolate` gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. +Use `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only ``NaN`` s +outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. + + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) + ser + +fill one consecutive inside value in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + +fill all consecutive outside values backward + +.. ipython:: python + + ser.interpolate(limit_direction='backward', limit_area='outside') + +fill all consecutive outside values in both directions + +.. ipython:: python + ser.interpolate(limit_direction='both', limit_area='outside') .. _whatsnew_0210.enhancements.get_dummies_dtype: @@ -208,9 +233,6 @@ Other Enhancements to register custom accessors like ``.cat`` on pandas objects. See :ref:`Registering Custom Accessors ` for more (:issue:`14781`). -- :meth:`DataFrame.interpolate` gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. 
Use - `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only ``NaN`` s - outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) .. _whatsnew_0230.api_breaking: