Skip to content

Commit 9852ec4

Browse files
committed
ENH limit_area added to interpolate1d
1 parent e41fe7f commit 9852ec4

File tree

6 files changed

+136
-53
lines changed

6 files changed

+136
-53
lines changed

doc/source/missing_data.rst

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,10 @@ Interpolation
330330

331331
The ``limit_direction`` keyword argument was added.
332332

333+
.. versionadded:: 0.21.0
334+
335+
The ``limit_area`` keyword argument was added.
336+
333337
Both Series and DataFrame objects have an ``interpolate`` method that, by default,
334338
performs linear interpolation at missing datapoints.
335339

@@ -458,29 +462,48 @@ Interpolation Limits
458462
^^^^^^^^^^^^^^^^^^^^
459463

460464
Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword
461-
argument. Use this argument to limit the number of consecutive interpolations,
462-
keeping ``NaN`` values for interpolations that are too far from the last valid
463-
observation:
465+
argument. Use this argument to limit the number of consecutive ``NaN`` values
466+
filled since the last valid observation:
464467

465468
.. ipython:: python
466469
467-
ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13])
468-
ser.interpolate(limit=2)
470+
ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan])
469471
470-
By default, ``limit`` applies in a forward direction, so that only ``NaN``
471-
values after a non-``NaN`` value can be filled. If you provide ``'backward'`` or
472-
``'both'`` for the ``limit_direction`` keyword argument, you can fill ``NaN``
473-
values before non-``NaN`` values, or both before and after non-``NaN`` values,
474-
respectively:
472+
# fill all consecutive values in a forward direction
473+
ser.interpolate()
475474
476-
.. ipython:: python
475+
# fill one consecutive value in a forward direction
476+
ser.interpolate(limit=1)
477477
478-
ser.interpolate(limit=1) # limit_direction == 'forward'
478+
By default, ``NaN`` values are filled in a ``forward`` direction. Use the
479+
``limit_direction`` parameter to fill ``backward`` or from ``both`` directions.
479480

481+
.. ipython:: python
482+
483+
# fill one consecutive value backwards
480484
ser.interpolate(limit=1, limit_direction='backward')
481485
486+
# fill one consecutive value in both directions
482487
ser.interpolate(limit=1, limit_direction='both')
483488
489+
# fill all consecutive values in both directions
490+
ser.interpolate(limit_direction='both')
491+
492+
By default, ``NaN`` values are filled whether they are inside (surrounded by)
493+
existing valid values, or outside existing valid values. Introduced in v0.21,
494+
the ``limit_area`` parameter restricts filling to either inside or outside values.
495+
496+
.. ipython:: python
497+
498+
# fill one consecutive inside value in both directions
499+
ser.interpolate(limit=1, limit_area='inside', limit_direction='both')
500+
501+
# fill all consecutive outside values backward
502+
ser.interpolate(limit_direction='backward', limit_area='outside')
503+
504+
# fill all consecutive outside values in both directions
505+
ser.interpolate(limit_direction='both', limit_area='outside')
506+
484507
.. _missing_data.replace:
485508

486509
Replacing Generic Values

pandas/core/generic.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3883,10 +3883,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
38833883
limit : int, default None.
38843884
Maximum number of consecutive NaNs to fill. Must be greater than 0.
38853885
limit_direction : {'forward', 'backward', 'both'}, default 'forward'
3886-
If limit is specified, consecutive NaNs will be filled in this
3887-
direction.
3888-
3886+
Consecutive NaNs will be filled in this direction.
38893887
.. versionadded:: 0.17.0
3888+
limit_area : {'inside', 'outside'}, default None
3889+
* 'inside': Only fill NaNs surrounded by valid values (interpolate).
3890+
* 'outside': Only fill NaNs outside valid values (extrapolate).
3891+
* None: Fill NaNs both inside and outside valid values (default).
3892+
.. versionadded:: 0.21.0
38903893
38913894
inplace : bool, default False
38923895
Update the NDFrame in place if possible.
@@ -3919,7 +3922,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
39193922

39203923
@Appender(_shared_docs['interpolate'] % _shared_doc_kwargs)
39213924
def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
3922-
limit_direction='forward', downcast=None, **kwargs):
3925+
limit_direction='forward', limit_area=None,
3926+
downcast=None, **kwargs):
39233927
"""
39243928
Interpolate values according to different methods.
39253929
"""
@@ -3968,6 +3972,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
39683972
new_data = data.interpolate(method=method, axis=ax, index=index,
39693973
values=_maybe_transposed_self, limit=limit,
39703974
limit_direction=limit_direction,
3975+
limit_area=limit_area,
39713976
inplace=inplace, downcast=downcast,
39723977
**kwargs)
39733978

pandas/core/internals.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -907,8 +907,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
907907

908908
def interpolate(self, method='pad', axis=0, index=None, values=None,
909909
inplace=False, limit=None, limit_direction='forward',
910-
fill_value=None, coerce=False, downcast=None, mgr=None,
911-
**kwargs):
910+
limit_area=None, fill_value=None, coerce=False,
911+
downcast=None, mgr=None, **kwargs):
912912

913913
inplace = validate_bool_kwarg(inplace, 'inplace')
914914

@@ -949,6 +949,7 @@ def check_int_bool(self, inplace):
949949
return self._interpolate(method=m, index=index, values=values,
950950
axis=axis, limit=limit,
951951
limit_direction=limit_direction,
952+
limit_area=limit_area,
952953
fill_value=fill_value, inplace=inplace,
953954
downcast=downcast, mgr=mgr, **kwargs)
954955

@@ -983,8 +984,8 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
983984

984985
def _interpolate(self, method=None, index=None, values=None,
985986
fill_value=None, axis=0, limit=None,
986-
limit_direction='forward', inplace=False, downcast=None,
987-
mgr=None, **kwargs):
987+
limit_direction='forward', limit_area=None,
988+
inplace=False, downcast=None, mgr=None, **kwargs):
988989
""" interpolate using scipy wrappers """
989990

990991
inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -1012,6 +1013,7 @@ def func(x):
10121013
# i.e. not an arg to missing.interpolate_1d
10131014
return missing.interpolate_1d(index, x, method=method, limit=limit,
10141015
limit_direction=limit_direction,
1016+
limit_area=limit_area,
10151017
fill_value=fill_value,
10161018
bounds_error=False, **kwargs)
10171019

pandas/core/missing.py

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def clean_interp_method(method, **kwargs):
111111

112112

113113
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
114-
limit_direction='forward', fill_value=None,
114+
limit_direction='forward', limit_area=None, fill_value=None,
115115
bounds_error=False, order=None, **kwargs):
116116
"""
117117
Logic for the 1-d interpolation. The result should be 1-d, inputs
@@ -155,28 +155,12 @@ def _interp_limit(invalid, fw_limit, bw_limit):
155155
raise ValueError('Invalid limit_direction: expecting one of %r, got '
156156
'%r.' % (valid_limit_directions, limit_direction))
157157

158-
from pandas import Series
159-
ys = Series(yvalues)
160-
start_nans = set(range(ys.first_valid_index()))
161-
end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
162-
163-
# violate_limit is a list of the indexes in the series whose yvalue is
164-
# currently NaN, and should still be NaN after the interpolation.
165-
# Specifically:
166-
#
167-
# If limit_direction='forward' or None then the list will contain NaNs at
168-
# the beginning of the series, and NaNs that are more than 'limit' away
169-
# from the prior non-NaN.
170-
#
171-
# If limit_direction='backward' then the list will contain NaNs at
172-
# the end of the series, and NaNs that are more than 'limit' away
173-
# from the subsequent non-NaN.
174-
#
175-
# If limit_direction='both' then the list will contain NaNs that
176-
# are more than 'limit' away from any non-NaN.
177-
#
178-
# If limit=None, then use default behavior of filling an unlimited number
179-
# of NaNs in the direction specified by limit_direction
158+
if not limit_area is None:
159+
valid_limit_areas = ['inside', 'outside']
160+
limit_area = limit_area.lower()
161+
if limit_area not in valid_limit_areas:
162+
raise ValueError('Invalid limit_area: expecting one of %r, got %r.'
163+
% (valid_limit_areas, limit_area))
180164

181165
# default limit is unlimited GH #16282
182166
if limit is None:
@@ -186,15 +170,43 @@ def _interp_limit(invalid, fw_limit, bw_limit):
186170
elif limit < 1:
187171
raise ValueError('Limit must be greater than 0')
188172

189-
# each possible limit_direction
173+
from pandas import Series
174+
ys = Series(yvalues)
175+
176+
# These are sets of index pointers to invalid values, e.g. {0, 1, ...}.
177+
all_nans = set(np.flatnonzero(invalid))
178+
start_nans = set(range(ys.first_valid_index()))
179+
end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
180+
mid_nans = all_nans - start_nans - end_nans
181+
182+
# Like the sets above, preserve_nans contains indices of invalid values,
183+
# but in this case, it is the final set of indices that need to be
184+
# preserved as NaN after the interpolation.
185+
186+
# For example if limit_direction='forward' then preserve_nans will
187+
# contain indices of NaNs at the beginning of the series, and NaNs that
188+
# are more than 'limit' away from the prior non-NaN.
189+
190+
# set preserve_nans based on direction using _interp_limit
190191
if limit_direction == 'forward':
191-
violate_limit = sorted(start_nans |
192-
set(_interp_limit(invalid, limit, 0)))
192+
preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
193193
elif limit_direction == 'backward':
194-
violate_limit = sorted(end_nans |
195-
set(_interp_limit(invalid, 0, limit)))
196-
elif limit_direction == 'both':
197-
violate_limit = sorted(_interp_limit(invalid, limit, limit))
194+
preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
195+
else:
196+
# both directions... just use _interp_limit
197+
preserve_nans = set(_interp_limit(invalid, limit, limit))
198+
199+
# if limit_area is set, add either mid or outside indices
200+
# to preserve_nans GH #16284
201+
if limit_area == 'inside':
202+
# preserve NaNs on the outside
203+
preserve_nans |= start_nans | end_nans
204+
elif limit_area == 'outside':
205+
# preserve NaNs on the inside
206+
preserve_nans |= mid_nans
207+
208+
# sort preserve_nans and convert to list
209+
preserve_nans = sorted(preserve_nans)
198210

199211
xvalues = getattr(xvalues, 'values', xvalues)
200212
yvalues = getattr(yvalues, 'values', yvalues)
@@ -211,7 +223,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):
211223
else:
212224
inds = xvalues
213225
result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
214-
result[violate_limit] = np.nan
226+
result[preserve_nans] = np.nan
215227
return result
216228

217229
sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
@@ -230,7 +242,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):
230242
fill_value=fill_value,
231243
bounds_error=bounds_error,
232244
order=order, **kwargs)
233-
result[violate_limit] = np.nan
245+
result[preserve_nans] = np.nan
234246
return result
235247

236248

pandas/core/resample.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,8 @@ def fillna(self, method, limit=None):
487487

488488
@Appender(_shared_docs['interpolate'] % _shared_docs_kwargs)
489489
def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
490-
limit_direction='forward', downcast=None, **kwargs):
490+
limit_direction='forward', limit_area=None,
491+
downcast=None, **kwargs):
491492
"""
492493
Interpolate values according to different methods.
493494
@@ -497,6 +498,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
497498
return result.interpolate(method=method, axis=axis, limit=limit,
498499
inplace=inplace,
499500
limit_direction=limit_direction,
501+
limit_area=limit_area,
500502
downcast=downcast, **kwargs)
501503

502504
def asfreq(self, fill_value=None):

pandas/tests/series/test_missing.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,45 @@ def test_interp_limit_bad_direction(self):
959959
pytest.raises(ValueError, s.interpolate, method='linear',
960960
limit_direction='abc')
961961

962+
# limit_area introduced GH #16284
963+
def test_interp_limit_area(self):
964+
# These tests are for issue #16284 -- restrict filling to inside/outside NaNs.
965+
s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])
966+
967+
expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan])
968+
result = s.interpolate(method='linear', limit_area='inside')
969+
assert_series_equal(result, expected)
970+
971+
expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan])
972+
result = s.interpolate(method='linear', limit_area='inside',
973+
limit=1)
974+
975+
expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan])
976+
result = s.interpolate(method='linear', limit_area='inside',
977+
limit_direction='both', limit=1)
978+
assert_series_equal(result, expected)
979+
980+
expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.])
981+
result = s.interpolate(method='linear', limit_area='outside')
982+
assert_series_equal(result, expected)
983+
984+
expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan])
985+
result = s.interpolate(method='linear', limit_area='outside',
986+
limit=1)
987+
988+
expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan])
989+
result = s.interpolate(method='linear', limit_area='outside',
990+
limit_direction='both', limit=1)
991+
assert_series_equal(result, expected)
992+
993+
expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan])
994+
result = s.interpolate(method='linear', limit_area='outside',
995+
direction='backward')
996+
997+
# raises an error if limit_area is not a valid option.
998+
pytest.raises(ValueError, s.interpolate, method='linear',
999+
limit_area='abc')
1000+
9621001
def test_interp_limit_direction(self):
9631002
# These tests are for issue #9218 -- fill NaNs in both directions.
9641003
s = Series([1, 3, np.nan, np.nan, np.nan, 11])

0 commit comments

Comments
 (0)