-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Added max_gap keyword for series.interpolate #25141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
5e4b2ee
b752602
839b11a
fcdc4e4
20b70b7
3cb371e
8c6ff7a
4aaf8dc
1f0406f
eaacefd
f274d16
c72acdb
e0aee3a
af15eaf
12d2e5b
c25d1f8
4d40722
255518e
2015e84
4d7b0f1
cbf7388
5128b9d
3c55e1e
f9e4044
d1bbcd6
21b3091
c96c604
bd84fc9
908ffe5
380ef7c
5a1718a
16755bd
b58d721
aa58ffa
ae16124
28b442c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,7 +108,7 @@ def clean_interp_method(method, **kwargs): | |
return method | ||
|
||
|
||
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, | ||
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, maxgap=None, | ||
limit_direction='forward', limit_area=None, fill_value=None, | ||
bounds_error=False, order=None, **kwargs): | ||
""" | ||
|
@@ -165,6 +165,16 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, | |
elif limit < 1: | ||
raise ValueError('Limit must be greater than 0') | ||
|
||
if (maxgap is not None) and (limit is not None): | ||
raise ValueError('maxgap cannot be used together with limit') | ||
cchwala marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if maxgap is None: | ||
pass | ||
elif not is_integer(maxgap): | ||
raise ValueError('maxgap must be an integer') | ||
elif maxgap < 1: | ||
raise ValueError('maxgap must be greater than 0') | ||
|
||
from pandas import Series | ||
ys = Series(yvalues) | ||
|
||
|
@@ -182,14 +192,40 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, | |
# contain indices of NaNs at the beginning of the series, and NaNs that | ||
# are more than 'limit' away from the prior non-NaN. | ||
|
||
# In case that maxgap is provided, preserve_nans is derived so that | ||
# gaps with continuous NaN values of width > maxgap will be preserved. | ||
|
||
# set preserve_nans based on direction using _interp_limit | ||
if limit_direction == 'forward': | ||
preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) | ||
elif limit_direction == 'backward': | ||
preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) | ||
if maxgap is None: | ||
if limit_direction == 'forward': | ||
cchwala marked this conversation as resolved.
Show resolved
Hide resolved
|
||
preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) | ||
elif limit_direction == 'backward': | ||
preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) | ||
else: | ||
# both directions... just use _interp_limit | ||
preserve_nans = set(_interp_limit(invalid, limit, limit)) | ||
else: | ||
# both directions... just use _interp_limit | ||
preserve_nans = set(_interp_limit(invalid, limit, limit)) | ||
def bfill_nan(arr): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the benefit to making this a separate closure? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is no real reason. Maybe at some point I thought the function definition would make things clearer. Should I just put the content of the function in-line starting at L350? |
||
""" Backward-fill NaNs """ | ||
mask = np.isnan(arr) | ||
idx = np.where(~mask, np.arange(mask.shape[0]), mask.shape[0] - 1) | ||
idx = np.minimum.accumulate(idx[::-1], axis=0)[::-1] | ||
out = arr[idx] | ||
return out | ||
|
||
# Generate array where the NaN-gap-width is filled in as value | ||
# at each NaN location. | ||
cumsum = np.cumsum(invalid).astype('float') | ||
diff = np.zeros_like(yvalues) | ||
diff[~invalid] = np.pad(np.diff(cumsum[~invalid]), | ||
(1, 0), mode='constant') | ||
diff[invalid] = np.nan | ||
diff = bfill_nan(diff) | ||
# hack to avoid having trailing NaNs in `diff`. Fill these | ||
# with `maxgap`. Everything smaller than `maxgap` won't matter | ||
# in the following. | ||
diff[np.isnan(diff)] = maxgap | ||
preserve_nans = set(np.flatnonzero((diff > maxgap) & invalid)) | ||
|
||
# if limit_area is set, add either mid or outside indices | ||
# to preserve_nans GH #16284 | ||
|
Uh oh!
There was an error while loading. Please reload this page.