pandas-dev · cchwala · Feb 4, 2019 · Feb 4, 2019 · Feb 4, 2019 · Mar 26, 2019
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6648,7 +6648,12 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
             * 'outside': Only fill NaNs outside valid values (extrapolate).
 
             .. versionadded:: 0.21.0
+        maxgap : int, optional
+            Maximum number of consecutive NaNs that a gap may contain to
+            still be interpolated. Gaps wider than that are left untouched.
+            Must be greater than 0 and cannot be combined with `limit`.
 
+            .. versionadded:: 0.25.0
         downcast : optional, 'infer' or None, defaults to None
             Downcast dtypes if possible.
         **kwargs
@@ -6783,7 +6788,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
 
     @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs)
     def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
-                    limit_direction='forward', limit_area=None,
+                    limit_direction='forward', limit_area=None, maxgap=None,
                     downcast=None, **kwargs):
         """
         Interpolate values according to different methods.
@@ -6836,6 +6841,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
                                     values=_maybe_transposed_self, limit=limit,
                                     limit_direction=limit_direction,
                                     limit_area=limit_area,
+                                    maxgap=maxgap,
                                     inplace=inplace, downcast=downcast,
                                     **kwargs)
 

diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -108,7 +108,7 @@ def clean_interp_method(method, **kwargs):
     return method
 
 
-def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
+def interpolate_1d(xvalues, yvalues, method='linear', limit=None, maxgap=None,
                    limit_direction='forward', limit_area=None, fill_value=None,
                    bounds_error=False, order=None, **kwargs):
     """
@@ -165,6 +165,16 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
     elif limit < 1:
         raise ValueError('Limit must be greater than 0')
 
+    if (maxgap is not None) and (limit is not None):
+        raise ValueError('maxgap cannot be used together with limit')
+
+    if maxgap is None:
+        pass
+    elif not is_integer(maxgap):
+        raise ValueError('maxgap must be an integer')
+    elif maxgap < 1:
+        raise ValueError('maxgap must be greater than 0')
+
     from pandas import Series
     ys = Series(yvalues)
 
@@ -182,14 +192,40 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
     # contain indices of NaNs at the beginning of the series, and NaNs that
     # are more than'limit' away from the prior non-NaN.
 
+    # If maxgap is provided, preserve_nans instead holds the indices of all
+    # NaN-gaps between valid values that are wider than maxgap.
+
     # set preserve_nans based on direction using _interp_limit
-    if limit_direction == 'forward':
-        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
-    elif limit_direction == 'backward':
-        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
+    if maxgap is None:
+        if limit_direction == 'forward':
+            preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
+        elif limit_direction == 'backward':
+            preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
+        else:
+            # both directions... just use _interp_limit
+            preserve_nans = set(_interp_limit(invalid, limit, limit))
     else:
-        # both directions... just use _interp_limit
-        preserve_nans = set(_interp_limit(invalid, limit, limit))
+        def bfill_nan(arr):
+            """Fill NaNs with the next valid value; trailing NaNs stay."""
+            is_nan = np.isnan(arr)
+            last = is_nan.shape[0] - 1
+            src = np.where(is_nan, last, np.arange(last + 1))
+            src = np.minimum.accumulate(src[::-1], axis=0)[::-1]
+            return arr[src]
+
+        # Generate array where the NaN-gap-width is filled in as value
+        # at each NaN location.
+        cumsum = np.cumsum(invalid).astype('float')
+        diff = np.zeros_like(yvalues)
+        diff[~invalid] = np.pad(np.diff(cumsum[~invalid]),
+                                (1, 0), mode='constant')
+        diff[invalid] = np.nan
+        diff = bfill_nan(diff)
+        # hack to avoid having trailing NaNs in `diff`. Fill these
+        # with `maxgap`. Everything not larger than `maxgap` won't
+        # matter in the following.
+        diff[np.isnan(diff)] = maxgap
+        preserve_nans = set(np.flatnonzero((diff > maxgap) & invalid))
 
     # if limit_area is set, add either mid or outside indices
     # to preserve_nans GH #16284

diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
@@ -1204,6 +1204,41 @@ def test_interp_limit_to_ends(self):
                                limit_direction='both')
         assert_series_equal(result, expected)
 
+    def test_interp_maxgap(self):
+        s = Series([
+            np.nan,
+            1., np.nan,
+            2., np.nan, np.nan,
+            5., np.nan, np.nan, np.nan,
+            -1., np.nan, np.nan
+        ])
+
+        # gaps of up to two NaNs are filled, the three-NaN gap is kept
+        expected = Series([
+            1.,
+            1., 1.5,
+            2., 3., 4.,
+            5., np.nan, np.nan, np.nan,
+            -1., -1., -1.
+        ])
+        result = s.interpolate(method='linear', maxgap=2)
+        assert_series_equal(result, expected)
+
+        # with limit_area='inside' the leading/trailing NaNs stay NaN
+        expected = Series([
+            np.nan,
+            1., 1.5,
+            2., 3., 4.,
+            5., np.nan, np.nan, np.nan,
+            -1., np.nan, np.nan
+        ])
+        result = s.interpolate(method='linear', maxgap=2, limit_area='inside')
+        assert_series_equal(result, expected)
+
+        msg = 'maxgap cannot be used together with limit'
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method='linear', maxgap=2, limit=3)
+
     def test_interp_limit_before_ends(self):
         # These test are for issue #11115 -- limit ends properly.
         s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])