From 716a01e71cc53d49b2043f072dabb1ba275d831c Mon Sep 17 00:00:00 2001 From: Max Chen Date: Sun, 1 Dec 2019 23:02:50 +0800 Subject: [PATCH 1/2] fix series interpolate bug with unsorted index GH21037 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/missing.py | 5 ++++- pandas/tests/series/test_missing.py | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3f8d9d3916797..444476f0ffee1 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -550,6 +550,7 @@ Numeric - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) - Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) - Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) +- Bug in :meth:`Series.interpolate` when using method ``index`` with unsorted index. 
(:issue:`21037`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 744cde95cb668..e79627b145c0b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -277,7 +277,10 @@ def interpolate_1d( inds = lib.maybe_convert_objects(inds) else: inds = xvalues - result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) + seq = np.argsort(inds[valid]) + result[invalid] = np.interp( + inds[invalid], inds[valid][seq], yvalues[valid][seq] + ) result[preserve_nans] = np.nan return result diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 81bf1edbe86df..1655f508f52b5 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1655,3 +1655,10 @@ def test_interpolate_timedelta_index(self, interp_methods_ind): pytest.skip( "This interpolation method is not supported for Timedelta Index yet." ) + + def test_interpolate_unsorted_index(self): + # GH 21037 + ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) + result = ts.interpolate(method="index").sort_index(ascending=True) + expected = ts.sort_index(ascending=True).interpolate(method="index") + tm.assert_series_equal(result, expected) From 226ba98e156ce8c1eb87003873d01f602ee26d65 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Tue, 3 Dec 2019 00:10:04 +0800 Subject: [PATCH 2/2] revised as requested --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/missing.py | 5 +++-- pandas/tests/series/test_missing.py | 10 +++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 444476f0ffee1..2a65c5c8eb458 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -550,7 +550,7 @@ Numeric - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) - Bug in :class:`NumericIndex` construction that caused indexing 
to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) - Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) -- Bug in :meth:`Series.interpolate` when using method ``index`` with unsorted index. (:issue:`21037`) +- Bug in :meth:`Series.interpolate` where using ``method="index"`` with an unsorted index would return incorrect results (:issue:`21037`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e79627b145c0b..9f4f7445509c9 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -277,9 +277,10 @@ def interpolate_1d( inds = lib.maybe_convert_objects(inds) else: inds = xvalues - seq = np.argsort(inds[valid]) + # np.interp requires sorted X values, #21037 + indexer = np.argsort(inds[valid]) result[invalid] = np.interp( - inds[invalid], inds[valid][seq], yvalues[valid][seq] + inds[invalid], inds[valid][indexer], yvalues[valid][indexer] ) result[preserve_nans] = np.nan return result diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 1655f508f52b5..b1c9b67eb951e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1656,9 +1656,13 @@ def test_interpolate_timedelta_index(self, interp_methods_ind): "This interpolation method is not supported for Timedelta Index yet."
) - def test_interpolate_unsorted_index(self): + @pytest.mark.parametrize( + "ascending, expected_values", + [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], + ) + def test_interpolate_unsorted_index(self, ascending, expected_values): # GH 21037 ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) - result = ts.interpolate(method="index").sort_index(ascending=True) - expected = ts.sort_index(ascending=True).interpolate(method="index") + result = ts.sort_index(ascending=ascending).interpolate(method="index") + expected = pd.Series(data=expected_values, index=expected_values, dtype=float) tm.assert_series_equal(result, expected)