From 6c1f8954c753c0225b6e872eeb6c2dd99e18affe Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 28 Dec 2019 14:47:29 +0800 Subject: [PATCH 1/3] BUG: pct_change wrong result when there are duplicated indices (GH30463) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 10 +++++----- pandas/tests/frame/methods/test_pct_change.py | 19 +++++++++++++++++++ .../tests/series/methods/test_pct_change.py | 9 +++++++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 96ea682dd3caf..09e8d8679bc7c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -765,6 +765,7 @@ Numeric - Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) - Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :meth:`NDFrame.pct_change` when there are duplicated indices (:issue:`30463`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 08c7f38ce4c82..fdd6f652c7642 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9901,11 +9901,11 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar data = self.fillna(method=fill_method, limit=limit, axis=axis) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 - rs = rs.loc[~rs.index.duplicated()] - rs = rs.reindex_like(data) - if freq is None: - mask = isna(com.values_from_object(data)) - np.putmask(rs.values, mask, np.nan) + if freq is not None: + # Shift method is implemented differently when freq is not None + # We want to restore the original index + rs = rs.loc[~rs.index.duplicated()] + rs = rs.reindex_like(data) return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 0c15533c37f01..aae0180498263 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -76,3 +76,22 @@ def test_pct_change_periods_freq( rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_frame_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ['pad', 'ffill', None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + data = DataFrame( + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, + index=['a', 'b'] * 3 + ) + result = data.pct_change(fill_method=fill_method) + if fill_method is None: + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] + else: + second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + expected = DataFrame({ + 0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], + 1: second_column, + }, index=['a', 'b'] * 3) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index abc5c498813ef..56c9e2106b98c 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -68,3 +68,12 @@ def test_pct_change_periods_freq( rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_series_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ['pad', 'ffill', None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + s = Series([np.nan, 1, 2, 3, 9, 18], index=['a', 'b'] * 3) + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=['a', 'b'] * 3) + tm.assert_series_equal(result, expected) From 672fd1a05b82d1d058ca9c2106f92516349628bf Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 28 Dec 2019 14:50:58 +0800 Subject: [PATCH 2/3] CLN: reformatted code using black --- pandas/tests/frame/methods/test_pct_change.py | 13 ++++++------- pandas/tests/series/methods/test_pct_change.py | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index aae0180498263..ac13a5e146043 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -78,20 +78,19 @@ def test_pct_change_periods_freq( tm.assert_frame_equal(rs_freq, rs_periods) -@pytest.mark.parametrize("fill_method", ['pad', 'ffill', None]) +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) def test_pct_change_with_duplicated_indices(fill_method): # GH30463 data = DataFrame( - {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, - index=['a', 'b'] * 3 + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) result = data.pct_change(fill_method=fill_method) if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] else: second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] - expected = DataFrame({ - 0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], - 1: second_column, - }, index=['a', 'b'] * 3) + expected = DataFrame( + {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, + index=["a", "b"] * 3, + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 56c9e2106b98c..aa01543132841 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -70,10 +70,10 @@ def test_pct_change_periods_freq( tm.assert_series_equal(rs_freq, rs_periods) -@pytest.mark.parametrize("fill_method", ['pad', 'ffill', None]) +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) def test_pct_change_with_duplicated_indices(fill_method): # GH30463 - s = Series([np.nan, 1, 2, 3, 9, 18], index=['a', 'b'] * 3) + s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) result = s.pct_change(fill_method=fill_method) - expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=['a', 'b'] * 3) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) tm.assert_series_equal(result, expected) From 9df2f82ee25c09b4d1f63d625ea88944796ad8c9 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 1 Jan 2020 16:40:38 +0800 Subject: [PATCH 3/3] DOC: minor correction in whatsnew (GH30463) --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7b0c4a977a919..c1a5c452e1df4 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -765,7 +765,7 @@ Numeric - Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) - Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) -- Bug in :meth:`NDFrame.pct_change` when there are duplicated indices (:issue:`30463`) +- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) Conversion