From a6b1a68cba7bee26d1eac65066d7892171425b58 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Mon, 30 Sep 2019 19:25:17 +0900 Subject: [PATCH 01/18] BUG: make pct_change can handle the anchored freq #28664 pct_change didn't work when the freq is anchored(like 1W, 1M, BM) so when the freq is anchored, use data.asfreq(freq) instead of the raw data. --- pandas/core/generic.py | 2 ++ pandas/tests/series/test_timeseries.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a3b9bec494854..569139783f5c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10413,6 +10413,8 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar else: data = self.fillna(method=fill_method, limit=limit, axis=axis) + if freq and to_offset(freq).isAnchored(): + data = data.asfreq(freq) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 rs = rs.reindex_like(data) if freq is None: diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index d0ca5d82c6b33..2f88acc050913 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -415,6 +415,12 @@ def test_pct_change(self): rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) + rs = self.ts.pct_change(freq="1W") + filled = self.ts.fillna(method="pad") + assert_series_equal( + rs, self.ts.asfreq("1W") + ) + def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) From 5eaa983c3c4e44e6b76b963f3db6f2ec28086e01 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Mon, 30 Sep 2019 19:29:11 +0900 Subject: [PATCH 02/18] black pandas/tests/series/test_timeseries.py --- pandas/tests/series/test_timeseries.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 2f88acc050913..6519f5fcfdc0e 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -417,9 +417,7 @@ def test_pct_change(self): rs = self.ts.pct_change(freq="1W") filled = self.ts.fillna(method="pad") - assert_series_equal( - rs, self.ts.asfreq("1W") - ) + assert_series_equal(rs, self.ts.asfreq("1W")) def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) From 9565b40537066fb0ae552369ed4b99c9b4080dbc Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Mon, 30 Sep 2019 19:32:37 +0900 Subject: [PATCH 03/18] add whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index eb4b72d01d59a..fbf40d1055b40 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -301,6 +301,7 @@ Other - Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- :meth:`Series.pct_change` will can handle the anchored freq such as `1M`, `1W`, `BM`. (:issue:`28664`) .. _whatsnew_1000.contributors: From 82950223e79e4c7193e4c90bd3eca002c9a72a2f Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Tue, 1 Oct 2019 13:41:25 +0900 Subject: [PATCH 04/18] make its own test about pct_change with anchored freq --- pandas/tests/series/test_timeseries.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 6519f5fcfdc0e..11f0bcedb4fa8 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -415,10 +415,6 @@ def test_pct_change(self): rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) - rs = self.ts.pct_change(freq="1W") - filled = self.ts.fillna(method="pad") - assert_series_equal(rs, self.ts.asfreq("1W")) - def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) @@ -426,6 +422,13 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) assert_series_equal(chg, expected) + def test_pct_change_anchored_freq(self): + s = pd.Series([1, 2, 3, 4, 5] * 3, pd.date_range("2019-09", periods=15)) + + result = s.pct_change(freq="1W") + expected = s.asfreq("1W").pct_change() + assert_series_equal(result, expected) + @pytest.mark.parametrize( "freq, periods, fill_method, limit", [ From e7579a8cf9beab39bebb6a84f3f335b5912cd746 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 00:41:56 +0900 Subject: [PATCH 05/18] make the test more exhaustive --- pandas/core/generic.py | 6 +++-- pandas/tests/series/test_timeseries.py | 36 ++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 569139783f5c5..2acb4f60428d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10413,8 +10413,10 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - if freq and to_offset(freq).isAnchored(): - data = data.asfreq(freq) + if freq and to_offset(re.sub("\\d", "", freq)).isAnchored(): + return data.asfreq(freq).pct_change( + periods=periods, fill_method=fill_method, limit=limit, **kwargs + ) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 rs = rs.reindex_like(data) if freq is None: diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 11f0bcedb4fa8..597414d475742 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -422,11 +422,37 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) assert_series_equal(chg, expected) - def test_pct_change_anchored_freq(self): - s = pd.Series([1, 2, 3, 4, 5] * 3, pd.date_range("2019-09", periods=15)) - - result = s.pct_change(freq="1W") - expected = s.asfreq("1W").pct_change() + @pytest.mark.parametrize( + "offset_1, n, offset_2", + product( + ["D", "W", "M"], + ["1", "7"], + [ + "B", + "C", + "W", + "M", + "SM", + "BM", + "CBM", + "MS", + "SMS", + "BMS", + "CBMS", + "Q", + "BQ", + "BA", + "A", + "AS", + ], + ), + ) + def test_pct_change_anchored_freq(self, offset_1, n, offset_2): + s = Series( + range(1, 16), date_range("2019-09", periods=15, freq="15" + offset_1) + ) + result = s.pct_change(freq=n + offset_2) + expected = s.asfreq(n + offset_2).pct_change() assert_series_equal(result, expected) @pytest.mark.parametrize( From 910adb31cfb84d40bd66c5cecff174973adb29d4 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 01:36:53 +0900 Subject: [PATCH 06/18] fix n as 1 because offset that is more than 1 won't anchored --- pandas/core/generic.py | 2 +- pandas/tests/series/test_timeseries.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2acb4f60428d9..414a385c57ac5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10413,7 +10413,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - if freq and to_offset(re.sub("\\d", "", freq)).isAnchored(): + if freq and to_offset(freq).isAnchored(): return data.asfreq(freq).pct_change( periods=periods, fill_method=fill_method, limit=limit, **kwargs ) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 597414d475742..7e5d0e3e3178d 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -423,10 +423,9 @@ def test_pct_change_shift_over_nas(self): assert_series_equal(chg, expected) @pytest.mark.parametrize( - "offset_1, n, offset_2", + "offset_1, offset_2", product( ["D", "W", "M"], - ["1", "7"], [ "B", "C", @@ -447,12 +446,12 @@ def test_pct_change_shift_over_nas(self): ], ), ) - def test_pct_change_anchored_freq(self, offset_1, n, offset_2): + def test_pct_change_anchored_freq(self, offset_1, offset_2): s = Series( range(1, 16), date_range("2019-09", periods=15, freq="15" + offset_1) ) - result = s.pct_change(freq=n + offset_2) - expected = s.asfreq(n + offset_2).pct_change() + result = s.pct_change(freq="1" + offset_2) + expected = s.asfreq("1" + offset_2).pct_change() assert_series_equal(result, expected) @pytest.mark.parametrize( From 4c6433b1480a1e094a5d8100c0e5869c98e31c36 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 12:28:07 +0900 Subject: [PATCH 07/18] make parametrizing idiomatic, add issue number --- pandas/tests/series/test_timeseries.py | 43 +++++++++++++------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 7e5d0e3e3178d..57792fed334fe 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -422,31 +422,30 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) assert_series_equal(chg, expected) + @pytest.mark.parametrize("offset_1", ["D", "W", "M"]) @pytest.mark.parametrize( - "offset_1, offset_2", - product( - ["D", "W", "M"], - [ - "B", - "C", - "W", - "M", - "SM", - "BM", - "CBM", - "MS", - "SMS", - "BMS", - "CBMS", - "Q", - "BQ", - "BA", - "A", - "AS", - ], - ), + "offset_2", + [ + "B", + "C", + "W", + "M", + "SM", + "BM", + "CBM", + "MS", + "SMS", + "BMS", + "CBMS", + "Q", + "BQ", + "BA", + "A", + "AS", + ], ) def test_pct_change_anchored_freq(self, offset_1, offset_2): + # GH 28664 s = Series( range(1, 16), date_range("2019-09", periods=15, freq="15" + offset_1) ) From 1c77e5939cd77bbd07c2de2a599193eb851a9c9b Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 15:00:08 +0900 Subject: [PATCH 08/18] remove .isAnchored logic --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 414a385c57ac5..037745a2b196f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10413,7 +10413,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - if freq and to_offset(freq).isAnchored(): + if freq: return data.asfreq(freq).pct_change( periods=periods, fill_method=fill_method, limit=limit, **kwargs ) From 64043b2e89c12ef7e31c6d282dc254010d722d4f Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 15:25:16 +0900 Subject: [PATCH 09/18] revert pct_change changes --- pandas/core/generic.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 037745a2b196f..a3b9bec494854 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10413,10 +10413,6 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar else: data = self.fillna(method=fill_method, limit=limit, axis=axis) - if freq: - return data.asfreq(freq).pct_change( - periods=periods, fill_method=fill_method, limit=limit, **kwargs - ) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 rs = rs.reindex_like(data) if freq is None: From f61e62a75ea1d203bd917ae559a1e95a02f473cf Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 2 Oct 2019 17:32:06 +0900 Subject: [PATCH 10/18] take a new approach, dropping duplicate indices --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/generic.py | 1 + pandas/tests/series/test_timeseries.py | 49 ++++++++++---------------- 3 files changed, 20 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fbf40d1055b40..0f7b140117fc4 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -301,7 +301,7 @@ Other - Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- :meth:`Series.pct_change` will can handle the anchored freq such as `1M`, `1W`, `BM`. (:issue:`28664`) +- :meth:`Series.pct_change` will can handle duplicate axis (:issue:`28664`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a3b9bec494854..9f89aeb62fdee 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10414,6 +10414,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, **kwar data = self.fillna(method=fill_method, limit=limit, axis=axis) rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 + rs = rs.loc[~rs.index.duplicated()] rs = rs.reindex_like(data) if freq is None: mask = isna(com.values_from_object(data)) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 57792fed334fe..af1ce43543042 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -415,6 +415,24 @@ def test_pct_change(self): rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) + @pytest.mark.parametrize("n_1", ["1", "15"]) + @pytest.mark.parametrize("offset_1", ["D", "BM"]) + @pytest.mark.parametrize("n_2", ["1", "3"]) + @pytest.mark.parametrize("offset_2", ["D", "BM"]) + def test_pct_change_with_duplicate_axis(self, n_1, offset_1, n_2, offset_2): + # GH 28664 + + freq_1 = n_1 + offset_1 + freq_2 = n_2 + offset_2 + + s = Series([1, 2, 3, 4, 5] * 3, date_range("2019-09", periods=15, freq=freq_1)) + + result = s.pct_change(freq=freq_2) + expected = s / s.shift(freq=freq_2) - 1 + expected = expected[expected.index.isin(s.index) & ~expected.index.duplicated()] + + assert_series_equal(result, expected) + def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) @@ -422,37 +440,6 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) assert_series_equal(chg, expected) - @pytest.mark.parametrize("offset_1", ["D", "W", "M"]) - @pytest.mark.parametrize( - "offset_2", - [ - "B", - "C", - "W", - "M", - "SM", - "BM", - "CBM", - "MS", - "SMS", - "BMS", - "CBMS", - "Q", - "BQ", - "BA", - "A", - "AS", - ], - ) - def test_pct_change_anchored_freq(self, offset_1, offset_2): - # GH 28664 - s = Series( - range(1, 16), date_range("2019-09", periods=15, freq="15" + offset_1) - ) - result = s.pct_change(freq="1" + offset_2) - expected = s.asfreq("1" + offset_2).pct_change() - assert_series_equal(result, expected) - @pytest.mark.parametrize( "freq, periods, fill_method, limit", [ From 92301ff1a8555051e85192a6fcad0eb6de7db98f Mon Sep 17 00:00:00 2001 From: donghojung Date: Fri, 18 Oct 2019 22:30:29 +0900 Subject: [PATCH 11/18] add more test cases --- pandas/tests/series/test_timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 175ffa444cdca..0e5898ca118dc 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -373,9 +373,9 @@ def test_pct_change(self): rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) - @pytest.mark.parametrize("n_1", ["1", "15"]) + @pytest.mark.parametrize("n_1", ["", "1", "15", "70"]) @pytest.mark.parametrize("offset_1", ["D", "BM"]) - @pytest.mark.parametrize("n_2", ["1", "3"]) + @pytest.mark.parametrize("n_2", ["", "1", "3", "70"]) @pytest.mark.parametrize("offset_2", ["D", "BM"]) def test_pct_change_with_duplicate_axis(self, n_1, offset_1, n_2, offset_2): # GH 28664 From d38fff5180c31771407a36c12810225216f68b1a Mon Sep 17 00:00:00 2001 From: donghojung Date: Fri, 8 Nov 2019 14:08:18 +0900 Subject: [PATCH 12/18] replace the test with a sanity test --- pandas/tests/series/test_timeseries.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 6b9738cd74f29..e602677ed9ea5 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -370,23 +370,11 @@ def test_pct_change(self, datetime_series): rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) ) - @pytest.mark.parametrize("n_1", ["", "1", "15", "70"]) - @pytest.mark.parametrize("offset_1", ["D", "BM"]) - @pytest.mark.parametrize("n_2", ["", "1", "3", "70"]) - @pytest.mark.parametrize("offset_2", ["D", "BM"]) - def test_pct_change_with_duplicate_axis(self, n_1, offset_1, n_2, offset_2): + def test_pct_change_with_duplicate_axis(self): # GH 28664 - - freq_1 = n_1 + offset_1 - freq_2 = n_2 + offset_2 - - s = Series([1, 2, 3, 4, 5] * 3, date_range("2019-09", periods=15, freq=freq_1)) - - result = s.pct_change(freq=freq_2) - expected = s / s.shift(freq=freq_2) - 1 - expected = expected[expected.index.isin(s.index) & ~expected.index.duplicated()] - - assert_series_equal(result, expected) + Series(range(70), date_range("2019", periods=70, freq="D")).pct_change( + freq="BM" + ) def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) From 0c9dd602ca74587bb2c44bea60329ac5d126294e Mon Sep 17 00:00:00 2001 From: donghojung Date: Fri, 8 Nov 2019 18:12:02 +0900 Subject: [PATCH 13/18] enhance the test --- pandas/tests/series/test_timeseries.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index e602677ed9ea5..6f01f482df0c5 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -372,9 +372,13 @@ def test_pct_change(self, datetime_series): def test_pct_change_with_duplicate_axis(self): # GH 28664 - Series(range(70), date_range("2019", periods=70, freq="D")).pct_change( - freq="BM" - ) + original_idx = date_range("2019-11-14", periods=3, freq="D") + s = Series(range(3), original_idx) + + result = s.pct_change(freq="B") + expected = s.asfreq("B").pct_change().reindex(original_idx) + + tm.assert_series_equal(result, expected) def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) From bdbd5fd59fdd5dfbb44c1806c605fd7dceb06838 Mon Sep 17 00:00:00 2001 From: donghojung Date: Sat, 9 Nov 2019 12:53:39 +0900 Subject: [PATCH 14/18] move a whatsnew entry from Other to Reshaping --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4273246b01d8e..c85b7ff8f8ccf 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -435,6 +435,7 @@ Reshaping - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- :meth:`Series.pct_change` will can handle duplicate axis (:issue:`28664`) Sparse ^^^^^^ @@ -456,7 +457,6 @@ Other - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- :meth:`Series.pct_change` will can handle duplicate axis (:issue:`28664`) - :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) - Fix :class:`AbstractHolidayCalendar` to return correct results for From bd5673d61149234864aff04d2a6bc1950faf03c7 Mon Sep 17 00:00:00 2001 From: Jung Dong Ho Date: Sat, 9 Nov 2019 14:47:26 +0900 Subject: [PATCH 15/18] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: William Ayd --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4273246b01d8e..70cc785594fba 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -456,7 +456,7 @@ Other - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- :meth:`Series.pct_change` will can handle duplicate axis (:issue:`28664`) +- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`) - :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) - Fix :class:`AbstractHolidayCalendar` to return correct results for From 8e129597b9098ef564a6dd21425de073a1bded6f Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 9 Nov 2019 19:24:21 +0900 Subject: [PATCH 16/18] move a whatsnew entry from Other to Reshaping --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 16f736567b9fd..50d5b27170c05 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -437,6 +437,7 @@ Reshaping - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`) Sparse ^^^^^^ @@ -458,7 +459,6 @@ Other - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`) - :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) - Fix :class:`AbstractHolidayCalendar` to return correct results for From 3d70ebeca9db8d6c9e62346660b9af55635c915e Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sun, 10 Nov 2019 13:09:18 +0900 Subject: [PATCH 17/18] make the test more explicit --- pandas/tests/series/test_timeseries.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 6f01f482df0c5..223b03edadf28 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -372,11 +372,10 @@ def test_pct_change(self, datetime_series): def test_pct_change_with_duplicate_axis(self): # GH 28664 - original_idx = date_range("2019-11-14", periods=3, freq="D") - s = Series(range(3), original_idx) - - result = s.pct_change(freq="B") - expected = s.asfreq("B").pct_change().reindex(original_idx) + result = Series( + range(3), date_range("2019-11-14", periods=3, freq="D") + ).pct_change(freq="B") + expected = Series([np.NaN, np.inf, np.NaN], date_range("2019-11-14", periods=3)) tm.assert_series_equal(result, expected) From 5fb77cddf6bd7a2134f508b1fde9f5cb378155aa Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sun, 10 Nov 2019 13:42:04 +0900 Subject: [PATCH 18/18] increase the periods from 3 to 5 --- pandas/tests/series/test_timeseries.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 223b03edadf28..4ae00bca3e832 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -372,10 +372,11 @@ def test_pct_change(self, datetime_series): def test_pct_change_with_duplicate_axis(self): # GH 28664 - result = Series( - range(3), date_range("2019-11-14", periods=3, freq="D") - ).pct_change(freq="B") - expected = Series([np.NaN, np.inf, np.NaN], date_range("2019-11-14", periods=3)) + common_idx = date_range("2019-11-14", periods=5, freq="D") + result = Series(range(5), common_idx).pct_change(freq="B") + + # the reason that the expected should be like this is documented at PR 28681 + expected = Series([np.NaN, np.inf, np.NaN, np.NaN, 3.0], common_idx) tm.assert_series_equal(result, expected)