From 33165b06db024fd5f3e176248d3e321b5cb8c2e6 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 18:38:08 +0200 Subject: [PATCH 1/6] Add test for 31286 --- pandas/tests/window/test_rolling.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 88afcec0f7bf4..e7b1d19f8f047 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -771,3 +771,17 @@ def test_rolling_numerical_too_large_numbers(): index=dates, ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("index", "window"), + [([0, 1, 2, 3, 4], 2), (pd.date_range("2001-01-01", freq="D", periods=5), "2D")], +) +def test_rolling_corr_timedelta_index(index, window): + # GH: 31286 + x = pd.Series(np.random.randn(5), index=index) + y = x.copy() + x[0:2] = 0.0 + result = x.rolling(window).corr(y) + expected = pd.Series([np.nan, np.nan, 1, 1, 1], index=index) + tm.assert_almost_equal(result, expected) From 4cd47344f77dd7001154a63fab48b2fe0d6dd908 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 21:43:51 +0200 Subject: [PATCH 2/6] Replace std with var --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 21a7164411fb7..d03113e5f02e1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1901,7 +1901,7 @@ def _get_corr(a, b): window=window, min_periods=self.min_periods, center=self.center ) - return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) + return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5 return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) From edc9f5720095aead11cd32e7096d149fd4f2c47e Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 21:47:40 +0200 Subject: [PATCH 3/6] Fix test --- 
pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index e7b1d19f8f047..6e2d607e463df 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -779,7 +779,7 @@ def test_rolling_numerical_too_large_numbers(): ) def test_rolling_corr_timedelta_index(index, window): # GH: 31286 - x = pd.Series(np.random.randn(5), index=index) + x = pd.Series([1, 2, 3, 4, 5], index=index) y = x.copy() x[0:2] = 0.0 result = x.rolling(window).corr(y) From 64c9cf12673f70e197c01143b9897a501a3d1427 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 19 Sep 2020 00:15:15 +0200 Subject: [PATCH 4/6] Add comment to code --- pandas/core/window/rolling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d03113e5f02e1..59894bb24d885 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1900,7 +1900,9 @@ def _get_corr(a, b): b = b.rolling( window=window, min_periods=self.min_periods, center=self.center ) - + # GH 31286: Through using var instead of std we can avoid numerical + # issues when the result of var is within floating point precision + # while std is not. 
return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5 return flex_binary_moment( From 9cadf038cf2e0189164379610ab01b60834fb370 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 19 Sep 2020 00:19:07 +0200 Subject: [PATCH 5/6] Add whats new --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6923b42d3340b..4c773c31056a7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -338,7 +338,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) -- +- Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`) Reshaping ^^^^^^^^^ From 2b5581b43c2e8657603bf258edeb4504deaab409 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 23 Oct 2020 00:08:41 +0200 Subject: [PATCH 6/6] Fix pattern --- pandas/tests/window/test_rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 16f2458fb670c..90f4e7ac91b2e 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -925,9 +925,9 @@ def test_rolling_var_numerical_issues(func, third_value, values): ) def test_rolling_corr_timedelta_index(index, window): # GH: 31286 - x = pd.Series([1, 2, 3, 4, 5], index=index) + x = Series([1, 2, 3, 4, 5], index=index) y = x.copy() x[0:2] = 0.0 result = 
x.rolling(window).corr(y) - expected = pd.Series([np.nan, np.nan, 1, 1, 1], index=index) + expected = Series([np.nan, np.nan, 1, 1, 1], index=index) tm.assert_almost_equal(result, expected)