From 712876977b7ac2b625880caddeec0689b87f7bac Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 23 Apr 2021 00:31:46 +0200 Subject: [PATCH 1/6] BUG: rolling returning mean 0 for all nan window --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/_libs/window/aggregations.pyx | 2 +- pandas/tests/window/test_rolling.py | 64 ++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1a11fffbf6b4e..2033a70069d99 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -832,6 +832,7 @@ Groupby/resample/rolling - Bug in :class:`core.window.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`) - Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising ``ValueError`` when using with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) +- Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 8c8629ad6f032..aa49b3c60efe2 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -170,7 +170,7 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, cdef: float64_t result - if nobs >= minp: + if nobs >= minp and nobs > 0: result = sum_x / nobs if neg_ct == 0 and result < 0: # all positive diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9abae632e5da3..1d834ec5f46e4 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1357,3 +1357,67 @@ def test_rolling_std_small_values(): result = s.rolling(2).std() expected = Series([np.nan, 7.071068e-9, 7.071068e-9]) tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15) + + +def test_rolling_mean_all_nan_window_floating_artifacts(): + # GH#41053 + df = DataFrame( + [ + 0.03, + 0.03, + 0.001, + np.NaN, + 0.002, + 0.008, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + np.NaN, + 0.005, + 0.2, + ] + ) + + expected = DataFrame( + [ + 0.03, + 0.0155, + 0.0155, + 0.011, + 0.01025, + 0.00366666, + 0.005, + 0.005, + 0.008, + np.NaN, + np.NaN, + 0.005, + 0.102500, + ], + index=list(range(1, 14)), + ) + result = ( + df.iloc[1:].rolling(5, min_periods=0).mean() + ) # Returns 0 at indexes 10 and 11 + tm.assert_frame_equal(result, expected) + expected = DataFrame( + [ + 0.001, + 0.001, + 0.0015, + 0.00366666, + 0.00366666, + 0.005, + 0.005, + 0.008, + np.NaN, + np.NaN, + 0.005, + 0.102500, + ], + index=list(range(2, 14)), + ) + result = df.iloc[2:].rolling(5, min_periods=0).mean() + tm.assert_frame_equal(result, expected) From 193bc36e5d6e56c455107cdadd087d2f06e334a9 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 24 Apr 2021 01:00:56 +0200 Subject: [PATCH 2/6] Parametrize test --- pandas/tests/window/test_rolling.py | 60 ++++++++++------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 1d834ec5f46e4..4f22e8a607b4f 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1359,7 +1359,14 @@ def test_rolling_std_small_values(): tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15) -def test_rolling_mean_all_nan_window_floating_artifacts(): +@pytest.mark.parametrize( + "start, exp_values", + [ + (1, [0.03, 0.0155, 0.0155, 0.011, 0.01025]), + (2, [0.001, 0.001, 0.0015, 0.00366666]), + ], +) +def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values): # GH#41053 df = DataFrame( [ @@ -1380,44 +1387,19 @@ def test_rolling_mean_all_nan_window_floating_artifacts(): ] ) + values = exp_values + [ + 0.00366666, + 0.005, + 0.005, + 0.008, + np.NaN, + np.NaN, + 0.005, + 0.102500, + ] expected = DataFrame( - [ - 0.03, - 0.0155, - 0.0155, - 0.011, - 0.01025, - 0.00366666, - 0.005, - 0.005, - 0.008, - np.NaN, - np.NaN, - 0.005, - 0.102500, - ], - index=list(range(1, 14)), - ) - result = ( - df.iloc[1:].rolling(5, min_periods=0).mean() - ) # Returns 0 at indexes 10 and 11 - tm.assert_frame_equal(result, expected) - expected = DataFrame( - [ - 0.001, - 0.001, - 0.0015, - 0.00366666, - 0.00366666, - 0.005, - 0.005, - 0.008, - np.NaN, - np.NaN, - 0.005, - 0.102500, - ], - index=list(range(2, 14)), + values, + index=list(range(start, len(values) + start)), ) - result = df.iloc[2:].rolling(5, min_periods=0).mean() + result = df.iloc[start:].rolling(5, min_periods=0).mean() tm.assert_frame_equal(result, expected) From 4599fedc0d67cf0113ff0da5d48a0aa0ceed17c5 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 24 Apr 2021 23:28:10 +0200 Subject: [PATCH 3/6] Add fix for sum --- pandas/_libs/window/aggregations.pyx | 4 +++- pandas/tests/window/test_rolling.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index aa49b3c60efe2..8d6f899d6f3ca 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -73,7 +73,9 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogi cdef: float64_t result - if nobs >= minp: + if nobs == 0 == minp: + result = 0 + elif nobs >= minp: result = sum_x else: result = NaN diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4f22e8a607b4f..28465e3a979a7 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1403,3 +1403,11 @@ def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values): ) result = df.iloc[start:].rolling(5, min_periods=0).mean() tm.assert_frame_equal(result, expected) + + +def test_rolling_sum_all_nan_window_floating_artifacts(): + # GH#41053 + df = DataFrame([0.002, 0.008, 0.005, np.NaN, np.NaN, np.NaN]) + result = df.rolling(3, min_periods=0).sum() + expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0]) + tm.assert_frame_equal(result, expected) From 958fe595583b137d71085efb704f9f515b7e4d68 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 24 Apr 2021 23:33:39 +0200 Subject: [PATCH 4/6] Change whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8b8514641ae38..2244368e61474 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -846,7 +846,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) -- Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) +- Bug in :meth:`DataFrame.rolling` returning mean zero, respectively sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) Reshaping ^^^^^^^^^ From 393386740a2229825bf64be40e5b4183bd4e7769 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 25 Apr 2021 03:08:14 +0200 Subject: [PATCH 5/6] Change whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 2244368e61474..c2f51ae2da247 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -847,6 +847,8 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) - Bug in :meth:`DataFrame.rolling` returning mean zero, respectively sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) +- Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) +- Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) Reshaping ^^^^^^^^^ From 8711cce976d76ae5caf28b5dac9789af556dff33 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 25 Apr 2021 03:09:01 +0200 Subject: [PATCH 6/6] Change whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c2f51ae2da247..e168c2191d47b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -846,7 +846,6 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) -- Bug in :meth:`DataFrame.rolling` returning mean zero, respectively sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) - Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) - Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`)