1. Remove the inf check from the numba add_var kernel

auderson · auderson · commit 949c94901a1e · 2022-03-24T15:28:32.000+08:00
2. Reformat file
3. Add tests
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -313,7 +313,8 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x,
     if val == prev_value[0]:
         num_consecutive_same_value[0] += 1
     else:
-        num_consecutive_same_value[0] = 1  # reset to 1 (include current value itself)
+        # reset to 1 (include current value itself)
+        num_consecutive_same_value[0] = 1
     prev_value[0] = val
 
     # Welford's method for the online variance-calculation
@@ -361,8 +362,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
     """
     cdef:
         float64_t mean_x, ssqdm_x, nobs, compensation_add,
-        float64_t compensation_remove,
-        float64_t val, prev, delta, mean_x_old, prev_value
+        float64_t compensation_remove, prev_value
         int64_t s, e
         Py_ssize_t i, j, N = len(start), num_consecutive_same_value = 0
         ndarray[float64_t] output
diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py
@@ -26,7 +26,7 @@ def add_var(
 ) -> tuple[int, float, float, float, int, float]:
     if not np.isnan(val):
 
-        if val == prev_value and not np.isinf(val):
+        if val == prev_value:
             num_consecutive_same_value += 1
         else:
             num_consecutive_same_value = 1
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -1781,3 +1781,103 @@ def test_step_not_integer_raises():
 def test_step_not_positive_raises():
     with pytest.raises(ValueError, match="step must be >= 0"):
         DataFrame(range(2)).rolling(1, step=-1)
+
+
+@pytest.mark.parametrize(
+    ["values", "window", "min_periods", "expected"],
+    [
+        [
+            np.array([20, 10, 10, np.inf, 1, 1, 2, 3]),
+            3,
+            1,
+            np.array(
+                [
+                    np.nan,
+                    50.0,
+                    33.33333333333333,
+                    0.0,
+                    40.5,
+                    0.0,
+                    0.3333333333333333,
+                    1.0,
+                ]
+            ),
+        ],
+        [
+            np.array([20, 10, 10, np.nan, 10, 1, 2, 3]),
+            3,
+            1,
+            np.array(
+                [
+                    np.nan,
+                    50.0,
+                    33.33333333333333,
+                    0.0,
+                    0.0,
+                    40.5,
+                    24.333333333333332,
+                    1.0,
+                ]
+            ),
+        ],
+        [
+            np.array([np.nan, 5, 6, 7, 5, 5, 5]),
+            3,
+            3,
+            np.array([np.nan, np.nan, np.nan, 1.0, 1.0, 1.3333333333333335, 0.0]),
+        ],
+        [
+            np.array([5, 7, 7, 7, np.nan, np.inf, 4, 3, 3, 3]),
+            3,
+            3,
+            np.array(
+                [
+                    np.nan,
+                    np.nan,
+                    1.3333333333333335,
+                    0.0,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    0.33333333333333337,
+                    0.0,
+                ]
+            ),
+        ],
+        [
+            np.array([5, 7, 7, 7, np.nan, np.inf, 7, 3, 3, 3]),
+            3,
+            3,
+            np.array(
+                [
+                    np.nan,
+                    np.nan,
+                    1.3333333333333335,
+                    0.0,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    5.333333333333333,
+                    0.0,
+                ]
+            ),
+        ],
+    ],
+)
+def test_rolling_var_same_value_count_logic(values, window, min_periods, expected):
+    # GH 42064
+
+    sr = Series(values)
+    result_var = sr.rolling(window, min_periods=min_periods).var()
+    # 1. result should be close to correct value
+    # non-zero values can still differ slightly as a result of the online algorithm
+    assert np.isclose(result_var, expected, equal_nan=True).all()
+    # 2. zeros should be exactly the same since the new algo takes effect here
+    assert (result_var[expected == 0] == 0).all()
+
+    # std should also pass as it's just a sqrt of var
+    result_std = sr.rolling(window, min_periods=min_periods).std()
+    assert np.isclose(result_std, np.sqrt(expected), equal_nan=True).all()
+    assert (result_std[expected == 0] == 0).all()