From a0100e69ab134af3cbc3805aca529d4040804fd6 Mon Sep 17 00:00:00 2001
From: jaimefrio <jaime.frio@gmail.com>
Date: Mon, 18 Aug 2014 11:20:00 -0700
Subject: [PATCH] WIP: Experimental changes in `rolling_var` related to #7900

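Added logic to `roll_var` in `pandas/algos.pyx` (the routine modified
for `rolling_var`) to detect windows where all non-NaN values are
identical, resetting the sum of squared differences to exactly zero.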

Need to assess both correctness and performance impact.
---
 pandas/algos.pyx | 79 ++++++++++++++++++++++--------------------------
 1 file changed, 36 insertions(+), 43 deletions(-)

diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 1c1d32e1d2a20..22d6a5ba9af7b 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -1160,75 +1160,68 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
     """
     Numerically stable implementation using Welford's method.
     """
-    cdef double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
-    cdef Py_ssize_t i
+    cdef double val, prev, mean_x = 0, ssqdm_x = 0, delta, rep = NaN
+    cdef Py_ssize_t nobs = 0, nrep = 0, i
     cdef Py_ssize_t N = len(input)
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
 
     minp = _check_minp(win, minp, N)
 
-    # Check for windows larger than array, addresses #7297
-    win = min(win, N)
-
-    # Over the first window, observations can only be added, never removed
-    for i from 0 <= i < win:
+    for i from 0 <= i < N:
         val = input[i]
+        prev = NaN if i < win else input[i - win]
+
+        # First, count the number of observations and consecutive repeats
+        if prev == prev:
+            # prev is not NaN, removing an observation...
+            if nobs == nrep:
+                # ...and removing a repeat
+                nrep -= 1
+                if nrep == 0:
+                    rep = NaN
+            nobs -= 1
 
-        # Not NaN
         if val == val:
-            nobs += 1
-            delta = (val - mean_x)
-            mean_x += delta / nobs
-            ssqdm_x += delta * (val - mean_x)
-
-        if nobs >= minp:
-            #pathological case
-            if nobs == 1:
-                val = 0
+            # next is not NaN, adding an observation...
+            if val == prev:
+                # ...and adding a repeat
+                nrep += 1
             else:
-                val = ssqdm_x / (nobs - ddof)
-                if val < 0:
-                    val = 0
-        else:
-            val = NaN
-
-        output[i] = val
-
-    # After the first window, observations can both be added and removed
-    for i from win <= i < N:
-        val = input[i]
-        prev = input[i - win]
+                # ...and resetting repeats
+                nrep = 1
+                rep = val
+            nobs += 1
 
-        if val == val:
+        # Then, compute the new mean and sum of squared differences
+        if nobs == nrep:
+            # All non-NaN values in window are identical...
+            ssqdm_x = 0
+            mean_x = rep if nobs > 0 else 0
+        elif val == val:
+            # Adding one observation...
             if prev == prev:
-                # Adding one observation and removing another one
+                # ...and removing another
                 delta = val - prev
                 prev -= mean_x
                 mean_x += delta / nobs
                 val -= mean_x
                 ssqdm_x += (val + prev) * delta
             else:
-                # Adding one observation and not removing any
-                nobs += 1
+                # ...and not removing any
                 delta = (val - mean_x)
                 mean_x += delta / nobs
                 ssqdm_x += delta * (val - mean_x)
         elif prev == prev:
             # Adding no new observation, but removing one
-            nobs -= 1
-            if nobs:
-                delta = (prev - mean_x)
-                mean_x -= delta  / nobs
-                ssqdm_x -= delta * (prev - mean_x)
-            else:
-                mean_x = 0
-                ssqdm_x = 0
+            delta = (prev - mean_x)
+            mean_x -= delta  / nobs
+            ssqdm_x -= delta * (prev - mean_x)
         # Variance is unchanged if no observation is added or removed
 
+        # Finally, compute and write the rolling variance to the output array
         if nobs >= minp:
-            #pathological case
-            if nobs == 1:
+            if nobs <= ddof:
                 val = 0
             else:
                 val = ssqdm_x / (nobs - ddof)