From 3c1410f56de973ea0e4932f37bb83461280b110e Mon Sep 17 00:00:00 2001
From: stahlous <n.stahl@gmail.com>
Date: Sun, 7 Sep 2014 12:59:56 -0700
Subject: [PATCH] BUG: rolling_window() properly averages weights in mean=True
 mode; removed scikits-timeseries dependency for testing; added further tests
 for rolling_window()

---
 doc/source/computation.rst         |  17 ++-
 doc/source/v0.15.0.txt             |  28 +++++
 pandas/algos.pyx                   |   6 +-
 pandas/stats/tests/test_moments.py | 191 ++++++++++++++++++++++-------
 pandas/util/print_versions.py      |   1 -
 5 files changed, 192 insertions(+), 51 deletions(-)

diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index b8559eb51ece8..56dc551268a37 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -310,7 +310,7 @@ keyword. The list of recognized types are:
 
    rolling_window(ser, 5, 'triang')
 
-Note that the ``boxcar`` window is equivalent to ``rolling_mean``:
+Note that the ``boxcar`` window is equivalent to ``rolling_mean``.
 
 .. ipython:: python
 
@@ -336,6 +336,19 @@ This keyword is available in other rolling functions as well.
 
    rolling_mean(ser, 5, center=True)
 
+.. _stats.moments.normalization:
+
+.. note::
+
+    In rolling sum mode (``mean=False``) there is no normalization done to the 
+    weights. Passing custom weights of ``[1, 1, 1]`` will yield a different 
+    result than passing weights of ``[2, 2, 2]``, for example. When passing a 
+    ``win_type`` instead of explicitly specifying the weights, the weights are 
+    already normalized so that the largest weight is 1. 
+
+    In contrast, the nature of the rolling mean calculation (``mean=True``) is
+    such that the weights are normalized with respect to each other. Weights 
+    of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.
 
 .. _stats.moments.binary:
 
@@ -610,4 +623,4 @@ are scaled by debiasing factors
 (For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor,
 with :math:`N = t + 1`.)
 See http://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance
-for further details.
\ No newline at end of file
+for further details.
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index f49c919e80d50..73b8b7ddbcba3 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -383,6 +383,34 @@ Rolling/Expanding Moments API changes
 
     rolling_sum(Series(range(4)), window=3, min_periods=0, center=True)
 
+- :func:`rolling_window` now normalizes the weights properly in rolling mean mode (``mean=True``) so that
+  the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those 
+  calculated without weighting (i.e. 'boxcar'). See :ref:`the note on normalization 
+  <stats.moments.normalization>` for further details. (:issue:`7618`)
+
+  .. ipython:: python
+
+    s = Series([10.5, 8.8, 11.4, 9.7, 9.3])
+
+  Behavior prior to 0.15.0:
+  
+  .. code-block:: python
+
+    In [39]: rolling_window(s, window=3, win_type='triang', center=True)
+    Out[39]:
+    0         NaN
+    1    6.583333
+    2    6.883333
+    3    6.683333
+    4         NaN
+    dtype: float64
+
+  New behavior:
+
+  .. ipython:: python
+
+    rolling_window(s, window=3, win_type='triang', center=True)
+
 - Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`,
   :func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`,
   :func:`expanding_skew`, :func:`expanding_kurt`, :func:`expanding_quantile`, :func:`expanding_count`,
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 8f37d76e50f9c..316a282b71609 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -1897,7 +1897,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
 
 def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
                 ndarray[float64_t, ndim=1, cast=True] weights,
-                int minp, bint avg=True, bint avg_wgt=False):
+                int minp, bint avg=True):
     """
     Assume len(weights) << len(input)
     """
@@ -1915,7 +1915,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
 
     minp = _check_minp(len(weights), minp, in_n)
 
-    if avg_wgt:
+    if avg:
         for win_i from 0 <= win_i < win_n:
             val_win = weights[win_i]
             if val_win != val_win:
@@ -1956,8 +1956,6 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
             c = counts[in_i]
             if c < minp:
                 output[in_i] = NaN
-            elif avg:
-                output[in_i] /= c
 
     return output
 
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index 94c2521ff6938..fab25f955fa76 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -65,47 +65,40 @@ def test_rolling_mean(self):
         self._check_moment_func(mom.rolling_mean, np.mean)
 
     def test_cmov_mean(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_mean
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
-        vals = np.random.randn(10)
-        xp = cmov_mean(vals, 5)
+        vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 
+                         16.68, 9.48, 10.63, 14.48])
+        xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
+                       12.818,  12.952, np.nan, np.nan])
 
         rs = mom.rolling_mean(vals, 5, center=True)
-        assert_almost_equal(xp.compressed(), rs[2:-2])
-        assert_almost_equal(xp.mask, np.isnan(rs))
+        assert_almost_equal(xp, rs)
 
         xp = Series(rs)
         rs = mom.rolling_mean(Series(vals), 5, center=True)
         assert_series_equal(xp, rs)
 
     def test_cmov_window(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
-        vals = np.random.randn(10)
-        xp = cmov_window(vals, 5, 'boxcar')
+        vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 
+                         13.49, 16.68, 9.48, 10.63, 14.48])
+        xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
+                       12.818,  12.952, np.nan, np.nan])
 
         rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
-        assert_almost_equal(xp.compressed(), rs[2:-2])
-        assert_almost_equal(xp.mask, np.isnan(rs))
+        assert_almost_equal(xp, rs)
 
         xp = Series(rs)
         rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
         assert_series_equal(xp, rs)
 
     def test_cmov_window_corner(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
         # all nan
         vals = np.empty(10, dtype=float)
@@ -125,24 +118,37 @@ def test_cmov_window_corner(self):
         self.assertEqual(len(rs), 5)
 
     def test_cmov_window_frame(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
+
+        vals = np.array([[ 12.18,   3.64],
+                         [ 10.18,   9.16],
+                         [ 13.24,  14.61],
+                         [  4.51,   8.11],
+                         [  6.15,  11.44],
+                         [  9.14,   6.21],
+                         [ 11.31,  10.67],
+                         [  2.94,   6.51],
+                         [  9.42,   8.39],
+                         [ 12.44,   7.34 ]])
+
+        xp = np.array([[ np.nan,  np.nan],
+                       [ np.nan,  np.nan],
+                       [  9.252,   9.392],
+                       [  8.644,   9.906],
+                       [  8.87 ,  10.208],
+                       [  6.81 ,   8.588],
+                       [  7.792,   8.644],
+                       [  9.05 ,   7.824],
+                       [ np.nan,  np.nan],
+                       [ np.nan,  np.nan]])
 
         # DataFrame
-        vals = np.random.randn(10, 2)
-        xp = cmov_window(vals, 5, 'boxcar')
         rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
         assert_frame_equal(DataFrame(xp), rs)
 
     def test_cmov_window_na_min_periods(self):
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
         # min_periods
         vals = Series(np.random.randn(10))
@@ -155,39 +161,136 @@ def test_cmov_window_na_min_periods(self):
         assert_series_equal(xp, rs)
 
     def test_cmov_window_regular(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
         win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                      'blackmanharris', 'nuttall', 'barthann']
+
+        vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
+                         13.49, 16.68, 9.48, 10.63, 14.48])
+        xps = {
+            'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009, 
+                        14.03687, 13.8567, 11.81473, np.nan, np.nan], 
+            'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556, 
+                       13.33889, 13.38, 12.33667, np.nan, np.nan], 
+            'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 
+                         14.3675, 14.0825, 11.5675, np.nan, np.nan], 
+            'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559, 
+                       14.17267, 14.65923, 11.10401, np.nan, np.nan], 
+            'blackmanharris': [np.nan, np.nan, 6.97691, 9.16438, 13.05052, 
+                               14.02156, 15.10512, 10.74574, np.nan, np.nan],
+            'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671, 
+                        14.03559, 15.05657, 10.78514, np.nan, np.nan], 
+            'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607, 
+                         14.20036, 14.57726, 11.16988, np.nan, np.nan], 
+            'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 
+                         14.3675, 14.0825, 11.5675, np.nan, np.nan]}
+
         for wt in win_types:
-            vals = np.random.randn(10)
-            xp = cmov_window(vals, 5, wt)
+            xp = Series(xps[wt])
+            rs = mom.rolling_window(Series(vals), 5, wt, center=True)
+            assert_series_equal(xp, rs)
+
+    def test_cmov_window_regular_linear_range(self):
+        # GH 8238: centred weighted mean of a linear ramp is the centre value
+        tm._skip_if_no_scipy()
 
+        win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
+                     'blackmanharris', 'nuttall', 'barthann']
+
+        vals = np.array(range(10), dtype=float)
+        xp = vals.copy()
+        xp[:2] = np.nan
+        xp[-2:] = np.nan
+        xp = Series(xp)
+
+        for wt in win_types:
             rs = mom.rolling_window(Series(vals), 5, wt, center=True)
-            assert_series_equal(Series(xp), rs)
+            assert_series_equal(xp, rs)
+
+    def test_cmov_window_regular_missing_data(self):
+        # GH 8238
+        tm._skip_if_no_scipy()
+
+        win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
+                     'blackmanharris', 'nuttall', 'barthann']
+
+        vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 
+                         13.49, 16.68, np.nan, 10.63, 14.48])
+        xps = {
+            'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 
+                         9.1925, 12.5575, 14.3675, 15.61667, 13.655], 
+            'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345, 
+                         9.17869, 12.79607, 14.20036, 15.8706, 13.655], 
+            'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 
+                         9.1925, 12.5575, 14.3675, 15.61667, 13.655], 
+            'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599, 
+                       9.1764, 12.83559, 14.17267, 15.90976, 13.655], 
+            'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384, 
+                        9.56348, 12.38009, 14.20565, 15.24694, 13.69758], 
+            'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618, 
+                        9.16786, 13.02671, 14.03673, 16.08759, 13.65553], 
+            'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667, 
+                       10.34667, 12.00556, 13.82125, 14.49429, 13.765], 
+            'blackmanharris': [np.nan, np.nan, 8.42526, 12.36824, 6.97691,
+                               9.16438, 13.05052, 14.02175, 16.1098, 
+                               13.65509]
+            }
+
+        for wt in win_types:
+            xp = Series(xps[wt])
+            rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3)
+            assert_series_equal(xp, rs)
 
     def test_cmov_window_special(self):
+        # GH 8238
         tm._skip_if_no_scipy()
-        try:
-            from scikits.timeseries.lib import cmov_window
-        except ImportError:
-            raise nose.SkipTest("no scikits.timeseries")
 
         win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
         kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
                 {'width': 0.5}]
 
+        vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
+                         13.49, 16.68, 9.48, 10.63, 14.48])
+
+        xps = {
+            'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763, 
+                         13.89053, 13.65671, 12.01002, np.nan, np.nan], 
+            'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589, 
+                                 11.73161, 13.08516, 12.95111, 12.74577, 
+                                 np.nan, np.nan], 
+            'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284, 
+                        12.88331, 12.96079, 12.77008, np.nan, np.nan], 
+            'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161, 
+                       12.75129, 12.90702, 12.83757, np.nan, np.nan]
+        }
+
         for wt, k in zip(win_types, kwds):
-            vals = np.random.randn(10)
-            xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))
+            xp = Series(xps[wt])
 
             rs = mom.rolling_window(Series(vals), 5, wt, center=True,
                                     **k)
-            assert_series_equal(Series(xp), rs)
+            assert_series_equal(xp, rs)
+
+    def test_cmov_window_special_linear_range(self):
+        # GH 8238: centred weighted mean of a linear ramp is the centre value
+        tm._skip_if_no_scipy()
+
+        win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
+        kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
+                {'width': 0.5}]
+
+        vals = np.array(range(10), dtype=float)
+        xp = vals.copy()
+        xp[:2] = np.nan
+        xp[-2:] = np.nan
+        xp = Series(xp)
+
+        for wt, k in zip(win_types, kwds):
+            rs = mom.rolling_window(Series(vals), 5, wt, center=True,
+                                    **k)
+            assert_series_equal(xp, rs)
 
     def test_rolling_median(self):
         self._check_moment_func(mom.rolling_median, np.median)
diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py
index 3d793698c7caa..d3dbeef1af4d2 100644
--- a/pandas/util/print_versions.py
+++ b/pandas/util/print_versions.py
@@ -68,7 +68,6 @@ def show_versions(as_json=False):
         ("IPython", lambda mod: mod.__version__),
         ("sphinx", lambda mod: mod.__version__),
         ("patsy", lambda mod: mod.__version__),
-        ("scikits.timeseries", lambda mod: mod.__version__),
         ("dateutil", lambda mod: mod.__version__),
         ("pytz", lambda mod: mod.VERSION),
         ("bottleneck", lambda mod: mod.__version__),