Skip to content

BUG: normalize rolling_window() weights, remove scikits-timeseries dependency for testing #8238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 23, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions doc/source/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ keyword. The list of recognized types are:

rolling_window(ser, 5, 'triang')

Note that the ``boxcar`` window is equivalent to ``rolling_mean``:
Note that the ``boxcar`` window is equivalent to ``rolling_mean``.

.. ipython:: python

Expand All @@ -336,6 +336,19 @@ This keyword is available in other rolling functions as well.

rolling_mean(ser, 5, center=True)

.. _stats.moments.normalization:

.. note::

In rolling sum mode (``mean=False``) there is no normalization done to the
weights. Passing custom weights of ``[1, 1, 1]`` will yield a different
result than passing weights of ``[2, 2, 2]``, for example. When passing a
``win_type`` instead of explicitly specifying the weights, the weights are
already normalized so that the largest weight is 1.

In contrast, the nature of the rolling mean calculation (``mean=True``) is
such that the weights are normalized with respect to each other. Weights
of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.

.. _stats.moments.binary:

Expand Down Expand Up @@ -610,4 +623,4 @@ are scaled by debiasing factors
(For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor,
with :math:`N = t + 1`.)
See http://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance
for further details.
for further details.
28 changes: 28 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,34 @@ Rolling/Expanding Moments API changes

rolling_sum(Series(range(4)), window=3, min_periods=0, center=True)

- :func:`rolling_window` now normalizes the weights properly in rolling mean mode (``mean=True``) so that
the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those
calculated without weighting (i.e. 'boxcar'). See :ref:`the note on normalization
<stats.moments.normalization>` for further details. (:issue:`7618`)

.. ipython:: python

s = Series([10.5, 8.8, 11.4, 9.7, 9.3])

Behavior prior to 0.15.0:

.. code-block:: python

In [39]: rolling_window(s, window=3, win_type='triang', center=True)
Out[39]:
0 NaN
1 6.583333
2 6.883333
3 6.683333
4 NaN
dtype: float64

New behavior:

.. ipython:: python

rolling_window(s, window=3, win_type='triang', center=True)

- Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`,
:func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`,
:func:`expanding_skew`, :func:`expanding_kurt`, :func:`expanding_quantile`, :func:`expanding_count`,
Expand Down
6 changes: 2 additions & 4 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1897,7 +1897,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,

def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
ndarray[float64_t, ndim=1, cast=True] weights,
int minp, bint avg=True, bint avg_wgt=False):
int minp, bint avg=True):
"""
Assume len(weights) << len(input)
"""
Expand All @@ -1915,7 +1915,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,

minp = _check_minp(len(weights), minp, in_n)

if avg_wgt:
if avg:
for win_i from 0 <= win_i < win_n:
val_win = weights[win_i]
if val_win != val_win:
Expand Down Expand Up @@ -1956,8 +1956,6 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
c = counts[in_i]
if c < minp:
output[in_i] = NaN
elif avg:
output[in_i] /= c

return output

Expand Down
191 changes: 147 additions & 44 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,47 +65,40 @@ def test_rolling_mean(self):
self._check_moment_func(mom.rolling_mean, np.mean)

def test_cmov_mean(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_mean
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

vals = np.random.randn(10)
xp = cmov_mean(vals, 5)
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49,
16.68, 9.48, 10.63, 14.48])
xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
12.818, 12.952, np.nan, np.nan])

rs = mom.rolling_mean(vals, 5, center=True)
assert_almost_equal(xp.compressed(), rs[2:-2])
assert_almost_equal(xp.mask, np.isnan(rs))
assert_almost_equal(xp, rs)

xp = Series(rs)
rs = mom.rolling_mean(Series(vals), 5, center=True)
assert_series_equal(xp, rs)

def test_cmov_window(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

vals = np.random.randn(10)
xp = cmov_window(vals, 5, 'boxcar')
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
13.49, 16.68, 9.48, 10.63, 14.48])
xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
12.818, 12.952, np.nan, np.nan])

rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
assert_almost_equal(xp.compressed(), rs[2:-2])
assert_almost_equal(xp.mask, np.isnan(rs))
assert_almost_equal(xp, rs)

xp = Series(rs)
rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
assert_series_equal(xp, rs)

def test_cmov_window_corner(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

# all nan
vals = np.empty(10, dtype=float)
Expand All @@ -125,24 +118,37 @@ def test_cmov_window_corner(self):
self.assertEqual(len(rs), 5)

def test_cmov_window_frame(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

vals = np.array([[ 12.18, 3.64],
[ 10.18, 9.16],
[ 13.24, 14.61],
[ 4.51, 8.11],
[ 6.15, 11.44],
[ 9.14, 6.21],
[ 11.31, 10.67],
[ 2.94, 6.51],
[ 9.42, 8.39],
[ 12.44, 7.34 ]])

xp = np.array([[ np.nan, np.nan],
[ np.nan, np.nan],
[ 9.252, 9.392],
[ 8.644, 9.906],
[ 8.87 , 10.208],
[ 6.81 , 8.588],
[ 7.792, 8.644],
[ 9.05 , 7.824],
[ np.nan, np.nan],
[ np.nan, np.nan]])

# DataFrame
vals = np.random.randn(10, 2)
xp = cmov_window(vals, 5, 'boxcar')
rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
assert_frame_equal(DataFrame(xp), rs)

def test_cmov_window_na_min_periods(self):
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

# min_periods
vals = Series(np.random.randn(10))
Expand All @@ -155,39 +161,136 @@ def test_cmov_window_na_min_periods(self):
assert_series_equal(xp, rs)

def test_cmov_window_regular(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
'blackmanharris', 'nuttall', 'barthann']

vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
13.49, 16.68, 9.48, 10.63, 14.48])
xps = {
'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009,
14.03687, 13.8567, 11.81473, np.nan, np.nan],
'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556,
13.33889, 13.38, 12.33667, np.nan, np.nan],
'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575,
14.3675, 14.0825, 11.5675, np.nan, np.nan],
'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559,
14.17267, 14.65923, 11.10401, np.nan, np.nan],
'blackmanharris': [np.nan, np.nan, 6.97691, 9.16438, 13.05052,
14.02156, 15.10512, 10.74574, np.nan, np.nan],
'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671,
14.03559, 15.05657, 10.78514, np.nan, np.nan],
'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607,
14.20036, 14.57726, 11.16988, np.nan, np.nan],
'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575,
14.3675, 14.0825, 11.5675, np.nan, np.nan]}

for wt in win_types:
vals = np.random.randn(10)
xp = cmov_window(vals, 5, wt)
xp = Series(xps[wt])
rs = mom.rolling_window(Series(vals), 5, wt, center=True)
assert_series_equal(xp, rs)

def test_cmov_window_regular_linear_range(self):
# GH 8238
tm._skip_if_no_scipy()

win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
'blackmanharris', 'nuttall', 'barthann']

vals = np.array(range(10), dtype=np.float)
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
xp = Series(xp)

for wt in win_types:
rs = mom.rolling_window(Series(vals), 5, wt, center=True)
assert_series_equal(Series(xp), rs)
assert_series_equal(xp, rs)

def test_cmov_window_regular_missing_data(self):
# GH 8238
tm._skip_if_no_scipy()

win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
'blackmanharris', 'nuttall', 'barthann']

vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
13.49, 16.68, np.nan, 10.63, 14.48])
xps = {
'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425,
9.1925, 12.5575, 14.3675, 15.61667, 13.655],
'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345,
9.17869, 12.79607, 14.20036, 15.8706, 13.655],
'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425,
9.1925, 12.5575, 14.3675, 15.61667, 13.655],
'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599,
9.1764, 12.83559, 14.17267, 15.90976, 13.655],
'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384,
9.56348, 12.38009, 14.20565, 15.24694, 13.69758],
'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618,
9.16786, 13.02671, 14.03673, 16.08759, 13.65553],
'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667,
10.34667, 12.00556, 13.82125, 14.49429, 13.765],
'blackmanharris': [np.nan, np.nan, 8.42526, 12.36824, 6.97691,
9.16438, 13.05052, 14.02175, 16.1098,
13.65509]
}

for wt in win_types:
xp = Series(xps[wt])
rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3)
assert_series_equal(xp, rs)

def test_cmov_window_special(self):
# GH 8238
tm._skip_if_no_scipy()
try:
from scikits.timeseries.lib import cmov_window
except ImportError:
raise nose.SkipTest("no scikits.timeseries")

win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
{'width': 0.5}]

vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
13.49, 16.68, 9.48, 10.63, 14.48])

xps = {
'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763,
13.89053, 13.65671, 12.01002, np.nan, np.nan],
'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589,
11.73161, 13.08516, 12.95111, 12.74577,
np.nan, np.nan],
'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284,
12.88331, 12.96079, 12.77008, np.nan, np.nan],
'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161,
12.75129, 12.90702, 12.83757, np.nan, np.nan]
}

for wt, k in zip(win_types, kwds):
vals = np.random.randn(10)
xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))
xp = Series(xps[wt])

rs = mom.rolling_window(Series(vals), 5, wt, center=True,
**k)
assert_series_equal(Series(xp), rs)
assert_series_equal(xp, rs)

def test_cmov_window_special_linear_range(self):
# GH 8238
tm._skip_if_no_scipy()

win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
{'width': 0.5}]

vals = np.array(range(10), dtype=np.float)
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
xp = Series(xp)

for wt, k in zip(win_types, kwds):
rs = mom.rolling_window(Series(vals), 5, wt, center=True,
**k)
assert_series_equal(xp, rs)

def test_rolling_median(self):
self._check_moment_func(mom.rolling_median, np.median)
Expand Down
1 change: 0 additions & 1 deletion pandas/util/print_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def show_versions(as_json=False):
("IPython", lambda mod: mod.__version__),
("sphinx", lambda mod: mod.__version__),
("patsy", lambda mod: mod.__version__),
("scikits.timeseries", lambda mod: mod.__version__),
("dateutil", lambda mod: mod.__version__),
("pytz", lambda mod: mod.VERSION),
("bottleneck", lambda mod: mod.__version__),
Expand Down