diff --git a/pandas/tests/window/moments/conftest.py b/pandas/tests/window/moments/conftest.py index b192f72c8f08b..8f7c20fe03a02 100644 --- a/pandas/tests/window/moments/conftest.py +++ b/pandas/tests/window/moments/conftest.py @@ -10,41 +10,50 @@ ) -# create the data only once as we are not setting it -def _create_consistency_data(): - def create_series(): - return [ - Series(dtype=np.float64, name="a"), - Series([np.nan] * 5), - Series([1.0] * 5), - Series(range(5, 0, -1)), - Series(range(5)), - Series([np.nan, 1.0, np.nan, 1.0, 1.0]), - Series([np.nan, 1.0, np.nan, 2.0, 3.0]), - Series([np.nan, 1.0, np.nan, 3.0, 2.0]), - ] - - def create_dataframes(): - return [ - DataFrame(columns=["a", "a"]), - DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]), - ] + [DataFrame(s) for s in create_series()] - - def is_constant(x): - values = x.values.ravel("K") - return len(set(values[notna(values)])) == 1 - - def no_nans(x): - return x.notna().all().all() - +def create_series(): return [ - (x, is_constant(x), no_nans(x)) - for x in itertools.chain(create_dataframes(), create_dataframes()) + Series(dtype=np.float64, name="a"), + Series([np.nan] * 5), + Series([1.0] * 5), + Series(range(5, 0, -1)), + Series(range(5)), + Series([np.nan, 1.0, np.nan, 1.0, 1.0]), + Series([np.nan, 1.0, np.nan, 2.0, 3.0]), + Series([np.nan, 1.0, np.nan, 3.0, 2.0]), ] -@pytest.fixture(params=_create_consistency_data()) -def consistency_data(request): +def create_dataframes(): + return [ + DataFrame(columns=["a", "a"]), + DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]), + ] + [DataFrame(s) for s in create_series()] + + +def is_constant(x): + values = x.values.ravel("K") + return len(set(values[notna(values)])) == 1 + + +@pytest.fixture( + params=( + obj + for obj in itertools.chain(create_series(), create_dataframes()) + if is_constant(obj) + ), + scope="module", +) +def consistent_data(request): + return request.param + + +@pytest.fixture(params=create_series()) +def series_data(request): + return request.param + + +@pytest.fixture(params=itertools.chain(create_series(), create_dataframes())) +def all_data(request): """ Test: - Empty Series / DataFrame diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 8feec32ba99c5..f9f09bffb14b1 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -30,7 +30,7 @@ def create_mock_weights(obj, com, adjust, ignore_na): def create_mock_series_weights(s, com, adjust, ignore_na): - w = Series(np.nan, index=s.index) + w = Series(np.nan, index=s.index, name=s.name) alpha = 1.0 / (1.0 + com) if adjust: count = 0 @@ -58,63 +58,66 @@ def create_mock_series_weights(s, com, adjust, ignore_na): return w -def test_ewm_consistency_mean(consistency_data, adjust, ignore_na, min_periods): - x, is_constant, no_nans = consistency_data +def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods): com = 3.0 - result = x.ewm( + result = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).mean() - weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na) expected = ( - x.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method="ffill") + all_data.multiply(weights) + .cumsum() + .divide(weights.cumsum()) + .fillna(method="ffill") ) expected[ - x.expanding().count() < (max(min_periods, 1) if min_periods else 1) + all_data.expanding().count() < (max(min_periods, 1) if min_periods else 1) ] = np.nan tm.assert_equal(result, expected.astype("float64")) -def test_ewm_consistency_consistent(consistency_data, adjust, ignore_na, min_periods): - x, is_constant, no_nans = consistency_data +def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods): com = 3.0 - if is_constant: - count_x = x.expanding().count() - mean_x = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean() - # check that correlation of a series with itself is either 1 or NaN - corr_x_x = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(x) - exp = x.max() if isinstance(x, Series) else x.max().max() + count_x = consistent_data.expanding().count() + mean_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(consistent_data) + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) - # check mean of constant series - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = exp - tm.assert_equal(mean_x, expected) + # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) - # check correlation of constant series with itself is NaN - expected[:] = np.nan - tm.assert_equal(corr_x_x, expected) + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) def test_ewm_consistency_var_debiasing_factors( - consistency_data, adjust, ignore_na, min_periods + all_data, adjust, ignore_na, min_periods ): - x, is_constant, no_nans = consistency_data com = 3.0 # check variance debiasing factors - var_unbiased_x = x.ewm( + var_unbiased_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=False) - var_biased_x = x.ewm( + var_biased_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=True) - weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na) cum_sum = weights.cumsum().fillna(method="ffill") cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") numerator = cum_sum * cum_sum @@ -126,16 +129,13 @@ def test_ewm_consistency_var_debiasing_factors( @pytest.mark.parametrize("bias", [True, False]) -def test_moments_consistency_var( - consistency_data, adjust, ignore_na, min_periods, bias -): - x, is_constant, no_nans = consistency_data +def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias): com = 3.0 - mean_x = x.ewm( + mean_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).mean() - var_x = x.ewm( + var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) assert not (var_x < 0).any().any() @@ -143,7 +143,7 @@ def test_moments_consistency_var( if bias: # check that biased var(x) == mean(x^2) - mean(x)^2 mean_x2 = ( - (x * x) + (all_data * all_data) .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) .mean() ) @@ -152,35 +152,32 @@ def test_moments_consistency_var( @pytest.mark.parametrize("bias", [True, False]) def test_moments_consistency_var_constant( - consistency_data, adjust, ignore_na, min_periods, bias + consistent_data, adjust, ignore_na, min_periods, bias ): - x, is_constant, no_nans = consistency_data com = 3.0 - if is_constant: - count_x = x.expanding(min_periods=min_periods).count() - var_x = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=bias) + count_x = consistent_data.expanding(min_periods=min_periods).count() + var_x = consistent_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) - # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = 0.0 - if not bias: - expected[count_x < 2] = np.nan - tm.assert_equal(var_x, expected) + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if not bias: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) @pytest.mark.parametrize("bias", [True, False]) -def test_ewm_consistency_std(consistency_data, adjust, ignore_na, min_periods, bias): - x, is_constant, no_nans = consistency_data +def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): com = 3.0 - var_x = x.ewm( + var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) assert not (var_x < 0).any().any() - std_x = x.ewm( + std_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).std(bias=bias) assert not (std_x < 0).any().any() @@ -188,9 +185,9 @@ def test_ewm_consistency_std(consistency_data, adjust, ignore_na, min_periods, b # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) - cov_x_x = x.ewm( + cov_x_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(x, bias=bias) + ).cov(all_data, bias=bias) assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) @@ -199,57 +196,53 @@ def test_ewm_consistency_std(consistency_data, adjust, ignore_na, min_periods, b @pytest.mark.parametrize("bias", [True, False]) def test_ewm_consistency_series_cov_corr( - consistency_data, adjust, ignore_na, min_periods, bias + series_data, adjust, ignore_na, min_periods, bias ): - x, is_constant, no_nans = consistency_data com = 3.0 - if isinstance(x, Series): - var_x_plus_y = ( - (x + x) - .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) - .var(bias=bias) - ) - var_x = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=bias) - var_y = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=bias) - cov_x_y = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(x, bias=bias) - # check that cov(x, y) == (var(x+y) - var(x) - - # var(y)) / 2 - tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) - - # check that corr(x, y) == cov(x, y) / (std(x) * - # std(y)) - corr_x_y = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(x, bias=bias) - std_x = x.ewm( + var_x_plus_y = ( + (series_data + series_data) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .var(bias=bias) + ) + var_x = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + var_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + cov_x_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(series_data, bias=bias) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(series_data, bias=bias) + std_x = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + std_y = series_data.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if bias: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=bias) - std_y = x.ewm( + ).mean() + mean_y = series_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=bias) - tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - - if bias: - # check that biased cov(x, y) == mean(x*y) - - # mean(x)*mean(y) - mean_x = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean() - mean_y = x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean() - mean_x_times_y = ( - (x * x) - .ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ) - .mean() - ) - tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + ).mean() + mean_x_times_y = ( + (series_data * series_data) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 14314f80f152c..dafc60a057c0f 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -5,67 +5,68 @@ import pandas._testing as tm +def no_nans(x): + return x.notna().all().all() + + +def all_na(x): + return x.isnull().all().all() + + @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) -def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f): - x, is_constant, no_nans = consistency_data - - if f is np.nansum and min_periods == 0: - pass - elif f is np.sum and not no_nans: - pass - else: - expanding_f_result = x.expanding(min_periods=min_periods).sum() - expanding_apply_f_result = x.expanding(min_periods=min_periods).apply( - func=f, raw=True - ) - tm.assert_equal(expanding_f_result, expanding_apply_f_result) +def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f): + if f is np.sum: + if not no_nans(all_data) and not ( + all_na(all_data) and not all_data.empty and min_periods > 0 + ): + request.node.add_marker( + pytest.mark.xfail(reason="np.sum has different behavior with NaNs") + ) + expanding_f_result = all_data.expanding(min_periods=min_periods).sum() + expanding_apply_f_result = all_data.expanding(min_periods=min_periods).apply( + func=f, raw=True + ) + tm.assert_equal(expanding_f_result, expanding_apply_f_result) @pytest.mark.parametrize("ddof", [0, 1]) -def test_moments_consistency_var(consistency_data, min_periods, ddof): - x, is_constant, no_nans = consistency_data - - var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) +def test_moments_consistency_var(all_data, min_periods, ddof): + var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 - mean_x2 = (x * x).expanding(min_periods=min_periods).mean() - mean_x = x.expanding(min_periods=min_periods).mean() + mean_x2 = (all_data * all_data).expanding(min_periods=min_periods).mean() + mean_x = all_data.expanding(min_periods=min_periods).mean() tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) @pytest.mark.parametrize("ddof", [0, 1]) -def test_moments_consistency_var_constant(consistency_data, min_periods, ddof): - x, is_constant, no_nans = consistency_data +def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): + count_x = consistent_data.expanding(min_periods=min_periods).count() + var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof) - if is_constant: - count_x = x.expanding(min_periods=min_periods).count() - var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) - - # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = 0.0 - if ddof == 1: - expected[count_x < 2] = np.nan - tm.assert_equal(var_x, expected) + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if ddof == 1: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) @pytest.mark.parametrize("ddof", [0, 1]) -def test_expanding_consistency_var_std_cov(consistency_data, min_periods, ddof): - x, is_constant, no_nans = consistency_data - - var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) +def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof): + var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) assert not (var_x < 0).any().any() - std_x = x.expanding(min_periods=min_periods).std(ddof=ddof) + std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof) assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) - cov_x_x = x.expanding(min_periods=min_periods).cov(x, ddof=ddof) + cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof) assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) @@ -73,73 +74,71 @@ def test_expanding_consistency_var_std_cov(consistency_data, min_periods, ddof): @pytest.mark.parametrize("ddof", [0, 1]) -def test_expanding_consistency_series_cov_corr(consistency_data, min_periods, ddof): - x, is_constant, no_nans = consistency_data - - if isinstance(x, Series): - var_x_plus_y = (x + x).expanding(min_periods=min_periods).var(ddof=ddof) - var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) - var_y = x.expanding(min_periods=min_periods).var(ddof=ddof) - cov_x_y = x.expanding(min_periods=min_periods).cov(x, ddof=ddof) - # check that cov(x, y) == (var(x+y) - var(x) - - # var(y)) / 2 - tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) - - # check that corr(x, y) == cov(x, y) / (std(x) * - # std(y)) - corr_x_y = x.expanding(min_periods=min_periods).corr(x) - std_x = x.expanding(min_periods=min_periods).std(ddof=ddof) - std_y = x.expanding(min_periods=min_periods).std(ddof=ddof) - tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - - if ddof == 0: - # check that biased cov(x, y) == mean(x*y) - - # mean(x)*mean(y) - mean_x = x.expanding(min_periods=min_periods).mean() - mean_y = x.expanding(min_periods=min_periods).mean() - mean_x_times_y = (x * x).expanding(min_periods=min_periods).mean() - tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) - - -def test_expanding_consistency_mean(consistency_data, min_periods): - x, is_constant, no_nans = consistency_data - - result = x.expanding(min_periods=min_periods).mean() - expected = ( - x.expanding(min_periods=min_periods).sum() - / x.expanding(min_periods=min_periods).count() +def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof): + var_x_plus_y = ( + (series_data + series_data).expanding(min_periods=min_periods).var(ddof=ddof) ) - tm.assert_equal(result, expected.astype("float64")) + var_x = series_data.expanding(min_periods=min_periods).var(ddof=ddof) + var_y = series_data.expanding(min_periods=min_periods).var(ddof=ddof) + cov_x_y = series_data.expanding(min_periods=min_periods).cov(series_data, ddof=ddof) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.expanding(min_periods=min_periods).corr(series_data) + std_x = series_data.expanding(min_periods=min_periods).std(ddof=ddof) + std_y = series_data.expanding(min_periods=min_periods).std(ddof=ddof) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + if ddof == 0: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.expanding(min_periods=min_periods).mean() + mean_y = series_data.expanding(min_periods=min_periods).mean() + mean_x_times_y = ( + (series_data * series_data).expanding(min_periods=min_periods).mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) -def test_expanding_consistency_constant(consistency_data, min_periods): - x, is_constant, no_nans = consistency_data - if is_constant: - count_x = x.expanding().count() - mean_x = x.expanding(min_periods=min_periods).mean() - # check that correlation of a series with itself is either 1 or NaN - corr_x_x = x.expanding(min_periods=min_periods).corr(x) +def test_expanding_consistency_mean(all_data, min_periods): + result = all_data.expanding(min_periods=min_periods).mean() + expected = ( + all_data.expanding(min_periods=min_periods).sum() + / all_data.expanding(min_periods=min_periods).count() + ) + tm.assert_equal(result, expected.astype("float64")) - exp = x.max() if isinstance(x, Series) else x.max().max() - # check mean of constant series - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = exp - tm.assert_equal(mean_x, expected) +def test_expanding_consistency_constant(consistent_data, min_periods): + count_x = consistent_data.expanding().count() + mean_x = consistent_data.expanding(min_periods=min_periods).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(consistent_data) + + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) - # check correlation of constant series with itself is NaN - expected[:] = np.nan - tm.assert_equal(corr_x_x, expected) + # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) -def test_expanding_consistency_var_debiasing_factors(consistency_data, min_periods): - x, is_constant, no_nans = consistency_data +def test_expanding_consistency_var_debiasing_factors(all_data, min_periods): # check variance debiasing factors - var_unbiased_x = x.expanding(min_periods=min_periods).var() - var_biased_x = x.expanding(min_periods=min_periods).var(ddof=0) - var_debiasing_factors_x = x.expanding().count() / ( - x.expanding().count() - 1.0 + var_unbiased_x = all_data.expanding(min_periods=min_periods).var() + var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0) + var_debiasing_factors_x = all_data.expanding().count() / ( + all_data.expanding().count() - 1.0 ).replace(0.0, np.nan) tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 49bc5af4e9d69..daca19b0993bf 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -5,44 +5,52 @@ import pandas._testing as tm +def no_nans(x): + return x.notna().all().all() + + +def all_na(x): + return x.isnull().all().all() + + @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) def test_rolling_apply_consistency_sum( - consistency_data, rolling_consistency_cases, center, f + request, all_data, rolling_consistency_cases, center, f ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases - if f is np.nansum and min_periods == 0: - pass - elif f is np.sum and not no_nans: - pass - else: - rolling_f_result = x.rolling( - window=window, min_periods=min_periods, center=center - ).sum() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, center=center - ).apply(func=f, raw=True) - tm.assert_equal(rolling_f_result, rolling_apply_f_result) + if f is np.sum: + if not no_nans(all_data) and not ( + all_na(all_data) and not all_data.empty and min_periods > 0 + ): + request.node.add_marker( + pytest.mark.xfail(reason="np.sum has different behavior with NaNs") + ) + rolling_f_result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).sum() + rolling_apply_f_result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).apply(func=f, raw=True) + tm.assert_equal(rolling_f_result, rolling_apply_f_result) @pytest.mark.parametrize("ddof", [0, 1]) -def test_moments_consistency_var( - consistency_data, rolling_consistency_cases, center, ddof -): - x, is_constant, no_nans = consistency_data +def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof): window, min_periods = rolling_consistency_cases - var_x = x.rolling(window=window, min_periods=min_periods, center=center).var( + var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 - mean_x = x.rolling(window=window, min_periods=min_periods, center=center).mean() + mean_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() mean_x2 = ( - (x * x) + (all_data * all_data) .rolling(window=window, min_periods=min_periods, center=center) .mean() ) @@ -51,41 +59,38 @@ def test_moments_consistency_var( @pytest.mark.parametrize("ddof", [0, 1]) def test_moments_consistency_var_constant( - consistency_data, rolling_consistency_cases, center, ddof + consistent_data, rolling_consistency_cases, center, ddof ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases - if is_constant: - count_x = x.rolling( - window=window, min_periods=min_periods, center=center - ).count() - var_x = x.rolling(window=window, min_periods=min_periods, center=center).var( - ddof=ddof - ) + count_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + var_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) - # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = 0.0 - if ddof == 1: - expected[count_x < 2] = np.nan - tm.assert_equal(var_x, expected) + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if ddof == 1: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) @pytest.mark.parametrize("ddof", [0, 1]) def test_rolling_consistency_var_std_cov( - consistency_data, rolling_consistency_cases, center, ddof + all_data, rolling_consistency_cases, center, ddof ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases - var_x = x.rolling(window=window, min_periods=min_periods, center=center).var( + var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) assert not (var_x < 0).any().any() - std_x = x.rolling(window=window, min_periods=min_periods, center=center).std( + std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std( ddof=ddof ) assert not (std_x < 0).any().any() @@ -93,9 +98,9 @@ def test_rolling_consistency_var_std_cov( # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) - cov_x_x = x.rolling(window=window, min_periods=min_periods, center=center).cov( - x, ddof=ddof - ) + cov_x_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).cov(all_data, ddof=ddof) assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) @@ -104,122 +109,128 @@ def test_rolling_consistency_var_std_cov( @pytest.mark.parametrize("ddof", [0, 1]) def test_rolling_consistency_series_cov_corr( - consistency_data, rolling_consistency_cases, center, ddof + series_data, rolling_consistency_cases, center, ddof ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases - if isinstance(x, Series): - var_x_plus_y = ( - (x + x) - .rolling(window=window, min_periods=min_periods, center=center) - .var(ddof=ddof) - ) - var_x = x.rolling(window=window, min_periods=min_periods, center=center).var( - ddof=ddof - ) - var_y = x.rolling(window=window, min_periods=min_periods, center=center).var( - ddof=ddof - ) - cov_x_y = x.rolling(window=window, min_periods=min_periods, center=center).cov( - x, ddof=ddof - ) - # check that cov(x, y) == (var(x+y) - var(x) - - # var(y)) / 2 - tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + var_x_plus_y = ( + (series_data + series_data) + .rolling(window=window, min_periods=min_periods, center=center) + .var(ddof=ddof) + ) + var_x = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) + var_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=ddof) + cov_x_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).cov(series_data, ddof=ddof) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).corr(series_data) + std_x = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).std(ddof=ddof) + std_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).std(ddof=ddof) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - # check that corr(x, y) == cov(x, y) / (std(x) * - # std(y)) - corr_x_y = x.rolling( + if ddof == 0: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = series_data.rolling( window=window, min_periods=min_periods, center=center - ).corr(x) - std_x = x.rolling(window=window, min_periods=min_periods, center=center).std( - ddof=ddof - ) - std_y = x.rolling(window=window, min_periods=min_periods, center=center).std( - ddof=ddof + ).mean() + mean_y = series_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + mean_x_times_y = ( + (series_data * series_data) + .rolling(window=window, min_periods=min_periods, center=center) + .mean() ) - tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - - if ddof == 0: - # check that biased cov(x, y) == mean(x*y) - - # mean(x)*mean(y) - mean_x = x.rolling( - window=window, min_periods=min_periods, center=center - ).mean() - mean_y = x.rolling( - window=window, min_periods=min_periods, center=center - ).mean() - mean_x_times_y = ( - (x * x) - .rolling(window=window, min_periods=min_periods, center=center) - .mean() - ) - tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) -def test_rolling_consistency_mean(consistency_data, rolling_consistency_cases, center): - x, is_constant, no_nans = consistency_data +def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center): window, min_periods = rolling_consistency_cases - result = x.rolling(window=window, min_periods=min_periods, center=center).mean() + result = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() expected = ( - x.rolling(window=window, min_periods=min_periods, center=center) + all_data.rolling(window=window, min_periods=min_periods, center=center) .sum() .divide( - x.rolling(window=window, min_periods=min_periods, center=center).count() + all_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() ) ) tm.assert_equal(result, expected.astype("float64")) def test_rolling_consistency_constant( - consistency_data, rolling_consistency_cases, center + consistent_data, rolling_consistency_cases, center ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases - if is_constant: - count_x = x.rolling( - window=window, min_periods=min_periods, center=center - ).count() - mean_x = x.rolling(window=window, min_periods=min_periods, center=center).mean() - # check that correlation of a series with itself is either 1 or NaN - corr_x_x = x.rolling( - window=window, min_periods=min_periods, center=center - ).corr(x) + count_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() + mean_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = consistent_data.rolling( + window=window, min_periods=min_periods, center=center + ).corr(consistent_data) - exp = x.max() if isinstance(x, Series) else x.max().max() + exp = ( + consistent_data.max() + if isinstance(consistent_data, Series) + else consistent_data.max().max() + ) - # check mean of constant series - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = exp - tm.assert_equal(mean_x, expected) + # check mean of constant series + expected = consistent_data * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) - # check correlation of constant series with itself is NaN - expected[:] = np.nan - tm.assert_equal(corr_x_x, expected) + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) def test_rolling_consistency_var_debiasing_factors( - consistency_data, rolling_consistency_cases, center + all_data, rolling_consistency_cases, center ): - x, is_constant, no_nans = consistency_data window, min_periods = rolling_consistency_cases # check variance debiasing factors - var_unbiased_x = x.rolling( + var_unbiased_x = all_data.rolling( window=window, min_periods=min_periods, center=center ).var() - var_biased_x = x.rolling(window=window, min_periods=min_periods, center=center).var( - ddof=0 - ) + var_biased_x = all_data.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=0) var_debiasing_factors_x = ( - x.rolling(window=window, min_periods=min_periods, center=center) + all_data.rolling(window=window, min_periods=min_periods, center=center) .count() .divide( ( - x.rolling(window=window, min_periods=min_periods, center=center).count() + all_data.rolling( + window=window, min_periods=min_periods, center=center + ).count() - 1.0 ).replace(0.0, np.nan) )