From ea8d831130418238c8af38cb746611baef1b9117 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Thu, 21 Nov 2019 01:15:19 +0000 Subject: [PATCH 1/7] add tests for period/datetime resample sum with min_count --- pandas/tests/resample/test_datetime_index.py | 14 ++++++++++++++ pandas/tests/resample/test_period_index.py | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index bcd7081d5b1a5..4c9e3aee60411 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -692,6 +692,20 @@ def test_resample_timestamp_to_period(simple_date_range_series): tm.assert_series_equal(result, expected) +def test_datetime_resample_sum_min_count(): + # GH 19974 + index = date_range(start="2018", freq="M", periods=6) + data = np.ones(6) + data[3:6] = np.nan + datetime = Series(data, index) + result = datetime.resample("Q").sum(min_count=1) + + index = date_range("2018-03-31", "2018-06-30", freq="Q-DEC") + expected = Series([3, np.nan], index) + + tm.assert_series_equal(result, expected) + + def test_ohlc_5min(): def _ohlc(group): if isna(group).all(): diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 0ed0bf18a82ee..c7c051cc03f07 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -548,6 +548,19 @@ def test_quarterly_resampling(self): exp = ts.to_timestamp().resample("A").mean().to_period() tm.assert_series_equal(result, exp) + def test_period_resample_sum_min_count(self): + # GH 19974 + index = date_range(start="2018", freq="M", periods=6) + data = np.ones(6) + data[3:6] = np.nan + period = Series(data, index).to_period() + result = period.resample("Q").sum(min_count=1) + + index = PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC") + expected = Series([3, np.nan], index) + + tm.assert_series_equal(result, expected) + def test_resample_weekly_bug_1726(self): # 8/6/12 is a Monday ind = date_range(start="8/6/2012", end="8/26/2012", freq="D") From 0784c2128e86f2c474288dfcf842bb3f6a020214 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 23 Nov 2019 01:27:39 +0000 Subject: [PATCH 2/7] remove datetime test --- pandas/tests/resample/test_datetime_index.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 4c9e3aee60411..bcd7081d5b1a5 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -692,20 +692,6 @@ def test_resample_timestamp_to_period(simple_date_range_series): tm.assert_series_equal(result, expected) -def test_datetime_resample_sum_min_count(): - # GH 19974 - index = date_range(start="2018", freq="M", periods=6) - data = np.ones(6) - data[3:6] = np.nan - datetime = Series(data, index) - result = datetime.resample("Q").sum(min_count=1) - - index = date_range("2018-03-31", "2018-06-30", freq="Q-DEC") - expected = Series([3, np.nan], index) - - tm.assert_series_equal(result, expected) - - def test_ohlc_5min(): def _ohlc(group): if isna(group).all(): From c9e68303b22e3bc41399ff8e033da6c53d4bf796 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Fri, 29 Nov 2019 20:06:54 +0000 Subject: [PATCH 3/7] move and generalise test to include DTI, PI and TDI --- pandas/tests/resample/test_base.py | 78 +++++++++++++++++++++- pandas/tests/resample/test_period_index.py | 13 ---- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 02203f476af8e..f95bd55fec8e7 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -7,7 +7,7 @@ from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError from pandas.core.groupby.grouper import Grouper -from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.period import PeriodIndex, period_range from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range import pandas.util.testing as tm @@ -267,3 +267,79 @@ def test_resample_quantile(series): result = s.resample(freq).quantile(q) expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name) tm.assert_series_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize( + "freq, result_name, result_data, result_index, result_freq", + [ + ( + "D", + "dti", + [1.0] * 5 + [np.nan] * 5, + ["2005-01-{}".format(i) for i in range(1, 11)], + "D", + ), + ( + "D", + "pi", + [1.0] * 5 + [np.nan] * 5, + ["2005-01-{}".format(i) for i in range(1, 11)], + "D", + ), + ( + "D", + "tdi", + [1.0] * 5 + [np.nan] * 5, + ["{} days".format(i) for i in range(1, 11)], + "D", + ), + ( + "W", + "dti", + [2.0, 3.0, np.nan], + ["2005-01-02", "2005-01-09", "2005-01-16"], + "W-SUN", + ), + ( + "W", + "pi", + [2.0, 3.0, np.nan], + ["2004-12-27/2005-01-02", "2005-01-03/2005-01-09", "2005-01-10/2005-01-16"], + "W-SUN", + ), + ("W", "", "", "", ""), + ("M", "dti", [5.0], ["2005-01-31"], "M"), + ("M", "pi", [5.0], ["2005-01"], "M"), + ("M", "", "", "", ""), + ], +) +def test_resample_sum( + series, freq, result_name, result_data, result_index, result_freq +): + # GH 19974 + series[:5] = 1 + series[5:] = np.nan + + if isinstance(series.index, TimedeltaIndex) and freq != "D": + msg = ".* is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + result = series.resample(freq).sum(min_count=1) + + else: + result = series.resample(freq).sum(min_count=1) + + if isinstance(series.index, DatetimeIndex) and result_name == "dti": + index = DatetimeIndex(result_index, freq=result_freq) + expected = Series(result_data, index, name=result_name) + tm.assert_series_equal(result, expected) + + if isinstance(series.index, PeriodIndex) and result_name == "pi": + index = PeriodIndex(result_index, freq=result_freq) + expected = Series(result_data, index, name=result_name) + tm.assert_series_equal(result, expected) + + if isinstance(series.index, TimedeltaIndex) and result_name == "tdi": + index = TimedeltaIndex(result_index, freq=result_freq) + expected = Series(result_data, index, name=result_name) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index c7c051cc03f07..0ed0bf18a82ee 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -548,19 +548,6 @@ def test_quarterly_resampling(self): exp = ts.to_timestamp().resample("A").mean().to_period() tm.assert_series_equal(result, exp) - def test_period_resample_sum_min_count(self): - # GH 19974 - index = date_range(start="2018", freq="M", periods=6) - data = np.ones(6) - data[3:6] = np.nan - period = Series(data, index).to_period() - result = period.resample("Q").sum(min_count=1) - - index = PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC") - expected = Series([3, np.nan], index) - - tm.assert_series_equal(result, expected) - def test_resample_weekly_bug_1726(self): # 8/6/12 is a Monday ind = date_range(start="8/6/2012", end="8/26/2012", freq="D") From 75c4781e8a6dd8d0d7ecd839f6270f7cd988d3d3 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Fri, 6 Dec 2019 19:00:50 +0000 Subject: [PATCH 4/7] move timedelta test to pandas/tests/resample/test_timedelta.py --- pandas/tests/resample/test_timedelta.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 7a6ebf826ca4d..cd95d8b7b0f61 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -4,7 +4,7 @@ import pandas as pd from pandas import DataFrame, Series -from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range import pandas.util.testing as tm @@ -125,3 +125,15 @@ def test_resample_timedelta_values(): tm.assert_series_equal(res, exp) res = df["time"].resample("2D").first() tm.assert_series_equal(res, exp) + + +def test_resample_sum(): + # GH 19974 + data = [1.0] * 5 + [np.nan] * 5 + index = timedelta_range("1 day", "10 day", freq="D") + series = Series(data, index=index) + result = series.resample("D").sum(min_count=1) + + index = TimedeltaIndex(["{} days".format(i) for i in range(1, 11)], freq="D") + expected = Series(data, index=index) + tm.assert_series_equal(result, expected) From 4f131aa40c54e973f7158c4d936325b727b779a8 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Fri, 6 Dec 2019 19:01:44 +0000 Subject: [PATCH 5/7] restructure test to avoid duplication --- pandas/tests/resample/test_base.py | 109 ++++++++++++++--------------- 1 file changed, 51 insertions(+), 58 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index f95bd55fec8e7..8f553825d584b 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -269,77 +269,70 @@ def test_resample_quantile(series): tm.assert_series_equal(result, expected) -@all_ts @pytest.mark.parametrize( - "freq, result_name, result_data, result_index, result_freq", + "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, PERIOD_RANGE] +) +@pytest.mark.parametrize( + "freq, result_dict", [ ( "D", - "dti", - [1.0] * 5 + [np.nan] * 5, - ["2005-01-{}".format(i) for i in range(1, 11)], - "D", - ), - ( - "D", - "pi", - [1.0] * 5 + [np.nan] * 5, - ["2005-01-{}".format(i) for i in range(1, 11)], - "D", - ), - ( - "D", - "tdi", - [1.0] * 5 + [np.nan] * 5, - ["{} days".format(i) for i in range(1, 11)], - "D", + { + "dti": { + "data": [1.0] * 5 + [np.nan] * 5, + "index": DatetimeIndex( + ["2005-01-{}".format(i) for i in range(1, 11)], freq="D" + ), + }, + "pi": { + "data": [1.0] * 5 + [np.nan] * 5, + "index": PeriodIndex( + ["2005-01-{}".format(i) for i in range(1, 11)], freq="D" + ), + }, + }, ), ( "W", - "dti", - [2.0, 3.0, np.nan], - ["2005-01-02", "2005-01-09", "2005-01-16"], - "W-SUN", + { + "dti": { + "data": [2.0, 3.0, np.nan], + "index": DatetimeIndex( + ["2005-01-02", "2005-01-09", "2005-01-16"], freq="W-SUN" + ), + }, + "pi": { + "data": [2.0, 3.0, np.nan], + "index": PeriodIndex( + [ + "2004-12-27/2005-01-02", + "2005-01-03/2005-01-09", + "2005-01-10/2005-01-16", + ], + freq="W-SUN", + ), + }, + }, ), ( - "W", - "pi", - [2.0, 3.0, np.nan], - ["2004-12-27/2005-01-02", "2005-01-03/2005-01-09", "2005-01-10/2005-01-16"], - "W-SUN", + "M", + { + "dti": { + "data": [5.0], + "index": DatetimeIndex(["2005-01-31"], freq="M"), + }, + "pi": {"data": [5.0], "index": PeriodIndex(["2005-01"], freq="M")}, + }, ), - ("W", "", "", "", ""), - ("M", "dti", [5.0], ["2005-01-31"], "M"), - ("M", "pi", [5.0], ["2005-01"], "M"), - ("M", "", "", "", ""), ], ) -def test_resample_sum( - series, freq, result_name, result_data, result_index, result_freq -): +def test_resample_sum(series, freq, result_dict): # GH 19974 series[:5] = 1 series[5:] = np.nan + result = series.resample(freq).sum(min_count=1) - if isinstance(series.index, TimedeltaIndex) and freq != "D": - msg = ".* is a non-fixed frequency" - with pytest.raises(ValueError, match=msg): - result = series.resample(freq).sum(min_count=1) - - else: - result = series.resample(freq).sum(min_count=1) - - if isinstance(series.index, DatetimeIndex) and result_name == "dti": - index = DatetimeIndex(result_index, freq=result_freq) - expected = Series(result_data, index, name=result_name) - tm.assert_series_equal(result, expected) - - if isinstance(series.index, PeriodIndex) and result_name == "pi": - index = PeriodIndex(result_index, freq=result_freq) - expected = Series(result_data, index, name=result_name) - tm.assert_series_equal(result, expected) - - if isinstance(series.index, TimedeltaIndex) and result_name == "tdi": - index = TimedeltaIndex(result_index, freq=result_freq) - expected = Series(result_data, index, name=result_name) - tm.assert_series_equal(result, expected) + key = result.name + index = result_dict[key]["index"] + expected = Series(result_dict[key]["data"], index, name=key) + tm.assert_series_equal(result, expected) From 38501558e1f760e0c28fb295e6326a0c242dd5f0 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Mon, 16 Dec 2019 16:22:14 +0000 Subject: [PATCH 6/7] update test --- pandas/tests/resample/test_base.py | 72 +++++++++++------------------- 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 8f553825d584b..761463cf1df51 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -273,66 +273,44 @@ def test_resample_quantile(series): "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, PERIOD_RANGE] ) @pytest.mark.parametrize( - "freq, result_dict", + "freq, result_data, result_index, result_freq", [ ( "D", - { - "dti": { - "data": [1.0] * 5 + [np.nan] * 5, - "index": DatetimeIndex( - ["2005-01-{}".format(i) for i in range(1, 11)], freq="D" - ), - }, - "pi": { - "data": [1.0] * 5 + [np.nan] * 5, - "index": PeriodIndex( - ["2005-01-{}".format(i) for i in range(1, 11)], freq="D" - ), - }, - }, + [1.0] * 5 + [np.nan] * 5, + ["2005-01-{}".format(i) for i in range(1, 11)], + "D", ), ( - "W", - { - "dti": { - "data": [2.0, 3.0, np.nan], - "index": DatetimeIndex( - ["2005-01-02", "2005-01-09", "2005-01-16"], freq="W-SUN" - ), - }, - "pi": { - "data": [2.0, 3.0, np.nan], - "index": PeriodIndex( - [ - "2004-12-27/2005-01-02", - "2005-01-03/2005-01-09", - "2005-01-10/2005-01-16", - ], - freq="W-SUN", - ), - }, - }, + "D", + [1.0] * 5 + [np.nan] * 5, + ["2005-01-{}".format(i) for i in range(1, 11)], + "D", ), + ("W", [2.0, 3.0, np.nan], ["2005-01-02", "2005-01-09", "2005-01-16"], "W-SUN"), ( - "M", - { - "dti": { - "data": [5.0], - "index": DatetimeIndex(["2005-01-31"], freq="M"), - }, - "pi": {"data": [5.0], "index": PeriodIndex(["2005-01"], freq="M")}, - }, + "W", + [2.0, 3.0, np.nan], + ["2004-12-27/2005-01-02", "2005-01-03/2005-01-09", "2005-01-10/2005-01-16"], + "W-SUN", ), + ("M", [5.0], ["2005-01-31"], "M"), + ("M", [5.0], ["2005-01"], "M"), ], ) -def test_resample_sum(series, freq, result_dict): +def test_resample_sum(series, freq, result_data, result_index, result_freq): # GH 19974 series[:5] = 1 series[5:] = np.nan result = series.resample(freq).sum(min_count=1) - key = result.name - index = result_dict[key]["index"] - expected = Series(result_dict[key]["data"], index, name=key) + result_name = result.name + if isinstance(series.index, DatetimeIndex) and result_name == "dti": + index = DatetimeIndex(result_index, freq=result_freq) + elif isinstance(series.index, PeriodIndex) and result_name == "pi": + index = PeriodIndex(result_index, freq=result_freq) + else: + pytest.skip("unsupported configuration") + + expected = Series(result_data, index, name=result_name) tm.assert_series_equal(result, expected) From 3d6bfa3bed7411692691cb6005eb213b92a2f910 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Mon, 16 Dec 2019 16:32:52 +0000 Subject: [PATCH 7/7] update test --- pandas/tests/resample/test_base.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 761463cf1df51..97a95d107f070 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -273,42 +273,52 @@ def test_resample_quantile(series): "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, PERIOD_RANGE] ) @pytest.mark.parametrize( - "freq, result_data, result_index, result_freq", + "freq, result_name, result_data, result_index, result_freq", [ ( "D", + "dti", [1.0] * 5 + [np.nan] * 5, ["2005-01-{}".format(i) for i in range(1, 11)], "D", ), ( "D", + "pi", [1.0] * 5 + [np.nan] * 5, ["2005-01-{}".format(i) for i in range(1, 11)], "D", ), - ("W", [2.0, 3.0, np.nan], ["2005-01-02", "2005-01-09", "2005-01-16"], "W-SUN"), ( "W", + "dti", + [2.0, 3.0, np.nan], + ["2005-01-02", "2005-01-09", "2005-01-16"], + "W-SUN", + ), + ( + "W", + "pi", [2.0, 3.0, np.nan], ["2004-12-27/2005-01-02", "2005-01-03/2005-01-09", "2005-01-10/2005-01-16"], "W-SUN", ), - ("M", [5.0], ["2005-01-31"], "M"), - ("M", [5.0], ["2005-01"], "M"), + ("M", "dti", [5.0], ["2005-01-31"], "M"), + ("M", "pi", [5.0], ["2005-01"], "M"), ], ) -def test_resample_sum(series, freq, result_data, result_index, result_freq): +def test_resample_sum( + series, freq, result_name, result_data, result_index, result_freq +): # GH 19974 series[:5] = 1 series[5:] = np.nan result = series.resample(freq).sum(min_count=1) - result_name = result.name - if isinstance(series.index, DatetimeIndex) and result_name == "dti": - index = DatetimeIndex(result_index, freq=result_freq) - elif isinstance(series.index, PeriodIndex) and result_name == "pi": + if isinstance(series.index, PeriodIndex) and result_name == "pi": index = PeriodIndex(result_index, freq=result_freq) + elif isinstance(series.index, DatetimeIndex) and result_name == "dti": + index = DatetimeIndex(result_index, freq=result_freq) else: pytest.skip("unsupported configuration")