From 2f72fd83dd7695098eb12d6df81e2a2c2d24cc51 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 08:44:09 -0800 Subject: [PATCH] TST: split and parametrize --- pandas/tests/frame/methods/test_quantile.py | 1 - pandas/tests/io/test_stata.py | 1 + pandas/tests/resample/test_datetime_index.py | 138 +++++++++++------- pandas/tests/resample/test_period_index.py | 79 +++++----- .../tests/resample/test_resampler_grouper.py | 4 +- 5 files changed, 126 insertions(+), 97 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 637fc6270b78d..dcec68ab3530d 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -376,7 +376,6 @@ def test_quantile_datetime(self, unit): {"a": Timestamp("2010-07-02 12:00:00").as_unit(unit), "b": 2.5}, index=[0.5], ) - # expected["a"] = expected["a"].dt.as_unit(unit) tm.assert_frame_equal(result, expected) # axis = 1 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index cd504616b6c5d..f7f94af92743e 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -184,6 +184,7 @@ def test_read_dta2(self, datapath): parsed_115 = self.read_dta(path2) with tm.assert_produces_warning(UserWarning): parsed_117 = self.read_dta(path3) + # FIXME: don't leave commented-out # 113 is buggy due to limits of date format support in Stata # parsed_113 = self.read_dta( # datapath("io", "data", "stata", "stata2_113.dta") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 21937a35629a0..b0a58070d72ad 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1119,7 +1119,7 @@ def test_nanosecond_resample_error(): tm.assert_series_equal(result, exp) -def test_resample_anchored_intraday(simple_date_range_series, unit): +def test_resample_anchored_intraday(unit): # #1471, #1458 rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit) @@ -1141,6 +1141,8 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): assert exp.index.freq == "ME" tm.assert_frame_equal(result, exp) + +def test_resample_anchored_intraday2(unit): rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit) df = DataFrame(rng.month, index=rng) @@ -1161,6 +1163,8 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): expected.index = expected.index.as_unit(unit) tm.assert_frame_equal(result, expected) + +def test_resample_anchored_intraday3(simple_date_range_series, unit): ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h") ts.index = ts.index.as_unit(unit) resampled = ts.resample("ME").mean() @@ -1374,8 +1378,17 @@ def test_resample_timegrouper(dates): result = df.groupby(Grouper(freq="ME", key="A")).count() tm.assert_frame_equal(result, expected) + +@pytest.mark.parametrize("dates", [dates1, dates2, dates3]) +def test_resample_timegrouper2(dates): df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))}) result = df.set_index("A").resample("ME").count() + + exp_idx = DatetimeIndex( + ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"], + freq="ME", + name="A", + ) expected = DataFrame( {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]}, index=exp_idx, @@ -1574,6 +1587,8 @@ def test_resample_dst_anchor(unit): ), ) + +def test_resample_dst_anchor2(unit): dti = date_range( "2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris" ).as_unit(unit) @@ -1581,73 +1596,86 @@ def test_resample_dst_anchor(unit): df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64") how = {"a": "min", "b": "max", "c": "count"} + rs = df.resample("W-MON") + result = rs.agg(how)[["a", "b", "c"]] + expected = DataFrame( + { + "a": [0, 48, 384, 720, 1056, 1394], + "b": [47, 383, 719, 1055, 1393, 1586], + "c": [48, 336, 336, 336, 338, 193], + }, + index=date_range( + "9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris" + ).as_unit(unit), + ) tm.assert_frame_equal( - df.resample("W-MON").agg(how)[["a", "b", "c"]], - DataFrame( - { - "a": [0, 48, 384, 720, 1056, 1394], - "b": [47, 383, 719, 1055, 1393, 1586], - "c": [48, 336, 336, 336, 338, 193], - }, - index=date_range( - "9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris" - ).as_unit(unit), - ), + result, + expected, "W-MON Frequency", ) + rs2 = df.resample("2W-MON") + result2 = rs2.agg(how)[["a", "b", "c"]] + expected2 = DataFrame( + { + "a": [0, 48, 720, 1394], + "b": [47, 719, 1393, 1586], + "c": [48, 672, 674, 193], + }, + index=date_range( + "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris" + ).as_unit(unit), + ) tm.assert_frame_equal( - df.resample("2W-MON").agg(how)[["a", "b", "c"]], - DataFrame( - { - "a": [0, 48, 720, 1394], - "b": [47, 719, 1393, 1586], - "c": [48, 672, 674, 193], - }, - index=date_range( - "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris" - ).as_unit(unit), - ), + result2, + expected2, "2W-MON Frequency", ) - tm.assert_frame_equal( - df.resample("MS").agg(how)[["a", "b", "c"]], - DataFrame( - {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]}, - index=date_range( - "9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris" - ).as_unit(unit), + rs3 = df.resample("MS") + result3 = rs3.agg(how)[["a", "b", "c"]] + expected3 = DataFrame( + {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]}, + index=date_range("9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris").as_unit( + unit ), + ) + tm.assert_frame_equal( + result3, + expected3, "MS Frequency", ) + rs4 = df.resample("2MS") + result4 = rs4.agg(how)[["a", "b", "c"]] + expected4 = DataFrame( + {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]}, + index=date_range( + "9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris" + ).as_unit(unit), + ) tm.assert_frame_equal( - df.resample("2MS").agg(how)[["a", "b", "c"]], - DataFrame( - {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]}, - index=date_range( - "9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris" - ).as_unit(unit), - ), + result4, + expected4, "2MS Frequency", ) df_daily = df["10/26/2013":"10/29/2013"] + rs_d = df_daily.resample("D") + result_d = rs_d.agg({"a": "min", "b": "max", "c": "count"})[["a", "b", "c"]] + expected_d = DataFrame( + { + "a": [1248, 1296, 1346, 1394], + "b": [1295, 1345, 1393, 1441], + "c": [48, 50, 48, 48], + }, + index=date_range( + "10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris" + ).as_unit(unit), + ) tm.assert_frame_equal( - df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[ - ["a", "b", "c"] - ], - DataFrame( - { - "a": [1248, 1296, 1346, 1394], - "b": [1295, 1345, 1393, 1441], - "c": [48, 50, 48, 48], - }, - index=date_range( - "10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris" - ).as_unit(unit), - ), + result_d, + expected_d, "D Frequency", ) @@ -1728,9 +1756,8 @@ def test_resample_with_nat(unit): "1970-01-01 00:00:01", "1970-01-01 00:00:02", ] - ) + ).as_unit(unit) frame = DataFrame([2, 3, 5, 7, 11], index=index) - frame.index = frame.index.as_unit(unit) index_1s = DatetimeIndex( ["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"] @@ -1789,7 +1816,14 @@ def f(data, add_arg): expected = series.resample("D").mean().multiply(multiplier) tm.assert_series_equal(result, expected) + +def test_resample_apply_with_additional_args2(): # Testing dataframe + def f(data, add_arg): + return np.mean(data) * add_arg + + multiplier = 10 + df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10)) msg = "DataFrameGroupBy.resample operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index b8b2325f03889..80bb18a6a2a98 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -174,6 +174,7 @@ def test_annual_upsample(self, simple_period_range_series): exp = df["a"].resample("D").ffill() tm.assert_series_equal(rdf["a"], exp) + def test_annual_upsample2(self): rng = period_range("2000", "2003", freq="Y-DEC") ts = Series([1, 2, 3, 4], index=rng) @@ -258,20 +259,29 @@ def test_resample_incompat_freq(self): "Frequency cannot be resampled to , " "as they are not sub or super periods" ) + pi = period_range(start="2000", periods=3, freq="M") + ser = Series(range(3), index=pi) + rs = ser.resample("W") with pytest.raises(IncompatibleFrequency, match=msg): - Series( - range(3), index=period_range(start="2000", periods=3, freq="M") - ).resample("W").mean() + # TODO: should this raise at the resample call instead of at the mean call? + rs.mean() - def test_with_local_timezone_pytz(self): + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("America/Los_Angeles"), + dateutil.tz.gettz("America/Los_Angeles"), + ], + ) + def test_with_local_timezone(self, tz): # see gh-5430 - local_timezone = pytz.timezone("America/Los_Angeles") + local_timezone = tz start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc) # 1 day later end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc) - index = date_range(start, end, freq="h") + index = date_range(start, end, freq="h", name="idx") series = Series(1, index=index) series = series.tz_convert(local_timezone) @@ -280,52 +290,30 @@ def test_with_local_timezone_pytz(self): # Create the expected series # Index is moved back a day with the timezone conversion from UTC to # Pacific - expected_index = period_range(start=start, end=end, freq="D") - offsets.Day() + expected_index = ( + period_range(start=start, end=end, freq="D", name="idx") - offsets.Day() + ) expected = Series(1.0, index=expected_index) tm.assert_series_equal(result, expected) - def test_resample_with_pytz(self): + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("America/Los_Angeles"), + dateutil.tz.gettz("America/Los_Angeles"), + ], + ) + def test_resample_with_tz(self, tz): # GH 13238 - s = Series( - 2, index=date_range("2017-01-01", periods=48, freq="h", tz="US/Eastern") - ) - result = s.resample("D").mean() + ser = Series(2, index=date_range("2017-01-01", periods=48, freq="h", tz=tz)) + result = ser.resample("D").mean() expected = Series( 2.0, - index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D" - ), + index=pd.DatetimeIndex(["2017-01-01", "2017-01-02"], tz=tz, freq="D"), ) tm.assert_series_equal(result, expected) # Especially assert that the timezone is LMT for pytz - assert result.index.tz == pytz.timezone("US/Eastern") - - def test_with_local_timezone_dateutil(self): - # see gh-5430 - local_timezone = "dateutil/America/Los_Angeles" - - start = datetime( - year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() - ) - # 1 day later - end = datetime( - year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() - ) - - index = date_range(start, end, freq="h", name="idx") - - series = Series(1, index=index) - series = series.tz_convert(local_timezone) - result = series.resample("D", kind="period").mean() - - # Create the expected series - # Index is moved back a day with the timezone conversion from UTC to - # Pacific - expected_index = ( - period_range(start=start, end=end, freq="D", name="idx") - offsets.Day() - ) - expected = Series(1.0, index=expected_index) - tm.assert_series_equal(result, expected) + assert result.index.tz == tz def test_resample_nonexistent_time_bin_edge(self): # GH 19375 @@ -336,6 +324,7 @@ def test_resample_nonexistent_time_bin_edge(self): result = expected.resample("900s").mean() tm.assert_series_equal(result, expected) + def test_resample_nonexistent_time_bin_edge2(self): # GH 23742 index = date_range(start="2017-10-10", end="2017-10-20", freq="1h") index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo") @@ -420,6 +409,7 @@ def test_resample_to_quarterly_start_end(self, simple_period_range_series, how): expected = ts.asfreq("Q-MAR", how=how) expected = expected.reindex(result.index, method="ffill") + # FIXME: don't leave commented-out # .to_timestamp('D') # expected = expected.resample('Q-MAR').ffill() @@ -510,6 +500,7 @@ def test_resample_tz_localized(self): # it works result = ts_local.resample("D").mean() + def test_resample_tz_localized2(self): # #2245 idx = date_range( "2001-09-20 15:59", "2001-09-20 16:00", freq="min", tz="Australia/Sydney" @@ -528,6 +519,7 @@ def test_resample_tz_localized(self): expected = Series([1.5], index=ex_index) tm.assert_series_equal(result, expected) + def test_resample_tz_localized3(self): # GH 6397 # comparing an offset that doesn't propagate tz's rng = date_range("1/1/2011", periods=20000, freq="h") @@ -694,6 +686,7 @@ def test_evenly_divisible_with_no_extra_bins(self): ) tm.assert_frame_equal(result, expected) + def test_evenly_divisible_with_no_extra_bins2(self): index = date_range(start="2001-5-4", periods=28) df = DataFrame( [ diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index ea53dea06129d..fd02216934576 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -695,8 +695,10 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column(): name="date", ), ) + gb = df.groupby("group") + rs = gb.resample("2D") with pytest.raises(KeyError, match="Columns not found"): - df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() + rs[["val_not_in_dataframe"]] @pytest.mark.parametrize("kind", ["datetime", "period"])