Skip to content

TST: split and parametrize #55957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 1 commit on Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,6 @@ def test_quantile_datetime(self, unit):
{"a": Timestamp("2010-07-02 12:00:00").as_unit(unit), "b": 2.5},
index=[0.5],
)
# expected["a"] = expected["a"].dt.as_unit(unit)
tm.assert_frame_equal(result, expected)

# axis = 1
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def test_read_dta2(self, datapath):
parsed_115 = self.read_dta(path2)
with tm.assert_produces_warning(UserWarning):
parsed_117 = self.read_dta(path3)
# FIXME: don't leave commented-out
# 113 is buggy due to limits of date format support in Stata
# parsed_113 = self.read_dta(
# datapath("io", "data", "stata", "stata2_113.dta")
Expand Down
138 changes: 86 additions & 52 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,7 +1119,7 @@ def test_nanosecond_resample_error():
tm.assert_series_equal(result, exp)


def test_resample_anchored_intraday(simple_date_range_series, unit):
def test_resample_anchored_intraday(unit):
# #1471, #1458

rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit)
Expand All @@ -1141,6 +1141,8 @@ def test_resample_anchored_intraday(simple_date_range_series, unit):
assert exp.index.freq == "ME"
tm.assert_frame_equal(result, exp)


def test_resample_anchored_intraday2(unit):
rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit)
df = DataFrame(rng.month, index=rng)

Expand All @@ -1161,6 +1163,8 @@ def test_resample_anchored_intraday(simple_date_range_series, unit):
expected.index = expected.index.as_unit(unit)
tm.assert_frame_equal(result, expected)


def test_resample_anchored_intraday3(simple_date_range_series, unit):
ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h")
ts.index = ts.index.as_unit(unit)
resampled = ts.resample("ME").mean()
Expand Down Expand Up @@ -1374,8 +1378,17 @@ def test_resample_timegrouper(dates):
result = df.groupby(Grouper(freq="ME", key="A")).count()
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("dates", [dates1, dates2, dates3])
def test_resample_timegrouper2(dates):
df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))})
result = df.set_index("A").resample("ME").count()

exp_idx = DatetimeIndex(
["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"],
freq="ME",
name="A",
)
expected = DataFrame(
{"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]},
index=exp_idx,
Expand Down Expand Up @@ -1574,80 +1587,95 @@ def test_resample_dst_anchor(unit):
),
)


def test_resample_dst_anchor2(unit):
dti = date_range(
"2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris"
).as_unit(unit)
values = range(dti.size)
df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64")
how = {"a": "min", "b": "max", "c": "count"}

rs = df.resample("W-MON")
result = rs.agg(how)[["a", "b", "c"]]
expected = DataFrame(
{
"a": [0, 48, 384, 720, 1056, 1394],
"b": [47, 383, 719, 1055, 1393, 1586],
"c": [48, 336, 336, 336, 338, 193],
},
index=date_range(
"9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"
).as_unit(unit),
)
tm.assert_frame_equal(
df.resample("W-MON").agg(how)[["a", "b", "c"]],
DataFrame(
{
"a": [0, 48, 384, 720, 1056, 1394],
"b": [47, 383, 719, 1055, 1393, 1586],
"c": [48, 336, 336, 336, 338, 193],
},
index=date_range(
"9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"
).as_unit(unit),
),
result,
expected,
"W-MON Frequency",
)

rs2 = df.resample("2W-MON")
result2 = rs2.agg(how)[["a", "b", "c"]]
expected2 = DataFrame(
{
"a": [0, 48, 720, 1394],
"b": [47, 719, 1393, 1586],
"c": [48, 672, 674, 193],
},
index=date_range(
"9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris"
).as_unit(unit),
)
tm.assert_frame_equal(
df.resample("2W-MON").agg(how)[["a", "b", "c"]],
DataFrame(
{
"a": [0, 48, 720, 1394],
"b": [47, 719, 1393, 1586],
"c": [48, 672, 674, 193],
},
index=date_range(
"9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris"
).as_unit(unit),
),
result2,
expected2,
"2W-MON Frequency",
)

tm.assert_frame_equal(
df.resample("MS").agg(how)[["a", "b", "c"]],
DataFrame(
{"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]},
index=date_range(
"9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris"
).as_unit(unit),
rs3 = df.resample("MS")
result3 = rs3.agg(how)[["a", "b", "c"]]
expected3 = DataFrame(
{"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]},
index=date_range("9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris").as_unit(
unit
),
)
tm.assert_frame_equal(
result3,
expected3,
"MS Frequency",
)

rs4 = df.resample("2MS")
result4 = rs4.agg(how)[["a", "b", "c"]]
expected4 = DataFrame(
{"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]},
index=date_range(
"9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"
).as_unit(unit),
)
tm.assert_frame_equal(
df.resample("2MS").agg(how)[["a", "b", "c"]],
DataFrame(
{"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]},
index=date_range(
"9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"
).as_unit(unit),
),
result4,
expected4,
"2MS Frequency",
)

df_daily = df["10/26/2013":"10/29/2013"]
rs_d = df_daily.resample("D")
result_d = rs_d.agg({"a": "min", "b": "max", "c": "count"})[["a", "b", "c"]]
expected_d = DataFrame(
{
"a": [1248, 1296, 1346, 1394],
"b": [1295, 1345, 1393, 1441],
"c": [48, 50, 48, 48],
},
index=date_range(
"10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"
).as_unit(unit),
)
tm.assert_frame_equal(
df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[
["a", "b", "c"]
],
DataFrame(
{
"a": [1248, 1296, 1346, 1394],
"b": [1295, 1345, 1393, 1441],
"c": [48, 50, 48, 48],
},
index=date_range(
"10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"
).as_unit(unit),
),
result_d,
expected_d,
"D Frequency",
)

Expand Down Expand Up @@ -1728,9 +1756,8 @@ def test_resample_with_nat(unit):
"1970-01-01 00:00:01",
"1970-01-01 00:00:02",
]
)
).as_unit(unit)
frame = DataFrame([2, 3, 5, 7, 11], index=index)
frame.index = frame.index.as_unit(unit)

index_1s = DatetimeIndex(
["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"]
Expand Down Expand Up @@ -1789,7 +1816,14 @@ def f(data, add_arg):
expected = series.resample("D").mean().multiply(multiplier)
tm.assert_series_equal(result, expected)


def test_resample_apply_with_additional_args2():
# Testing dataframe
def f(data, add_arg):
return np.mean(data) * add_arg

multiplier = 10

df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10))
msg = "DataFrameGroupBy.resample operated on the grouping columns"
with tm.assert_produces_warning(FutureWarning, match=msg):
Expand Down
79 changes: 36 additions & 43 deletions pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def test_annual_upsample(self, simple_period_range_series):
exp = df["a"].resample("D").ffill()
tm.assert_series_equal(rdf["a"], exp)

def test_annual_upsample2(self):
rng = period_range("2000", "2003", freq="Y-DEC")
ts = Series([1, 2, 3, 4], index=rng)

Expand Down Expand Up @@ -258,20 +259,29 @@ def test_resample_incompat_freq(self):
"Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
"as they are not sub or super periods"
)
pi = period_range(start="2000", periods=3, freq="M")
ser = Series(range(3), index=pi)
rs = ser.resample("W")
with pytest.raises(IncompatibleFrequency, match=msg):
Series(
range(3), index=period_range(start="2000", periods=3, freq="M")
).resample("W").mean()
# TODO: should this raise at the resample call instead of at the mean call?
rs.mean()

def test_with_local_timezone_pytz(self):
@pytest.mark.parametrize(
"tz",
[
pytz.timezone("America/Los_Angeles"),
dateutil.tz.gettz("America/Los_Angeles"),
],
)
def test_with_local_timezone(self, tz):
# see gh-5430
local_timezone = pytz.timezone("America/Los_Angeles")
local_timezone = tz

start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
# 1 day later
end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)

index = date_range(start, end, freq="h")
index = date_range(start, end, freq="h", name="idx")

series = Series(1, index=index)
series = series.tz_convert(local_timezone)
Expand All @@ -280,52 +290,30 @@ def test_with_local_timezone_pytz(self):
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = period_range(start=start, end=end, freq="D") - offsets.Day()
expected_index = (
period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)

def test_resample_with_pytz(self):
@pytest.mark.parametrize(
"tz",
[
pytz.timezone("America/Los_Angeles"),
dateutil.tz.gettz("America/Los_Angeles"),
],
)
def test_resample_with_tz(self, tz):
# GH 13238
s = Series(
2, index=date_range("2017-01-01", periods=48, freq="h", tz="US/Eastern")
)
result = s.resample("D").mean()
ser = Series(2, index=date_range("2017-01-01", periods=48, freq="h", tz=tz))
result = ser.resample("D").mean()
expected = Series(
2.0,
index=pd.DatetimeIndex(
["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D"
),
index=pd.DatetimeIndex(["2017-01-01", "2017-01-02"], tz=tz, freq="D"),
)
tm.assert_series_equal(result, expected)
# Especially assert that the timezone is LMT for pytz
assert result.index.tz == pytz.timezone("US/Eastern")

def test_with_local_timezone_dateutil(self):
# see gh-5430
local_timezone = "dateutil/America/Los_Angeles"

start = datetime(
year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)
# 1 day later
end = datetime(
year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)

index = date_range(start, end, freq="h", name="idx")

series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()

# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)
assert result.index.tz == tz

def test_resample_nonexistent_time_bin_edge(self):
# GH 19375
Expand All @@ -336,6 +324,7 @@ def test_resample_nonexistent_time_bin_edge(self):
result = expected.resample("900s").mean()
tm.assert_series_equal(result, expected)

def test_resample_nonexistent_time_bin_edge2(self):
# GH 23742
index = date_range(start="2017-10-10", end="2017-10-20", freq="1h")
index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo")
Expand Down Expand Up @@ -420,6 +409,7 @@ def test_resample_to_quarterly_start_end(self, simple_period_range_series, how):
expected = ts.asfreq("Q-MAR", how=how)
expected = expected.reindex(result.index, method="ffill")

# FIXME: don't leave commented-out
# .to_timestamp('D')
# expected = expected.resample('Q-MAR').ffill()

Expand Down Expand Up @@ -510,6 +500,7 @@ def test_resample_tz_localized(self):
# it works
result = ts_local.resample("D").mean()

def test_resample_tz_localized2(self):
# #2245
idx = date_range(
"2001-09-20 15:59", "2001-09-20 16:00", freq="min", tz="Australia/Sydney"
Expand All @@ -528,6 +519,7 @@ def test_resample_tz_localized(self):
expected = Series([1.5], index=ex_index)
tm.assert_series_equal(result, expected)

def test_resample_tz_localized3(self):
# GH 6397
# comparing an offset that doesn't propagate tz's
rng = date_range("1/1/2011", periods=20000, freq="h")
Expand Down Expand Up @@ -694,6 +686,7 @@ def test_evenly_divisible_with_no_extra_bins(self):
)
tm.assert_frame_equal(result, expected)

def test_evenly_divisible_with_no_extra_bins2(self):
index = date_range(start="2001-5-4", periods=28)
df = DataFrame(
[
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,10 @@ def test_groupby_resample_on_index_with_list_of_keys_missing_column():
name="date",
),
)
gb = df.groupby("group")
rs = gb.resample("2D")
with pytest.raises(KeyError, match="Columns not found"):
df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean()
rs[["val_not_in_dataframe"]]


@pytest.mark.parametrize("kind", ["datetime", "period"])
Expand Down