Skip to content

Commit 2521fab

Browse files
authored
TST: split/collect/parametrize tests (#39663)
1 parent cb6f64b commit 2521fab

File tree

7 files changed

+138
-134
lines changed

7 files changed

+138
-134
lines changed

pandas/conftest.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"""
2020

2121
from collections import abc
22-
from datetime import date, time, timedelta, timezone
22+
from datetime import date, datetime, time, timedelta, timezone
2323
from decimal import Decimal
2424
import operator
2525
import os
@@ -757,6 +757,27 @@ def mixed_type_frame():
757757
)
758758

759759

760+
@pytest.fixture
761+
def rand_series_with_duplicate_datetimeindex():
762+
"""
763+
Fixture for Series with a DatetimeIndex that has duplicates.
764+
"""
765+
dates = [
766+
datetime(2000, 1, 2),
767+
datetime(2000, 1, 2),
768+
datetime(2000, 1, 2),
769+
datetime(2000, 1, 3),
770+
datetime(2000, 1, 3),
771+
datetime(2000, 1, 3),
772+
datetime(2000, 1, 4),
773+
datetime(2000, 1, 4),
774+
datetime(2000, 1, 4),
775+
datetime(2000, 1, 5),
776+
]
777+
778+
return Series(np.random.randn(len(dates)), index=dates)
779+
780+
760781
# ----------------------------------------------------------------
761782
# Scalars
762783
# ----------------------------------------------------------------

pandas/tests/groupby/test_groupby.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2143,3 +2143,10 @@ def test_groupby_numerical_stability_cumsum():
21432143
)
21442144
expected = DataFrame({"a": exp_data, "b": exp_data})
21452145
tm.assert_frame_equal(result, expected, check_exact=True)
2146+
2147+
2148+
def test_groupby_mean_duplicate_index(rand_series_with_duplicate_datetimeindex):
2149+
dups = rand_series_with_duplicate_datetimeindex
2150+
result = dups.groupby(level=0).mean()
2151+
expected = dups.groupby(dups.index).mean()
2152+
tm.assert_series_equal(result, expected)

pandas/tests/indexes/datetimes/test_datetime.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -198,23 +198,6 @@ def test_ns_index(self):
198198
new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
199199
self.assert_index_parameters(new_index)
200200

201-
@pytest.mark.parametrize(
202-
"arr, expected",
203-
[
204-
(DatetimeIndex(["2017", "2017"]), DatetimeIndex(["2017"])),
205-
(
206-
DatetimeIndex(["2017", "2017"], tz="US/Eastern"),
207-
DatetimeIndex(["2017"], tz="US/Eastern"),
208-
),
209-
],
210-
)
211-
def test_unique(self, arr, expected):
212-
result = arr.unique()
213-
tm.assert_index_equal(result, expected)
214-
# GH 21737
215-
# Ensure the underlying data is consistent
216-
assert result[0] == expected[0]
217-
218201
def test_asarray_tz_naive(self):
219202
# This shouldn't produce a warning.
220203
idx = date_range("2000", periods=2)
pandas/tests/indexes/datetimes/test_unique.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from datetime import datetime, timedelta
2+
3+
import pytest
4+
5+
from pandas import DatetimeIndex, NaT, Timestamp
6+
import pandas._testing as tm
7+
8+
9+
@pytest.mark.parametrize(
10+
"arr, expected",
11+
[
12+
(DatetimeIndex(["2017", "2017"]), DatetimeIndex(["2017"])),
13+
(
14+
DatetimeIndex(["2017", "2017"], tz="US/Eastern"),
15+
DatetimeIndex(["2017"], tz="US/Eastern"),
16+
),
17+
],
18+
)
19+
def test_unique(arr, expected):
20+
result = arr.unique()
21+
tm.assert_index_equal(result, expected)
22+
# GH#21737
23+
# Ensure the underlying data is consistent
24+
assert result[0] == expected[0]
25+
26+
27+
def test_index_unique(rand_series_with_duplicate_datetimeindex):
28+
dups = rand_series_with_duplicate_datetimeindex
29+
index = dups.index
30+
31+
uniques = index.unique()
32+
expected = DatetimeIndex(
33+
[
34+
datetime(2000, 1, 2),
35+
datetime(2000, 1, 3),
36+
datetime(2000, 1, 4),
37+
datetime(2000, 1, 5),
38+
]
39+
)
40+
assert uniques.dtype == "M8[ns]" # sanity
41+
tm.assert_index_equal(uniques, expected)
42+
assert index.nunique() == 4
43+
44+
# GH#2563
45+
assert isinstance(uniques, DatetimeIndex)
46+
47+
dups_local = index.tz_localize("US/Eastern")
48+
dups_local.name = "foo"
49+
result = dups_local.unique()
50+
expected = DatetimeIndex(expected, name="foo")
51+
expected = expected.tz_localize("US/Eastern")
52+
assert result.tz is not None
53+
assert result.name == "foo"
54+
tm.assert_index_equal(result, expected)
55+
56+
# NaT, note this is excluded
57+
arr = [1370745748 + t for t in range(20)] + [NaT.value]
58+
idx = DatetimeIndex(arr * 3)
59+
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
60+
assert idx.nunique() == 20
61+
assert idx.nunique(dropna=False) == 21
62+
63+
arr = [
64+
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
65+
] + [NaT]
66+
idx = DatetimeIndex(arr * 3)
67+
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
68+
assert idx.nunique() == 20
69+
assert idx.nunique(dropna=False) == 21
70+
71+
72+
def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex):
73+
index = rand_series_with_duplicate_datetimeindex.index
74+
assert not index.is_unique

pandas/tests/series/indexing/test_datetime.py

Lines changed: 13 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,10 @@
99
import pytest
1010
import pytz
1111

12-
from pandas._libs import iNaT, index as libindex
12+
from pandas._libs import index as libindex
1313

1414
import pandas as pd
15-
from pandas import (
16-
DataFrame,
17-
DatetimeIndex,
18-
NaT,
19-
Series,
20-
Timestamp,
21-
date_range,
22-
period_range,
23-
)
15+
from pandas import DataFrame, Series, Timestamp, date_range, period_range
2416
import pandas._testing as tm
2517

2618

@@ -347,77 +339,10 @@ def test_datetime_indexing():
347339
"""
348340

349341

350-
@pytest.fixture
351-
def dups():
352-
dates = [
353-
datetime(2000, 1, 2),
354-
datetime(2000, 1, 2),
355-
datetime(2000, 1, 2),
356-
datetime(2000, 1, 3),
357-
datetime(2000, 1, 3),
358-
datetime(2000, 1, 3),
359-
datetime(2000, 1, 4),
360-
datetime(2000, 1, 4),
361-
datetime(2000, 1, 4),
362-
datetime(2000, 1, 5),
363-
]
364-
365-
return Series(np.random.randn(len(dates)), index=dates)
366-
367-
368-
def test_constructor(dups):
369-
assert isinstance(dups, Series)
370-
assert isinstance(dups.index, DatetimeIndex)
371-
372-
373-
def test_is_unique_monotonic(dups):
374-
assert not dups.index.is_unique
375-
376-
377-
def test_index_unique(dups):
378-
uniques = dups.index.unique()
379-
expected = DatetimeIndex(
380-
[
381-
datetime(2000, 1, 2),
382-
datetime(2000, 1, 3),
383-
datetime(2000, 1, 4),
384-
datetime(2000, 1, 5),
385-
]
386-
)
387-
assert uniques.dtype == "M8[ns]" # sanity
388-
tm.assert_index_equal(uniques, expected)
389-
assert dups.index.nunique() == 4
390-
391-
# #2563
392-
assert isinstance(uniques, DatetimeIndex)
393-
394-
dups_local = dups.index.tz_localize("US/Eastern")
395-
dups_local.name = "foo"
396-
result = dups_local.unique()
397-
expected = DatetimeIndex(expected, name="foo")
398-
expected = expected.tz_localize("US/Eastern")
399-
assert result.tz is not None
400-
assert result.name == "foo"
401-
tm.assert_index_equal(result, expected)
402-
403-
# NaT, note this is excluded
404-
arr = [1370745748 + t for t in range(20)] + [iNaT]
405-
idx = DatetimeIndex(arr * 3)
406-
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
407-
assert idx.nunique() == 20
408-
assert idx.nunique(dropna=False) == 21
409-
410-
arr = [
411-
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
412-
] + [NaT]
413-
idx = DatetimeIndex(arr * 3)
414-
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
415-
assert idx.nunique() == 20
416-
assert idx.nunique(dropna=False) == 21
417-
418-
419-
def test_duplicate_dates_indexing(dups):
420-
ts = dups
342+
def test_indexing_with_duplicate_datetimeindex(
343+
rand_series_with_duplicate_datetimeindex,
344+
):
345+
ts = rand_series_with_duplicate_datetimeindex
421346

422347
uniques = ts.index.unique()
423348
for date in uniques:
@@ -445,12 +370,6 @@ def test_duplicate_dates_indexing(dups):
445370
assert ts[datetime(2000, 1, 6)] == 0
446371

447372

448-
def test_groupby_average_dup_values(dups):
449-
result = dups.groupby(level=0).mean()
450-
expected = dups.groupby(dups.index).mean()
451-
tm.assert_series_equal(result, expected)
452-
453-
454373
def test_indexing_over_size_cutoff(monkeypatch):
455374
# #1821
456375

@@ -579,6 +498,8 @@ def test_indexing():
579498
result = df["2001"]["A"]
580499
tm.assert_series_equal(expected, result)
581500

501+
502+
def test_getitem_str_month_with_datetimeindex():
582503
# GH3546 (not including times on the last day)
583504
idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
584505
ts = Series(range(len(idx)), index=idx)
@@ -590,6 +511,8 @@ def test_indexing():
590511
expected = ts["2013-05"]
591512
tm.assert_series_equal(expected, ts)
592513

514+
515+
def test_getitem_str_year_with_datetimeindex():
593516
idx = [
594517
Timestamp("2013-05-31 00:00"),
595518
Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
@@ -598,17 +521,19 @@ def test_indexing():
598521
expected = ts["2013"]
599522
tm.assert_series_equal(expected, ts)
600523

524+
525+
def test_getitem_str_second_with_datetimeindex():
601526
# GH14826, indexing with a seconds resolution string / datetime object
602527
df = DataFrame(
603528
np.random.rand(5, 5),
604529
columns=["open", "high", "low", "close", "volume"],
605530
index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
606531
)
607-
expected = df.loc[[df.index[2]]]
608532

609533
# this is a single date, so will raise
610534
with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
611535
df["2012-01-02 18:01:02"]
536+
612537
msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
613538
with pytest.raises(KeyError, match=msg):
614539
df[df.index[2]]

pandas/tests/series/indexing/test_indexing.py

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -546,64 +546,52 @@ def test_timedelta_assignment():
546546

547547

548548
@pytest.mark.parametrize(
549-
"nat_val,should_cast",
549+
"nat_val",
550550
[
551-
(pd.NaT, True),
552-
(np.timedelta64("NaT", "ns"), False),
553-
(np.datetime64("NaT", "ns"), True),
551+
pd.NaT,
552+
np.timedelta64("NaT", "ns"),
553+
np.datetime64("NaT", "ns"),
554554
],
555555
)
556556
@pytest.mark.parametrize("tz", [None, "UTC"])
557-
def test_dt64_series_assign_nat(nat_val, should_cast, tz):
557+
def test_dt64_series_assign_nat(nat_val, tz, indexer_sli):
558558
# some nat-like values should be cast to datetime64 when inserting
559559
# into a datetime64 series. Others should coerce to object
560560
# and retain their dtypes.
561561
dti = pd.date_range("2016-01-01", periods=3, tz=tz)
562562
base = Series(dti)
563563
expected = Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype)
564+
565+
should_cast = nat_val is pd.NaT or base.dtype.kind == nat_val.dtype.kind
564566
if not should_cast:
565567
expected = expected.astype(object)
566568

567569
ser = base.copy(deep=True)
568-
ser[0] = nat_val
569-
tm.assert_series_equal(ser, expected)
570-
571-
ser = base.copy(deep=True)
572-
ser.loc[0] = nat_val
573-
tm.assert_series_equal(ser, expected)
574-
575-
ser = base.copy(deep=True)
576-
ser.iloc[0] = nat_val
570+
indexer_sli(ser)[0] = nat_val
577571
tm.assert_series_equal(ser, expected)
578572

579573

580574
@pytest.mark.parametrize(
581-
"nat_val,should_cast",
575+
"nat_val",
582576
[
583-
(pd.NaT, True),
584-
(np.timedelta64("NaT", "ns"), True),
585-
(np.datetime64("NaT", "ns"), False),
577+
pd.NaT,
578+
np.timedelta64("NaT", "ns"),
579+
np.datetime64("NaT", "ns"),
586580
],
587581
)
588-
def test_td64_series_assign_nat(nat_val, should_cast):
582+
def test_td64_series_assign_nat(nat_val, indexer_sli):
589583
# some nat-like values should be cast to timedelta64 when inserting
590584
# into a timedelta64 series. Others should coerce to object
591585
# and retain their dtypes.
592586
base = Series([0, 1, 2], dtype="m8[ns]")
593587
expected = Series([pd.NaT, 1, 2], dtype="m8[ns]")
588+
589+
should_cast = nat_val is pd.NaT or base.dtype == nat_val.dtype
594590
if not should_cast:
595591
expected = expected.astype(object)
596592

597593
ser = base.copy(deep=True)
598-
ser[0] = nat_val
599-
tm.assert_series_equal(ser, expected)
600-
601-
ser = base.copy(deep=True)
602-
ser.loc[0] = nat_val
603-
tm.assert_series_equal(ser, expected)
604-
605-
ser = base.copy(deep=True)
606-
ser.iloc[0] = nat_val
594+
indexer_sli(ser)[0] = nat_val
607595
tm.assert_series_equal(ser, expected)
608596

609597

pandas/tests/series/test_constructors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,3 +1672,9 @@ def test_from_list_dtype(self):
16721672

16731673
result = Series(["2015"], dtype="datetime64[ns]")
16741674
assert result._mgr.blocks[0].is_extension is False
1675+
1676+
1677+
def test_constructor(rand_series_with_duplicate_datetimeindex):
1678+
dups = rand_series_with_duplicate_datetimeindex
1679+
assert isinstance(dups, Series)
1680+
assert isinstance(dups.index, DatetimeIndex)

0 commit comments

Comments
 (0)