Skip to content

Commit 6584099

Browse files
authored
TST/REF: collect tests by method (#37342)
1 parent 39338e2 commit 6584099

File tree

12 files changed

+466
-438
lines changed

12 files changed

+466
-438
lines changed

pandas/conftest.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import pandas.util._test_decorators as td
3535

3636
import pandas as pd
37-
from pandas import DataFrame
37+
from pandas import DataFrame, Series
3838
import pandas._testing as tm
3939
from pandas.core import ops
4040
from pandas.core.indexes.api import Index, MultiIndex
@@ -529,6 +529,23 @@ def series_with_simple_index(index):
529529
return _create_series(index)
530530

531531

532+
@pytest.fixture
533+
def series_with_multilevel_index():
534+
"""
535+
Fixture with a Series with a 2-level MultiIndex.
536+
"""
537+
arrays = [
538+
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
539+
["one", "two", "one", "two", "one", "two", "one", "two"],
540+
]
541+
tuples = zip(*arrays)
542+
index = MultiIndex.from_tuples(tuples)
543+
data = np.random.randn(8)
544+
ser = Series(data, index=index)
545+
ser[3] = np.NaN
546+
return ser
547+
548+
532549
_narrow_dtypes = [
533550
np.float16,
534551
np.float32,

pandas/tests/frame/methods/test_count.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@
66

77

88
class TestDataFrameCount:
9+
def test_count_multiindex(self, multiindex_dataframe_random_data):
10+
frame = multiindex_dataframe_random_data
11+
12+
frame = frame.copy()
13+
frame.index.names = ["a", "b"]
14+
15+
result = frame.count(level="b")
16+
expected = frame.count(level=1)
17+
tm.assert_frame_equal(result, expected, check_names=False)
18+
19+
result = frame.count(level="a")
20+
expected = frame.count(level=0)
21+
tm.assert_frame_equal(result, expected, check_names=False)
22+
23+
msg = "Level x not found"
24+
with pytest.raises(KeyError, match=msg):
25+
frame.count(level="x")
26+
927
def test_count(self):
1028
# corner case
1129
frame = DataFrame()

pandas/tests/frame/methods/test_reset_index.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
from datetime import datetime
2+
from itertools import product
23

34
import numpy as np
45
import pytest
56

7+
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
8+
69
import pandas as pd
710
from pandas import (
811
DataFrame,
@@ -301,6 +304,194 @@ def test_reset_index_range(self):
301304
)
302305
tm.assert_frame_equal(result, expected)
303306

307+
def test_reset_index_multiindex_columns(self):
308+
levels = [["A", ""], ["B", "b"]]
309+
df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels))
310+
result = df[["B"]].rename_axis("A").reset_index()
311+
tm.assert_frame_equal(result, df)
312+
313+
# GH#16120: already existing column
314+
msg = r"cannot insert \('A', ''\), already exists"
315+
with pytest.raises(ValueError, match=msg):
316+
df.rename_axis("A").reset_index()
317+
318+
# GH#16164: multiindex (tuple) full key
319+
result = df.set_index([("A", "")]).reset_index()
320+
tm.assert_frame_equal(result, df)
321+
322+
# with additional (unnamed) index level
323+
idx_col = DataFrame(
324+
[[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")])
325+
)
326+
expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1)
327+
result = df.set_index([("B", "b")], append=True).reset_index()
328+
tm.assert_frame_equal(result, expected)
329+
330+
# with an index name that is too long a tuple...
331+
msg = "Item must have length equal to number of levels."
332+
with pytest.raises(ValueError, match=msg):
333+
df.rename_axis([("C", "c", "i")]).reset_index()
334+
335+
# or too short...
336+
levels = [["A", "a", ""], ["B", "b", "i"]]
337+
df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels))
338+
idx_col = DataFrame(
339+
[[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")])
340+
)
341+
expected = pd.concat([idx_col, df2], axis=1)
342+
result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii")
343+
tm.assert_frame_equal(result, expected)
344+
345+
# ... which is incompatible with col_fill=None
346+
with pytest.raises(
347+
ValueError,
348+
match=(
349+
"col_fill=None is incompatible with "
350+
r"incomplete column name \('C', 'c'\)"
351+
),
352+
):
353+
df2.rename_axis([("C", "c")]).reset_index(col_fill=None)
354+
355+
# with col_level != 0
356+
result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C")
357+
tm.assert_frame_equal(result, expected)
358+
359+
def test_reset_index_datetime(self, tz_naive_fixture):
360+
# GH#3950
361+
tz = tz_naive_fixture
362+
idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
363+
idx2 = Index(range(5), name="idx2", dtype="int64")
364+
idx = MultiIndex.from_arrays([idx1, idx2])
365+
df = DataFrame(
366+
{"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
367+
index=idx,
368+
)
369+
370+
expected = DataFrame(
371+
{
372+
"idx1": [
373+
datetime(2011, 1, 1),
374+
datetime(2011, 1, 2),
375+
datetime(2011, 1, 3),
376+
datetime(2011, 1, 4),
377+
datetime(2011, 1, 5),
378+
],
379+
"idx2": np.arange(5, dtype="int64"),
380+
"a": np.arange(5, dtype="int64"),
381+
"b": ["A", "B", "C", "D", "E"],
382+
},
383+
columns=["idx1", "idx2", "a", "b"],
384+
)
385+
expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))
386+
387+
tm.assert_frame_equal(df.reset_index(), expected)
388+
389+
idx3 = pd.date_range(
390+
"1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
391+
)
392+
idx = MultiIndex.from_arrays([idx1, idx2, idx3])
393+
df = DataFrame(
394+
{"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
395+
index=idx,
396+
)
397+
398+
expected = DataFrame(
399+
{
400+
"idx1": [
401+
datetime(2011, 1, 1),
402+
datetime(2011, 1, 2),
403+
datetime(2011, 1, 3),
404+
datetime(2011, 1, 4),
405+
datetime(2011, 1, 5),
406+
],
407+
"idx2": np.arange(5, dtype="int64"),
408+
"idx3": [
409+
datetime(2012, 1, 1),
410+
datetime(2012, 2, 1),
411+
datetime(2012, 3, 1),
412+
datetime(2012, 4, 1),
413+
datetime(2012, 5, 1),
414+
],
415+
"a": np.arange(5, dtype="int64"),
416+
"b": ["A", "B", "C", "D", "E"],
417+
},
418+
columns=["idx1", "idx2", "idx3", "a", "b"],
419+
)
420+
expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))
421+
expected["idx3"] = expected["idx3"].apply(
422+
lambda d: Timestamp(d, tz="Europe/Paris")
423+
)
424+
tm.assert_frame_equal(df.reset_index(), expected)
425+
426+
# GH#7793
427+
idx = MultiIndex.from_product(
428+
[["a", "b"], pd.date_range("20130101", periods=3, tz=tz)]
429+
)
430+
df = DataFrame(
431+
np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx
432+
)
433+
434+
expected = DataFrame(
435+
{
436+
"level_0": "a a a b b b".split(),
437+
"level_1": [
438+
datetime(2013, 1, 1),
439+
datetime(2013, 1, 2),
440+
datetime(2013, 1, 3),
441+
]
442+
* 2,
443+
"a": np.arange(6, dtype="int64"),
444+
},
445+
columns=["level_0", "level_1", "a"],
446+
)
447+
expected["level_1"] = expected["level_1"].apply(
448+
lambda d: Timestamp(d, freq="D", tz=tz)
449+
)
450+
result = df.reset_index()
451+
tm.assert_frame_equal(result, expected)
452+
453+
def test_reset_index_period(self):
454+
# GH#7746
455+
idx = MultiIndex.from_product(
456+
[pd.period_range("20130101", periods=3, freq="M"), list("abc")],
457+
names=["month", "feature"],
458+
)
459+
460+
df = DataFrame(
461+
np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"]
462+
)
463+
expected = DataFrame(
464+
{
465+
"month": (
466+
[pd.Period("2013-01", freq="M")] * 3
467+
+ [pd.Period("2013-02", freq="M")] * 3
468+
+ [pd.Period("2013-03", freq="M")] * 3
469+
),
470+
"feature": ["a", "b", "c"] * 3,
471+
"a": np.arange(9, dtype="int64"),
472+
},
473+
columns=["month", "feature", "a"],
474+
)
475+
result = df.reset_index()
476+
tm.assert_frame_equal(result, expected)
477+
478+
def test_reset_index_delevel_infer_dtype(self):
479+
tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1]))
480+
index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
481+
df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
482+
deleveled = df.reset_index()
483+
assert is_integer_dtype(deleveled["prm1"])
484+
assert is_float_dtype(deleveled["prm2"])
485+
486+
def test_reset_index_with_drop(
487+
self, multiindex_year_month_day_dataframe_random_data
488+
):
489+
ymd = multiindex_year_month_day_dataframe_random_data
490+
491+
deleveled = ymd.reset_index(drop=True)
492+
assert len(deleveled.columns) == len(ymd.columns)
493+
assert deleveled.index.name == ymd.index.name
494+
304495

305496
@pytest.mark.parametrize(
306497
"array, dtype",

pandas/tests/frame/methods/test_set_index.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@
1717

1818

1919
class TestSetIndex:
20+
def test_set_index_multiindex(self):
21+
# segfault in GH#3308
22+
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
23+
df = DataFrame(d)
24+
tuples = [(0, 1), (0, 2), (1, 2)]
25+
df["tuples"] = tuples
26+
27+
index = MultiIndex.from_tuples(df["tuples"])
28+
# it works!
29+
df.set_index(index)
30+
2031
def test_set_index_empty_column(self):
2132
# GH#1971
2233
df = DataFrame(

pandas/tests/frame/methods/test_sort_index.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import CategoricalDtype, DataFrame, Index, IntervalIndex, MultiIndex, Series
5+
from pandas import (
6+
CategoricalDtype,
7+
DataFrame,
8+
Index,
9+
IntervalIndex,
10+
MultiIndex,
11+
Series,
12+
Timestamp,
13+
)
614
import pandas._testing as tm
715

816

@@ -668,6 +676,66 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data):
668676
result = frame.sort_index()
669677
assert result.index.names == frame.index.names
670678

679+
@pytest.mark.parametrize(
680+
"gen,extra",
681+
[
682+
([1.0, 3.0, 2.0, 5.0], 4.0),
683+
([1, 3, 2, 5], 4),
684+
(
685+
[
686+
Timestamp("20130101"),
687+
Timestamp("20130103"),
688+
Timestamp("20130102"),
689+
Timestamp("20130105"),
690+
],
691+
Timestamp("20130104"),
692+
),
693+
(["1one", "3one", "2one", "5one"], "4one"),
694+
],
695+
)
696+
def test_sort_index_multilevel_repr_8017(self, gen, extra):
697+
698+
np.random.seed(0)
699+
data = np.random.randn(3, 4)
700+
701+
columns = MultiIndex.from_tuples([("red", i) for i in gen])
702+
df = DataFrame(data, index=list("def"), columns=columns)
703+
df2 = pd.concat(
704+
[
705+
df,
706+
DataFrame(
707+
"world",
708+
index=list("def"),
709+
columns=MultiIndex.from_tuples([("red", extra)]),
710+
),
711+
],
712+
axis=1,
713+
)
714+
715+
# check that the repr is good
716+
# make sure that we have a correct sparsified repr
717+
# e.g. only 1 header of red
718+
assert str(df2).splitlines()[0].split() == ["red"]
719+
720+
# GH 8017
721+
# sorting fails after columns added
722+
723+
# construct single-dtype then sort
724+
result = df.copy().sort_index(axis=1)
725+
expected = df.iloc[:, [0, 2, 1, 3]]
726+
tm.assert_frame_equal(result, expected)
727+
728+
result = df2.sort_index(axis=1)
729+
expected = df2.iloc[:, [0, 2, 1, 4, 3]]
730+
tm.assert_frame_equal(result, expected)
731+
732+
# setitem then sort
733+
result = df.copy()
734+
result[("red", extra)] = "world"
735+
736+
result = result.sort_index(axis=1)
737+
tm.assert_frame_equal(result, expected)
738+
671739

672740
class TestDataFrameSortIndexKey:
673741
def test_sort_multi_index_key(self):

0 commit comments

Comments
 (0)