Skip to content

TST/REF: collect tests by method #37342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame
from pandas import DataFrame, Series
import pandas._testing as tm
from pandas.core import ops
from pandas.core.indexes.api import Index, MultiIndex
Expand Down Expand Up @@ -529,6 +529,23 @@ def series_with_simple_index(index):
return _create_series(index)


@pytest.fixture
def series_with_multilevel_index():
    """
    Fixture with a Series with a 2-level MultiIndex.

    The Series holds 8 random floats; the value at position 3 is replaced
    with NaN before the Series is returned.
    """
    arrays = [
        ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    ]
    tuples = zip(*arrays)
    index = MultiIndex.from_tuples(tuples)
    data = np.random.randn(8)
    ser = Series(data, index=index)
    # The index holds tuples, not integers, so address the element by position
    # explicitly via ``.iloc`` instead of relying on the integer-key fallback.
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias is not
    # available in every NumPy release.
    ser.iloc[3] = np.nan
    return ser


_narrow_dtypes = [
np.float16,
np.float32,
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/methods/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@


class TestDataFrameCount:
def test_count_multiindex(self, multiindex_dataframe_random_data):
    """Counting by level name must agree with counting by level position."""
    df = multiindex_dataframe_random_data.copy()
    df.index.names = ["a", "b"]

    # Same order as before: level "b" (position 1) first, then "a" (position 0).
    for label, position in (("b", 1), ("a", 0)):
        by_label = df.count(level=label)
        by_position = df.count(level=position)
        tm.assert_frame_equal(by_label, by_position, check_names=False)

    # A level name that does not exist is rejected with a KeyError.
    with pytest.raises(KeyError, match="Level x not found"):
        df.count(level="x")

def test_count(self):
# corner case
frame = DataFrame()
Expand Down
191 changes: 191 additions & 0 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from datetime import datetime
from itertools import product

import numpy as np
import pytest

from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -301,6 +304,194 @@ def test_reset_index_range(self):
)
tm.assert_frame_equal(result, expected)

def test_reset_index_multiindex_columns(self):
    """Exercise ``reset_index`` on frames whose columns are a MultiIndex."""
    two_level_cols = MultiIndex.from_tuples([["A", ""], ["B", "b"]])
    df = DataFrame([[0, 2], [1, 3]], columns=two_level_cols)
    renamed = df[["B"]].rename_axis("A")
    tm.assert_frame_equal(renamed.reset_index(), df)

    # GH#16120: the index name collides with an already existing column
    err = r"cannot insert \('A', ''\), already exists"
    with pytest.raises(ValueError, match=err):
        df.rename_axis("A").reset_index()

    # GH#16164: a full (tuple) key round-trips through set_index/reset_index
    roundtrip = df.set_index([("A", "")]).reset_index()
    tm.assert_frame_equal(roundtrip, df)

    # an additional, unnamed index level comes back as "level_0"
    level_0_cols = MultiIndex.from_tuples([("level_0", "")])
    index_part = DataFrame([[0], [1]], columns=level_0_cols)
    expected = pd.concat([index_part, df[[("B", "b"), ("A", "")]]], axis=1)
    result = df.set_index([("B", "b")], append=True).reset_index()
    tm.assert_frame_equal(result, expected)

    # an index name that is a tuple longer than the number of column levels
    err = "Item must have length equal to number of levels."
    with pytest.raises(ValueError, match=err):
        df.rename_axis([("C", "c", "i")]).reset_index()

    # an index name that is a tuple shorter than the number of column levels
    three_level_cols = MultiIndex.from_tuples([["A", "a", ""], ["B", "b", "i"]])
    df2 = DataFrame([[0, 2], [1, 3]], columns=three_level_cols)
    filled_cols = MultiIndex.from_tuples([("C", "c", "ii")])
    index_part = DataFrame([[0], [1]], columns=filled_cols)
    expected = pd.concat([index_part, df2], axis=1)
    result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii")
    tm.assert_frame_equal(result, expected)

    # ... a short name cannot be combined with col_fill=None
    err = (
        "col_fill=None is incompatible with "
        r"incomplete column name \('C', 'c'\)"
    )
    with pytest.raises(ValueError, match=err):
        df2.rename_axis([("C", "c")]).reset_index(col_fill=None)

    # the name is inserted starting at col_level=1 and filled with "C"
    result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C")
    tm.assert_frame_equal(result, expected)

def test_reset_index_datetime(self, tz_naive_fixture):
# GH#3950
tz = tz_naive_fixture
idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
idx2 = Index(range(5), name="idx2", dtype="int64")
idx = MultiIndex.from_arrays([idx1, idx2])
df = DataFrame(
{"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
index=idx,
)

expected = DataFrame(
{
"idx1": [
datetime(2011, 1, 1),
datetime(2011, 1, 2),
datetime(2011, 1, 3),
datetime(2011, 1, 4),
datetime(2011, 1, 5),
],
"idx2": np.arange(5, dtype="int64"),
"a": np.arange(5, dtype="int64"),
"b": ["A", "B", "C", "D", "E"],
},
columns=["idx1", "idx2", "a", "b"],
)
expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))

tm.assert_frame_equal(df.reset_index(), expected)

idx3 = pd.date_range(
"1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
)
idx = MultiIndex.from_arrays([idx1, idx2, idx3])
df = DataFrame(
{"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
index=idx,
)

expected = DataFrame(
{
"idx1": [
datetime(2011, 1, 1),
datetime(2011, 1, 2),
datetime(2011, 1, 3),
datetime(2011, 1, 4),
datetime(2011, 1, 5),
],
"idx2": np.arange(5, dtype="int64"),
"idx3": [
datetime(2012, 1, 1),
datetime(2012, 2, 1),
datetime(2012, 3, 1),
datetime(2012, 4, 1),
datetime(2012, 5, 1),
],
"a": np.arange(5, dtype="int64"),
"b": ["A", "B", "C", "D", "E"],
},
columns=["idx1", "idx2", "idx3", "a", "b"],
)
expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))
expected["idx3"] = expected["idx3"].apply(
lambda d: Timestamp(d, tz="Europe/Paris")
)
tm.assert_frame_equal(df.reset_index(), expected)

# GH#7793
idx = MultiIndex.from_product(
[["a", "b"], pd.date_range("20130101", periods=3, tz=tz)]
)
df = DataFrame(
np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx
)

expected = DataFrame(
{
"level_0": "a a a b b b".split(),
"level_1": [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
datetime(2013, 1, 3),
]
* 2,
"a": np.arange(6, dtype="int64"),
},
columns=["level_0", "level_1", "a"],
)
expected["level_1"] = expected["level_1"].apply(
lambda d: Timestamp(d, freq="D", tz=tz)
)
result = df.reset_index()
tm.assert_frame_equal(result, expected)

def test_reset_index_period(self):
# GH#7746
idx = MultiIndex.from_product(
[pd.period_range("20130101", periods=3, freq="M"), list("abc")],
names=["month", "feature"],
)

df = DataFrame(
np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"]
)
expected = DataFrame(
{
"month": (
[pd.Period("2013-01", freq="M")] * 3
+ [pd.Period("2013-02", freq="M")] * 3
+ [pd.Period("2013-03", freq="M")] * 3
),
"feature": ["a", "b", "c"] * 3,
"a": np.arange(9, dtype="int64"),
},
columns=["month", "feature", "a"],
)
result = df.reset_index()
tm.assert_frame_equal(result, expected)

def test_reset_index_delevel_infer_dtype(self):
tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1]))
index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
deleveled = df.reset_index()
assert is_integer_dtype(deleveled["prm1"])
assert is_float_dtype(deleveled["prm2"])

def test_reset_index_with_drop(
self, multiindex_year_month_day_dataframe_random_data
):
ymd = multiindex_year_month_day_dataframe_random_data

deleveled = ymd.reset_index(drop=True)
assert len(deleveled.columns) == len(ymd.columns)
assert deleveled.index.name == ymd.index.name


@pytest.mark.parametrize(
"array, dtype",
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@


class TestSetIndex:
def test_set_index_multiindex(self):
# segfault in GH#3308
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
df = DataFrame(d)
tuples = [(0, 1), (0, 2), (1, 2)]
df["tuples"] = tuples

index = MultiIndex.from_tuples(df["tuples"])
# it works!
df.set_index(index)

def test_set_index_empty_column(self):
# GH#1971
df = DataFrame(
Expand Down
70 changes: 69 additions & 1 deletion pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
import pytest

import pandas as pd
from pandas import CategoricalDtype, DataFrame, Index, IntervalIndex, MultiIndex, Series
from pandas import (
CategoricalDtype,
DataFrame,
Index,
IntervalIndex,
MultiIndex,
Series,
Timestamp,
)
import pandas._testing as tm


Expand Down Expand Up @@ -668,6 +676,66 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data):
result = frame.sort_index()
assert result.index.names == frame.index.names

@pytest.mark.parametrize(
    "gen,extra",
    [
        ([1.0, 3.0, 2.0, 5.0], 4.0),
        ([1, 3, 2, 5], 4),
        (
            [
                Timestamp(stamp)
                for stamp in ("20130101", "20130103", "20130102", "20130105")
            ],
            Timestamp("20130104"),
        ),
        (["1one", "3one", "2one", "5one"], "4one"),
    ],
)
def test_sort_index_multilevel_repr_8017(self, gen, extra):
    """
    Sorting MultiIndex columns still works after a column has been added.

    ``gen`` supplies the second-level labels of four float columns under
    "red"; ``extra`` is the label of a fifth, string-valued column that is
    added either through ``pd.concat`` or through ``__setitem__``.
    """
    np.random.seed(0)
    values = np.random.randn(3, 4)
    row_labels = list("def")

    base_columns = MultiIndex.from_tuples([("red", i) for i in gen])
    df = DataFrame(values, index=row_labels, columns=base_columns)

    extra_columns = MultiIndex.from_tuples([("red", extra)])
    extra_block = DataFrame("world", index=row_labels, columns=extra_columns)
    df2 = pd.concat([df, extra_block], axis=1)

    # The repr must be sparsified: the first header line shows "red" once.
    header = str(df2).splitlines()[0]
    assert header.split() == ["red"]

    # GH 8017
    # sorting fails after columns added

    # single-dtype frame: build, then sort
    sorted_single = df.copy().sort_index(axis=1)
    tm.assert_frame_equal(sorted_single, df.iloc[:, [0, 2, 1, 3]])

    # mixed-dtype frame produced by concat
    expected = df2.iloc[:, [0, 2, 1, 4, 3]]
    tm.assert_frame_equal(df2.sort_index(axis=1), expected)

    # same frame produced by setitem, then sorted
    assigned = df.copy()
    assigned[("red", extra)] = "world"
    tm.assert_frame_equal(assigned.sort_index(axis=1), expected)


class TestDataFrameSortIndexKey:
def test_sort_multi_index_key(self):
Expand Down
Loading