Skip to content

TST/REF: collect tests by method #37403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pandas/tests/arrays/categorical/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,22 @@ def test_setitem_tuple(self):
cat[1] = cat[0]
assert cat[1] == (0, 1)

def test_setitem_listlike(self):

# GH#9469
# properly coerce the input indexers
np.random.seed(1)
c = Categorical(
np.random.randint(0, 5, size=150000).astype(np.int8)
).add_categories([-1000])
indexer = np.array([100000]).astype(np.int64)
c[indexer] = -1000

# we are asserting the code result here
# which maps to the -1000 category
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))


class TestCategoricalIndexing:
def test_getitem_slice(self):
Expand Down
103 changes: 102 additions & 1 deletion pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,18 @@
import pytest

import pandas as pd
from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna
from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
Index,
MultiIndex,
Series,
date_range,
isna,
)
import pandas._testing as tm
from pandas.api.types import CategoricalDtype as CDT
import pandas.core.common as com


Expand Down Expand Up @@ -745,3 +755,94 @@ def test_reindex_multi_categorical_time(self):
result = df2.reindex(midx)
expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
tm.assert_frame_equal(result, expected)

def test_reindex_with_categoricalindex(self):
df = DataFrame(
{
"A": np.arange(3, dtype="int64"),
},
index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"),
)

# reindexing
# convert to a regular index
result = df.reindex(["a", "b", "e"])
expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
"B"
)
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["a", "b"])
expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["e"])
expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["d"])
expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

# since we are actually reindexing with a Categorical
# then return a Categorical
cats = list("cabe")

result = df.reindex(Categorical(["a", "e"], categories=cats))
expected = DataFrame(
{"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(Categorical(["a"], categories=cats))
expected = DataFrame(
{"A": [0], "B": Series(list("a")).astype(CDT(cats))}
).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["a", "b", "e"])
expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
"B"
)
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["a", "b"])
expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

result = df.reindex(["e"])
expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

# give back the type of categorical that we received
result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True))
expected = DataFrame(
{"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you split this test up


result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
expected = DataFrame(
{"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
).set_index("B")
tm.assert_frame_equal(result, expected, check_index_type=True)

df2 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
},
index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
)
# passed duplicate indexers are not allowed
msg = "cannot reindex from a duplicate axis"
with pytest.raises(ValueError, match=msg):
df2.reindex(["a", "b"])

# args NotImplemented ATM
msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
with pytest.raises(NotImplementedError, match=msg.format("method")):
df.reindex(["a"], method="ffill")
with pytest.raises(NotImplementedError, match=msg.format("level")):
df.reindex(["a"], level=1)
with pytest.raises(NotImplementedError, match=msg.format("limit")):
df.reindex(["a"], limit=2)
31 changes: 30 additions & 1 deletion pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
from pandas import (
CategoricalDtype,
CategoricalIndex,
DataFrame,
Index,
IntervalIndex,
Expand Down Expand Up @@ -495,7 +496,7 @@ def test_sort_index_categorical_multiindex(self):
columns=["a"],
index=MultiIndex(
levels=[
pd.CategoricalIndex(
CategoricalIndex(
["c", "a", "b"],
categories=["c", "a", "b"],
ordered=True,
Expand Down Expand Up @@ -736,6 +737,34 @@ def test_sort_index_multilevel_repr_8017(self, gen, extra):
result = result.sort_index(axis=1)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "categories",
    [
        pytest.param(["a", "b", "c"], id="str"),
        pytest.param(
            [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)],
            id="pd.Interval",
        ),
    ],
)
def test_sort_index_with_categories(self, categories):
    """sort_index on an ordered CategoricalIndex after reversing its categories.

    The expected frame has both its rows and its categories in reversed order.
    """
    # GH#23452
    n_rows = len(categories)
    flipped = categories[::-1]

    starting_index = CategoricalIndex(
        data=categories, categories=categories, ordered=True
    )
    df = DataFrame({"foo": range(n_rows)}, index=starting_index)

    # Reverse the category order in place on the index before sorting.
    df.index = df.index.reorder_categories(df.index.categories[::-1])
    result = df.sort_index()

    expected_index = CategoricalIndex(data=flipped, categories=flipped, ordered=True)
    expected = DataFrame({"foo": reversed(range(n_rows))}, index=expected_index)
    tm.assert_frame_equal(result, expected)


class TestDataFrameSortIndexKey:
def test_sort_multi_index_key(self):
Expand Down
89 changes: 89 additions & 0 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import Categorical, DataFrame, NaT, Timestamp, date_range
import pandas._testing as tm
Expand Down Expand Up @@ -711,3 +713,90 @@ def sorter(key):
)

tm.assert_frame_equal(result, expected)


@pytest.fixture
def df_none():
    """Frame with no explicit index: ``outer`` and ``inner`` are plain columns."""
    columns = {
        "outer": ["a", "a", "a", "b", "b", "b"],
        "inner": [1, 2, 2, 2, 1, 1],
        "A": np.arange(6, 0, -1),
        # tuple-valued column label
        ("B", 5): ["one", "one", "two", "two", "one", "one"],
    }
    return DataFrame(columns)


@pytest.fixture(params=[["outer"], ["outer", "inner"]])
def df_idx(request, df_none):
    """``df_none`` with the parametrized columns moved into the index."""
    index_columns = request.param
    indexed = df_none.set_index(index_columns)
    return indexed


@pytest.fixture(
    params=[
        "inner",  # index level
        ["outer"],  # list of index level
        "A",  # column
        [("B", 5)],  # list of column
        ["inner", "outer"],  # two index levels
        [("B", 5), "outer"],  # index level and column
        ["A", ("B", 5)],  # Two columns
        # Previously an exact duplicate of the "two index levels" entry above;
        # now includes a column so the described case is actually exercised.
        ["inner", "outer", "A"],  # two index levels and column
    ]
)
def sort_names(request):
    """Sort key(s) handed to ``sort_values(by=...)`` by the tests using this fixture.

    Each parameter is a single name or a list of names. Whether a given name
    is an index level or a column depends on which frame it is applied to
    (see the per-parameter comments for the intended reading).
    """
    return request.param


@pytest.fixture(params=[True, False])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be a top-level fixture

def ascending(request):
    """Return the active fixture parameter, passed on as the ``ascending`` flag."""
    flag = request.param
    return flag


class TestSortValuesLevelAsStr:
def test_sort_index_level_and_column_label(
self, df_none, df_idx, sort_names, ascending
):
# GH#14353

# Get index levels from df_idx
levels = df_idx.index.names

# Compute expected by sorting on columns and the setting index
expected = df_none.sort_values(
by=sort_names, ascending=ascending, axis=0
).set_index(levels)

# Compute result sorting on mix on columns and index levels
result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0)

tm.assert_frame_equal(result, expected)

def test_sort_column_level_and_index_label(
self, df_none, df_idx, sort_names, ascending
):
# GH#14353

# Get levels from df_idx
levels = df_idx.index.names

# Compute expected by sorting on axis=0, setting index levels, and then
# transposing. For some cases this will result in a frame with
# multiple column levels
expected = (
df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
.set_index(levels)
.T
)

# Compute result by transposing and sorting on axis=1.
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)

if len(levels) > 1:
# Accessing multi-level columns that are not lexsorted raises a
# performance warning
with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False):
tm.assert_frame_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
7 changes: 7 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2699,6 +2699,13 @@ def test_frame_ctor_datetime64_column(self):


class TestDataFrameConstructorWithDatetimeTZ:
def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    """tz-aware Timestamp data combined with an explicit naive datetime64[ns] dtype.

    The expected frame is the one built from the equivalent naive Timestamp.
    """
    # GH#25843
    aware_stamp = Timestamp("2019", tz=tz_aware_fixture)
    result = DataFrame({"d": [aware_stamp]}, dtype="datetime64[ns]")

    naive_stamp = Timestamp("2019")
    expected = DataFrame({"d": [naive_stamp]})

    tm.assert_frame_equal(result, expected)

def test_from_dict(self):

# 8260
Expand Down
23 changes: 22 additions & 1 deletion pandas/tests/frame/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, period_range
from pandas import DataFrame, Index, MultiIndex, date_range, period_range
import pandas._testing as tm


Expand Down Expand Up @@ -341,3 +341,24 @@ def test_merge_join_different_levels(self):
with tm.assert_produces_warning(UserWarning):
result = df1.join(df2, on="a")
tm.assert_frame_equal(result, expected)

def test_frame_join_tzaware(self):
test1 = DataFrame(
np.zeros((6, 3)),
index=date_range(
"2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central"
),
)
test2 = DataFrame(
np.zeros((3, 3)),
index=date_range(
"2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"
),
columns=range(3, 6),
)

result = test1.join(test2, how="outer")
expected = test1.index.union(test2.index)

tm.assert_index_equal(result.index, expected)
assert result.index.tz.zone == "US/Central"
Loading