Skip to content

CLN/REF: Split up / clean Categorical constructor tests #32211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 25, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 36 additions & 37 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self):
result = Categorical(Series(idx))
tm.assert_index_equal(result.categories, idx)

def test_constructor_invariant(self):
# GH 14190
vals = [
@pytest.mark.parametrize(
"values",
[
np.array([1.0, 1.2, 1.8, np.nan]),
np.array([1, 2, 3], dtype="int64"),
["a", "b", "c", np.nan],
Expand All @@ -366,11 +366,13 @@ def test_constructor_invariant(self):
Timestamp("2014-01-02", tz="US/Eastern"),
NaT,
],
]
for val in vals:
c = Categorical(val)
c2 = Categorical(c)
tm.assert_categorical_equal(c, c2)
],
)
def test_constructor_invariant(self, values):
# GH 14190
c = Categorical(values)
c2 = Categorical(c)
tm.assert_categorical_equal(c, c2)

@pytest.mark.parametrize("ordered", [True, False])
def test_constructor_with_dtype(self, ordered):
Expand Down Expand Up @@ -470,61 +472,66 @@ def test_construction_with_null(self, klass, nulls_fixture):

tm.assert_categorical_equal(result, expected)

def test_from_codes(self):
def test_from_codes_empty(self):
cat = ["a", "b", "c"]
result = Categorical.from_codes([], categories=cat)
expected = Categorical([], categories=cat)

# too few categories
tm.assert_categorical_equal(result, expected)

def test_from_codes_too_few_categories(self):
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be between "
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], dtype=dtype)

# no int codes
def test_from_codes_non_int_codes(self):
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], dtype=dtype)

# no unique categories
def test_from_codes_non_unique_categories(self):
with pytest.raises(ValueError, match="Categorical categories must be unique"):
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

# NaN categories included
def test_from_codes_nan_cat_included(self):
with pytest.raises(ValueError, match="Categorial categories cannot be null"):
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

# too negative
def test_from_codes_too_negative(self):
dtype = CategoricalDtype(categories=["a", "b", "c"])
msg = r"codes need to be between -1 and len\(categories\)-1"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], dtype=dtype)

def test_from_codes(self):
dtype = CategoricalDtype(categories=["a", "b", "c"])
exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)

res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)

def test_from_codes_with_categorical_categories(self):
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
def test_from_codes_with_categorical_categories(self, klass):
# GH17884
expected = Categorical(["a", "b"], categories=["a", "b", "c"])

result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"]))
result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"]))
tm.assert_categorical_equal(result, expected)

result = Categorical.from_codes(
[0, 1], categories=CategoricalIndex(["a", "b", "c"])
)
tm.assert_categorical_equal(result, expected)

# non-unique Categorical still raises
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
def test_from_codes_with_non_unique_categorical_categories(self, klass):
with pytest.raises(ValueError, match="Categorical categories must be unique"):
Categorical.from_codes([0, 1], Categorical(["a", "b", "a"]))
Categorical.from_codes([0, 1], klass(["a", "b", "a"]))

def test_from_codes_with_nan_code(self):
# GH21767
Expand All @@ -535,24 +542,16 @@ def test_from_codes_with_nan_code(self):
with pytest.raises(ValueError, match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_float(self):
@pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]])
def test_from_codes_with_float(self, codes):
# GH21767
codes = [1.0, 2.0, 0] # integer, but in float dtype
# float codes should raise even if values are equal to integers
dtype = CategoricalDtype(categories=["a", "b", "c"])

# empty codes should not raise for floats
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you retain this comment and the one on L552 describing the choice of parametrizations

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean the inline comments from L540 and L552? This comment (L543) I find a little confusing because we're testing an empty list rather than an empty float array

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks

Categorical.from_codes([], dtype.categories)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did this get lost or moved?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had removed that because it seemed duplicative given that we have a test_constructor_empty test. Looking now I see that it isn't using from_codes so I can add another test for that specifically.


with pytest.raises(ValueError, match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype.categories)

with pytest.raises(ValueError, match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)

codes = [1.1, 2.0, 0] # non-integer
with pytest.raises(ValueError, match="codes need to be array-like integers"):
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(codes, dtype.categories)
with pytest.raises(ValueError, match="codes need to be array-like integers"):
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_dtype_raises(self):
Expand Down