-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN/REF: Split up / clean Categorical constructor tests #32211
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c39534f
b6207b4
2483e02
4bb8ebd
accfdc6
a7b0281
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self): | |
result = Categorical(Series(idx)) | ||
tm.assert_index_equal(result.categories, idx) | ||
|
||
def test_constructor_invariant(self): | ||
# GH 14190 | ||
vals = [ | ||
@pytest.mark.parametrize( | ||
"values", | ||
[ | ||
np.array([1.0, 1.2, 1.8, np.nan]), | ||
np.array([1, 2, 3], dtype="int64"), | ||
["a", "b", "c", np.nan], | ||
|
@@ -366,11 +366,13 @@ def test_constructor_invariant(self): | |
Timestamp("2014-01-02", tz="US/Eastern"), | ||
NaT, | ||
], | ||
] | ||
for val in vals: | ||
c = Categorical(val) | ||
c2 = Categorical(c) | ||
tm.assert_categorical_equal(c, c2) | ||
], | ||
) | ||
def test_constructor_invariant(self, values): | ||
# GH 14190 | ||
c = Categorical(values) | ||
c2 = Categorical(c) | ||
tm.assert_categorical_equal(c, c2) | ||
|
||
@pytest.mark.parametrize("ordered", [True, False]) | ||
def test_constructor_with_dtype(self, ordered): | ||
|
@@ -470,61 +472,66 @@ def test_construction_with_null(self, klass, nulls_fixture): | |
|
||
tm.assert_categorical_equal(result, expected) | ||
|
||
def test_from_codes(self): | ||
def test_from_codes_empty(self): | ||
cat = ["a", "b", "c"] | ||
result = Categorical.from_codes([], categories=cat) | ||
expected = Categorical([], categories=cat) | ||
|
||
# too few categories | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
def test_from_codes_too_few_categories(self): | ||
dtype = CategoricalDtype(categories=[1, 2]) | ||
msg = "codes need to be between " | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes([1, 2], categories=dtype.categories) | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes([1, 2], dtype=dtype) | ||
|
||
# no int codes | ||
def test_from_codes_non_int_codes(self): | ||
dtype = CategoricalDtype(categories=[1, 2]) | ||
msg = "codes need to be array-like integers" | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes(["a"], categories=dtype.categories) | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes(["a"], dtype=dtype) | ||
|
||
# no unique categories | ||
def test_from_codes_non_unique_categories(self): | ||
with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) | ||
|
||
# NaN categories included | ||
def test_from_codes_nan_cat_included(self): | ||
with pytest.raises(ValueError, match="Categorial categories cannot be null"): | ||
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) | ||
|
||
# too negative | ||
def test_from_codes_too_negative(self): | ||
dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||
msg = r"codes need to be between -1 and len\(categories\)-1" | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes([-2, 1, 2], categories=dtype.categories) | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes([-2, 1, 2], dtype=dtype) | ||
|
||
def test_from_codes(self): | ||
dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||
exp = Categorical(["a", "b", "c"], ordered=False) | ||
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) | ||
tm.assert_categorical_equal(exp, res) | ||
|
||
res = Categorical.from_codes([0, 1, 2], dtype=dtype) | ||
tm.assert_categorical_equal(exp, res) | ||
|
||
def test_from_codes_with_categorical_categories(self): | ||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||
def test_from_codes_with_categorical_categories(self, klass): | ||
# GH17884 | ||
expected = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||
|
||
result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"])) | ||
result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
result = Categorical.from_codes( | ||
[0, 1], categories=CategoricalIndex(["a", "b", "c"]) | ||
) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
# non-unique Categorical still raises | ||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||
def test_from_codes_with_non_unique_categorical_categories(self, klass): | ||
with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||
Categorical.from_codes([0, 1], Categorical(["a", "b", "a"])) | ||
Categorical.from_codes([0, 1], klass(["a", "b", "a"])) | ||
|
||
def test_from_codes_with_nan_code(self): | ||
# GH21767 | ||
|
@@ -535,24 +542,16 @@ def test_from_codes_with_nan_code(self): | |
with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||
Categorical.from_codes(codes, dtype=dtype) | ||
|
||
def test_from_codes_with_float(self): | ||
@pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) | ||
def test_from_codes_with_float(self, codes): | ||
# GH21767 | ||
codes = [1.0, 2.0, 0] # integer, but in float dtype | ||
# float codes should raise even if values are equal to integers | ||
dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||
|
||
# empty codes should not raise for floats | ||
Categorical.from_codes([], dtype.categories) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did this get lost or moved? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I had removed that because it seemed duplicative given that we have a |
||
|
||
with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||
Categorical.from_codes(codes, dtype.categories) | ||
|
||
with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||
Categorical.from_codes(codes, dtype=dtype) | ||
|
||
codes = [1.1, 2.0, 0] # non-integer | ||
with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||
msg = "codes need to be array-like integers" | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes(codes, dtype.categories) | ||
with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||
with pytest.raises(ValueError, match=msg): | ||
Categorical.from_codes(codes, dtype=dtype) | ||
|
||
def test_from_codes_with_dtype_raises(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you retain this comment and the one on L552 describing the choice of parametrizations?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean the inline comments from L540 and L552? I find this comment (L543) a little confusing because we're testing an empty list rather than an empty float array.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks