From c39534f1ed91cbe8a86d947e10dad8024722f555 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 23 Feb 2020 16:33:26 -0600 Subject: [PATCH 1/5] Split / parametrize --- .../arrays/categorical/test_constructors.py | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index d5537359d6948..bdffa8730b225 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -470,9 +470,7 @@ def test_construction_with_null(self, klass, nulls_fixture): tm.assert_categorical_equal(result, expected) - def test_from_codes(self): - - # too few categories + def test_from_codes_too_few_categories(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " with pytest.raises(ValueError, match=msg): @@ -480,22 +478,23 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], dtype=dtype) - # no int codes + def test_from_codes_non_int_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be array-like integers" with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], dtype=dtype) - # no unique categories + def test_from_codes_non_unique_categories(self): with pytest.raises(ValueError, match="Categorical categories must be unique"): Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) - # NaN categories included + def test_from_codes_nan_cat_included(self): with pytest.raises(ValueError, match="Categorial categories cannot be null"): Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) - # too negative + def test_from_codes_too_negative(self): dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = r"codes need to be between -1 and len\(categories\)-1" with pytest.raises(ValueError, match=msg): @@ -503,6 +502,8 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], dtype=dtype) + def test_from_codes(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) exp = Categorical(["a", "b", "c"], ordered=False) res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) tm.assert_categorical_equal(exp, res) @@ -510,21 +511,18 @@ def test_from_codes(self): res = Categorical.from_codes([0, 1, 2], dtype=dtype) tm.assert_categorical_equal(exp, res) - def test_from_codes_with_categorical_categories(self): + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_categorical_categories(self, klass): # GH17884 expected = Categorical(["a", "b"], categories=["a", "b", "c"]) - result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"])) - tm.assert_categorical_equal(result, expected) - - result = Categorical.from_codes( - [0, 1], categories=CategoricalIndex(["a", "b", "c"]) - ) + result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) - # non-unique Categorical still raises + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_non_unique_categorical_categories(self, klass): with pytest.raises(ValueError, match="Categorical categories must be unique"): - Categorical.from_codes([0, 1], Categorical(["a", "b", "a"])) + Categorical.from_codes([0, 1], klass(["a", "b", "a"])) def test_from_codes_with_nan_code(self): # GH21767 From b6207b43c79569897cd41d61d66dd9e59d9c711e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 23 Feb 2020 16:40:04 -0600 Subject: [PATCH 2/5] Dedupe some code --- .../arrays/categorical/test_constructors.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index bdffa8730b225..22591e45d36ca 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -533,24 +533,15 @@ def test_from_codes_with_nan_code(self): with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) - def test_from_codes_with_float(self): + @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) + def test_from_codes_with_float(self, codes): # GH21767 - codes = [1.0, 2.0, 0] # integer, but in float dtype dtype = CategoricalDtype(categories=["a", "b", "c"]) - # empty codes should not raise for floats - Categorical.from_codes([], dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype=dtype) - - codes = [1.1, 2.0, 0] # non-integer - with pytest.raises(ValueError, match="codes need to be array-like integers"): + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype.categories) - with pytest.raises(ValueError, match="codes need to be array-like integers"): + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype=dtype) def test_from_codes_with_dtype_raises(self): From 2483e0261dedf40dda44ecbf322f4790fa698166 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 23 Feb 2020 16:42:45 -0600 Subject: [PATCH 3/5] Parametrize --- .../arrays/categorical/test_constructors.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 22591e45d36ca..f35bf8c903a51 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self): result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) - def test_constructor_invariant(self): - # GH 14190 - vals = [ + @pytest.mark.parametrize( + "values", + [ np.array([1.0, 1.2, 1.8, np.nan]), np.array([1, 2, 3], dtype="int64"), ["a", "b", "c", np.nan], @@ -366,11 +366,13 @@ def test_constructor_invariant(self): Timestamp("2014-01-02", tz="US/Eastern"), NaT, ], - ] - for val in vals: - c = Categorical(val) - c2 = Categorical(c) - tm.assert_categorical_equal(c, c2) + ], + ) + def test_constructor_invariant(self, values): + # GH 14190 + c = Categorical(values) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) @pytest.mark.parametrize("ordered", [True, False]) def test_constructor_with_dtype(self, ordered): From 4bb8ebdf92a1f227efdaab2c5229e79c360d3bc8 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 11:10:49 -0600 Subject: [PATCH 4/5] Add back from_codes for empty --- pandas/tests/arrays/categorical/test_constructors.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index f35bf8c903a51..09e16c9c48251 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -472,6 +472,13 @@ def test_construction_with_null(self, klass, nulls_fixture): tm.assert_categorical_equal(result, expected) + def test_from_codes_empty(self): + cat = ["a", "b", "c"] + result = Categorical.from_codes([], categories=cat) + expected = Categorical([], categories=cat) + + tm.assert_categorical_equal(result, expected) + def test_from_codes_too_few_categories(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " From accfdc6e7d7738a609cbd20033b751df65e92d6e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 24 Feb 2020 13:17:21 -0600 Subject: [PATCH 5/5] Add comment --- pandas/tests/arrays/categorical/test_constructors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 09e16c9c48251..c6b4c4904735c 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -545,6 +545,7 @@ def test_from_codes_with_nan_code(self): @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) def test_from_codes_with_float(self, codes): # GH21767 + # float codes should raise even if values are equal to integers dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = "codes need to be array-like integers"