From 416a8698019b203986575b11330bc1a61b617f5d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 7 Feb 2020 15:28:40 -0600 Subject: [PATCH 1/8] Add test --- pandas/tests/arrays/categorical/test_constructors.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 70e1421c8dcf4..1d82d368d9a24 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -560,6 +560,15 @@ def test_from_codes_neither(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1]) + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.astype(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories(self, dtype): cats = ["a", "b"] From 4bd7588e4d7c5a0b3b9cbc97f25375fde5904e32 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 7 Feb 2020 15:28:59 -0600 Subject: [PATCH 2/8] Don't cast EA to object --- pandas/core/arrays/categorical.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d26ff7490e714..5b06b4b7f08bf 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -644,7 +644,11 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): ) raise ValueError(msg) - codes = np.asarray(codes) # #21767 + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + codes = codes.to_numpy(dtype=np.int64, na_value=np.nan) + else: + codes = np.asarray(codes) # #21767 if len(codes) and not is_integer_dtype(codes): raise ValueError("codes need to be array-like integers") From 6c2eddab3baf538ffb456c3acf79714f83b45631 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 7 Feb 2020 15:31:33 -0600 Subject: [PATCH 3/8] Add whatsnew --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 94dc1e0c007ca..25de7330f9f8b 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -25,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) - .. --------------------------------------------------------------------------- From 270199aef44cfb568fa78aab790e5335262c3ba8 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 8 Feb 2020 08:52:19 -0600 Subject: [PATCH 4/8] Update pandas/tests/arrays/categorical/test_constructors.py Co-Authored-By: Joris Van den Bossche --- pandas/tests/arrays/categorical/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 1d82d368d9a24..17d5720db4bc7 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -565,7 +565,7 @@ def test_from_codes_with_nullable_int(self): categories = ["a", "b"] result = Categorical.from_codes(codes, categories=categories) - expected = Categorical.from_codes(codes.astype(int), categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) tm.assert_categorical_equal(result, expected) From 75e25d63003d82e38d29713a8b55624598ce2403 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 8 Feb 2020 09:17:29 -0600 Subject: [PATCH 5/8] No na_value --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5b06b4b7f08bf..eb37183b30b1a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -646,7 +646,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): if is_extension_array_dtype(codes) and is_integer_dtype(codes): # Avoid the implicit conversion of Int to object - codes = codes.to_numpy(dtype=np.int64, na_value=np.nan) + codes = codes.to_numpy(dtype=np.int64) else: codes = np.asarray(codes) # #21767 if len(codes) and not is_integer_dtype(codes): From 6ec804e978d6f665b18e8aecafb0327e3e82a454 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 9 Feb 2020 11:46:43 -0600 Subject: [PATCH 6/8] Remove issue number comment --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index eb37183b30b1a..a1a750b812275 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -648,7 +648,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): # Avoid the implicit conversion of Int to object codes = codes.to_numpy(dtype=np.int64) else: - codes = np.asarray(codes) # #21767 + codes = np.asarray(codes) if len(codes) and not is_integer_dtype(codes): raise ValueError("codes need to be array-like integers") From c5508dcbe38284eb88a37cd8b06ed09949dc51af Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 9 Feb 2020 11:56:29 -0600 Subject: [PATCH 7/8] Add test with NA code --- pandas/tests/arrays/categorical/test_constructors.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 17d5720db4bc7..8c7f5929f78ff 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -569,6 +569,14 @@ def test_from_codes_with_nullable_int(self): tm.assert_categorical_equal(result, expected) + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "[Cc]annot convert" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, categories=categories) + @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories(self, dtype): cats = ["a", "b"] From 63b7c1c6ebf328afcffaee2cfeec9051a232b062 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 13:42:32 -0600 Subject: [PATCH 8/8] Raise a better error message --- pandas/core/arrays/categorical.py | 2 ++ pandas/tests/arrays/categorical/test_constructors.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a1a750b812275..0e04354ae7c89 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -646,6 +646,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): if is_extension_array_dtype(codes) and is_integer_dtype(codes): # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") codes = codes.to_numpy(dtype=np.int64) else: codes = np.asarray(codes) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 8c7f5929f78ff..dbd8fd8df67c1 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -573,7 +573,7 @@ def test_from_codes_with_nullable_int_na_raises(self): codes = pd.array([0, None], dtype="Int64") categories = ["a", "b"] - msg = "[Cc]annot convert" + msg = "codes cannot contain NA values" with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, categories=categories)