From cd44bf4e907221f9efa9358d14a3f298464bf431 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 10:14:29 -0600 Subject: [PATCH 1/7] Special case __ne__ --- pandas/core/arrays/categorical.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a5048e3aae899..a87ade10c1254 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -103,7 +103,10 @@ def func(self, other): mask = (self._codes == -1) | (other_codes == -1) if mask.any(): # In other series, the leads to False, so do that here too - ret[mask] = False + if opname == "__ne__": + ret[mask & (self._codes == other_codes)] = True + else: + ret[mask] = False return ret if is_scalar(other): From 3c621fb0677651059ddce637cc679ff8bba1ee5f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 12:18:48 -0600 Subject: [PATCH 2/7] Test --- pandas/tests/extension/test_categorical.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 69a97f5c9fe02..624c0eee409e9 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -282,6 +282,15 @@ def _compare_other(self, s, data, op_name, other): with pytest.raises(TypeError, match=msg): op(data, other) + def test_not_equal_with_na(): + # https://github.com/pandas-dev/pandas/issues/32276 + categories = ["a", "b"] + c1 = Categorical([None, "a"], categories=categories) + c2 = Categorical(["a", "b"], categories=categories) + + result = c1 != c2 + + assert result.all() class TestParsing(base.BaseParsingTests): pass From b3adfe2f42ac5a7e65de911fd8e88ea6096b9432 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 12:19:08 -0600 Subject: [PATCH 3/7] Doc --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2b64b85863def..4ac4ff6c748b5 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -127,7 +127,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) -- +- Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`) - Datetimelike From 70f7ebbd52951e7b7ec8b593e760ec04a5596728 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 12:23:57 -0600 Subject: [PATCH 4/7] Reference self --- pandas/tests/extension/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 624c0eee409e9..47e97cc3c70e7 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -282,7 +282,7 @@ def _compare_other(self, s, data, op_name, other): with pytest.raises(TypeError, match=msg): op(data, other) - def test_not_equal_with_na(): + def test_not_equal_with_na(self): # https://github.com/pandas-dev/pandas/issues/32276 categories = ["a", "b"] c1 = Categorical([None, "a"], categories=categories) From 0087a53462bcbb863f870c2e5fb69fb9a6fa1913 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 12:52:05 -0600 Subject: [PATCH 5/7] Blacken --- pandas/tests/extension/test_categorical.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 47e97cc3c70e7..995b64f581613 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -292,5 +292,6 @@ def test_not_equal_with_na(self): assert result.all() + class TestParsing(base.BaseParsingTests): pass From 94d623805c3db9bd42eae70b77b0cb15671f856b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 16:37:51 -0600 Subject: [PATCH 6/7] Don't use mask --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a87ade10c1254..6d1d29db10865 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -104,7 +104,7 @@ def func(self, other): if mask.any(): # In other series, the leads to False, so do that here too if opname == "__ne__": - ret[mask & (self._codes == other_codes)] = True + ret[(self._codes == -1) & (other_codes == -1)] = True else: ret[mask] = False return ret From 888f7cf9bdef50e6de068639736475b8595d7883 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 27 Feb 2020 16:39:25 -0600 Subject: [PATCH 7/7] Param over categories --- pandas/tests/extension/test_categorical.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 995b64f581613..059d3453995bd 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -282,11 +282,14 @@ def _compare_other(self, s, data, op_name, other): with pytest.raises(TypeError, match=msg): op(data, other) - def test_not_equal_with_na(self): + @pytest.mark.parametrize( + "categories", + [["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]], + ) + def test_not_equal_with_na(self, categories): # https://github.com/pandas-dev/pandas/issues/32276 - categories = ["a", "b"] - c1 = Categorical([None, "a"], categories=categories) - c2 = Categorical(["a", "b"], categories=categories) + c1 = Categorical.from_codes([-1, 0], categories=categories) + c2 = Categorical.from_codes([0, 1], categories=categories) result = c1 != c2