From 5b3c4c5391e312c193878a65a9130d230da8f0d7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 4 Apr 2020 15:00:02 -0500 Subject: [PATCH 1/3] BUG: Fix bug when replacing categorical value with self --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/categorical.py | 3 ++- pandas/tests/arrays/categorical/test_algos.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8bff34dbdadad..9c3392ca1bb3b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -293,6 +293,7 @@ Categorical - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) - Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`) - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`) +- Bug where :meth:`Categorical.replace` would replace with ``NaN`` whenever the value to replace and the new value were equal (:issue:`33288`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 55c42f59f865e..de19a37bb515c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2463,7 +2463,8 @@ def replace(self, to_replace, value, inplace: bool = False): if new_value in cat.categories: value_index = categories.index(new_value) cat._codes[cat._codes == index] = value_index - cat.remove_categories(replace_value, inplace=True) + if new_value != replace_value: + cat.remove_categories(replace_value, inplace=True) else: categories[index] = new_value cat.rename_categories(categories, inplace=True) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 
10c454f7c479a..1fbae3089043c 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -64,6 +64,7 @@ def test_isin_cats(): [ ("b", "c", ["a", "c"], "Categorical.categories are different"), ("c", "d", ["a", "b"], None), + ("a", "a", ["a", "b"], None), ("b", None, ["a", None], "Categorical.categories length are different"), ], ) From 696eea95bc583571f8f0bf876b5763620b2b0458 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 5 Apr 2020 14:57:08 -0500 Subject: [PATCH 2/3] Test comment --- pandas/tests/arrays/categorical/test_algos.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 1fbae3089043c..325fa476d70e6 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -64,6 +64,7 @@ def test_isin_cats(): [ ("b", "c", ["a", "c"], "Categorical.categories are different"), ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 ("a", "a", ["a", "b"], None), ("b", None, ["a", None], "Categorical.categories length are different"), ], From 9be0d1bd78ae3d622a4656f14bf6612b2108d36e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 5 Apr 2020 15:01:27 -0500 Subject: [PATCH 3/3] Check earlier --- pandas/core/arrays/categorical.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index aef1510f2bb1d..c9b8db28e0cf6 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2447,6 +2447,8 @@ def replace(self, to_replace, value, inplace: bool = False): # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame for replace_value, new_value in replace_dict.items(): + if new_value == replace_value: + continue if replace_value in cat.categories: if 
isna(new_value): cat.remove_categories(replace_value, inplace=True) @@ -2456,8 +2458,7 @@ def replace(self, to_replace, value, inplace: bool = False): if new_value in cat.categories: value_index = categories.index(new_value) cat._codes[cat._codes == index] = value_index - if new_value != replace_value: - cat.remove_categories(replace_value, inplace=True) + cat.remove_categories(replace_value, inplace=True) else: categories[index] = new_value cat.rename_categories(categories, inplace=True)