diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index fe712d8f0710f..eec400facffae 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -187,3 +187,4 @@ Bug Fixes
 - Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`)
 - Bug in ``Series.quantile`` with empty list results has ``Index`` with ``object`` dtype (:issue:`11588`)
 - Bug in ``pd.merge`` results in empty ``Int64Index`` rather than ``Index(dtype=object)`` when the merge result is empty (:issue:`11588`)
+- Bug in ``Categorical.remove_unused_categories`` when the data contains ``NaN`` values (:issue:`11599`)
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index cd4038d2b142b..be4b3f59bead4 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -817,6 +817,11 @@ def remove_unused_categories(self, inplace=False):
         """
         cat = self if inplace else self.copy()
         _used = sorted(np.unique(cat._codes))
+        # -1 codes a missing value (NaN), not a category position: drop it.
+        # ``_used`` is empty when there are no codes, so test before indexing.
+        if _used and _used[0] == -1:
+            _used = _used[1:]
+
         new_categories = cat.categories.take(_ensure_platform_int(_used))
 
         from pandas.core.index import _ensure_index
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 0ed84763aa9b6..3da4ad62b45af 100755
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -844,6 +844,11 @@ def test_remove_unused_categories(self):
         self.assert_numpy_array_equal(c.categories, exp_categories_dropped)
         self.assertIsNone(res)
 
+        # with NaN values (GH11599)
+        c = Categorical(["a","b","c",np.nan], categories=["a","b","c","d","e"])
+        res = c.remove_unused_categories()
+        self.assert_numpy_array_equal(res.categories, np.array(["a","b","c"]))
+        self.assert_numpy_array_equal(c.categories, exp_categories_all)
 
     def test_nan_handling(self):
 