diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 65361886436d6..ff5e550ebd97f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -206,6 +206,10 @@ by using the :func:`Categorical.rename_categories` method: s.cat.categories = ["Group %s" % g for g in s.cat.categories] s s.cat.rename_categories([1,2,3]) + s + # You can also pass a dict-like object to map the renaming + s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'}) + s .. note:: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a353544a4283..f440ca6e82828 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -115,6 +115,7 @@ Other Enhancements - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) +- :func:`Categorical.rename_categories` now accepts a dict-like argument as ``new_categories`` and only updates the categories found in that dict. (:issue:`17336`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index ddca93f07ad5e..6f7eafe43dbbb 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -25,7 +25,8 @@ is_categorical_dtype, is_integer_dtype, is_bool, is_list_like, is_sequence, - is_scalar) + is_scalar, + is_dict_like) from pandas.core.common import is_null_slice, _maybe_box_datetimelike from pandas.core.algorithms import factorize, take_1d, unique1d @@ -792,19 +793,20 @@ def set_categories(self, new_categories, ordered=None, rename=False, def rename_categories(self, new_categories, inplace=False): """ Renames categories. 
- The new categories has to be a list-like object. All items must be - unique and the number of items in the new categories must be the same - as the number of items in the old categories. + The new categories can be either a list-like or dict-like object. + If it is list-like, all items must be unique and the number of items + in the new categories must be the same as the number of items in the + old categories. Raises ------ ValueError - If the new categories do not have the same number of items than the - current categories or do not validate as categories + If new categories are list-like and do not have the same number of + items as the current categories or do not validate as categories Parameters ---------- - new_categories : Index-like + new_categories : Index-like or dict-like (>=0.21.0) The renamed categories. inplace : boolean (default: False) Whether or not to rename the categories inplace or return a copy of @@ -824,7 +826,12 @@ def rename_categories(self, new_categories, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - cat.categories = new_categories + + if is_dict_like(new_categories): + cat.categories = [new_categories.get(item, item) + for item in cat.categories] + else: + cat.categories = new_categories if not inplace: return cat diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index c361b430cfd8a..e6fa5d1af55be 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -983,6 +983,40 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + def test_rename_categories_dict(self): + # GH 17336 + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + + # Test for inplace + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}, + inplace=True) + 
assert res is None + tm.assert_index_equal(cat.categories, expected) + + # Test for dicts of smaller length + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'c': 3}) + + expected = Index([1, 'b', 3, 'd']) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with bigger length + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3, + 'd': 4, 'e': 5, 'f': 6}) + expected = Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with no items from old categories + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'f': 1, 'g': 3}) + + expected = Index(['a', 'b', 'c', 'd']) + tm.assert_index_equal(res.categories, expected) + @pytest.mark.parametrize('codes, old, new, expected', [ ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]), ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),