diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 0579a80aad28e..fcca50d1acdfd 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -139,6 +139,8 @@ Other Enhancements - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) - ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) +- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :meth:`Series.cat.rename_categories` + can now take a callable as their argument (:issue:`18862`) .. _whatsnew_0220.api_breaking: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 356e76df366b4..f9bd6849c5072 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -844,7 +844,7 @@ def rename_categories(self, new_categories, inplace=False): Parameters ---------- - new_categories : list-like or dict-like + new_categories : list-like, dict-like or callable * list-like: all items must be unique and the number of items in the new categories must match the existing number of categories. @@ -852,7 +852,14 @@ def rename_categories(self, new_categories, inplace=False): * dict-like: specifies a mapping from old categories to new. Categories not contained in the mapping are passed through and extra categories in the mapping are - ignored. *New in version 0.21.0*. + ignored. + + .. versionadded:: 0.21.0 + + * callable: a callable that is called on all items in the old + categories and whose return values comprise the new categories. + + .. versionadded:: 0.22.0 ..
warning:: @@ -890,6 +897,12 @@ def rename_categories(self, new_categories, inplace=False): >>> c.rename_categories({'a': 'A', 'c': 'C'}) [A, A, b] Categories (2, object): [A, b] + + You may also provide a callable to create the new categories + + >>> c.rename_categories(lambda x: x.upper()) + [A, A, B] + Categories (2, object): [A, B] """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() @@ -906,6 +919,8 @@ def rename_categories(self, new_categories, inplace=False): if is_dict_like(new_categories): cat.categories = [new_categories.get(item, item) for item in cat.categories] + elif callable(new_categories): + cat.categories = [new_categories(item) for item in cat.categories] else: cat.categories = new_categories if not inplace: diff --git a/pandas/tests/categorical/test_api.py b/pandas/tests/categorical/test_api.py index 7cc0aafaf05b6..12db4a9bea28b 100644 --- a/pandas/tests/categorical/test_api.py +++ b/pandas/tests/categorical/test_api.py @@ -71,9 +71,14 @@ def test_rename_categories(self): exp_cat = Index(["a", "b", "c"]) tm.assert_index_equal(cat.categories, exp_cat) - res = cat.rename_categories([1, 2, 3], inplace=True) + + # GH18862 (let rename_categories take callables) + result = cat.rename_categories(lambda x: x.upper()) + expected = Categorical(["A", "B", "C", "A"]) + tm.assert_categorical_equal(result, expected) # and now inplace + res = cat.rename_categories([1, 2, 3], inplace=True) assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 543f59013ff12..f7328a99195b9 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -185,6 +185,11 @@ def test_method_delegation(self): tm.assert_index_equal(result, CategoricalIndex( list('ffggef'), categories=list('efg'))) + # GH18862 (let rename_categories take callables) + result = 
ci.rename_categories(lambda x: x.upper()) + tm.assert_index_equal(result, CategoricalIndex( + list('AABBCA'), categories=list('CAB'))) + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) result = ci.add_categories(['d']) tm.assert_index_equal(result, CategoricalIndex( diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 617ca2199f588..a2838f803421c 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -588,6 +588,14 @@ def f(): pytest.raises(Exception, f) # right: s.cat.set_categories([4,3,2,1]) + # GH18862 (let Series.cat.rename_categories take callables) + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + result = s.cat.rename_categories(lambda x: x.upper()) + expected = Series(Categorical(["A", "B", "C", "A"], + categories=["A", "B", "C"], + ordered=True)) + tm.assert_series_equal(result, expected) + def test_str_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 from pandas.core.strings import StringMethods