diff --git a/doc/source/api.rst b/doc/source/api.rst index b708e35f3b6e1..57ae089e463c8 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -560,6 +560,7 @@ strings and apply several methods to it. These can be acccessed like Series.str.strip Series.str.swapcase Series.str.title + Series.str.translate Series.str.upper Series.str.wrap Series.str.zfill diff --git a/doc/source/text.rst b/doc/source/text.rst index 4ec041d19ce1b..810e3e0146f9f 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -273,6 +273,7 @@ Method Summary :meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize`` :meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase`` :meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize`` + :meth:`~Series.str.translate`,Equivalent to ``str.translate`` :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum`` :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha`` :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit`` diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index a51a8ebc04535..08a2946279a98 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -189,13 +189,13 @@ String Methods Enhancements :ref:`Continuing from v0.16.0 `, following enhancements are performed to make string operation easier. -- Following new methods are accesible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`) +- Following new methods are accessible via ``.str`` accessor to apply the function to each value. This is intended to make it more consistent with standard methods on strings. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`, :issue:`10052`) ================ =============== =============== =============== ================ .. .. Methods .. .. 
================ =============== =============== =============== ================ ``capitalize()`` ``swapcase()`` ``normalize()`` ``partition()`` ``rpartition()`` - ``index()`` ``rindex()`` + ``index()`` ``rindex()`` ``translate()`` ================ =============== =============== =============== ================ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index d92d164acdd4b..a25879e61b580 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -890,6 +890,44 @@ def str_wrap(arr, width, **kwargs): return _na_map(lambda s: '\n'.join(tw.wrap(s)), arr) +def str_translate(arr, table, deletechars=None): + """ + Map all characters in the string through the given mapping table. + Equivalent to standard :meth:`str.translate`. Note that the optional + argument deletechars is only valid if you are using python 2. For python 3, + character deletion should be specified via the table argument. + + Parameters + ---------- + table : dict (python 3), str or None (python 2) + In python 3, table is a mapping of Unicode ordinals to Unicode ordinals, + strings, or None. Unmapped characters are left untouched. Characters + mapped to None are deleted. :meth:`str.maketrans` is a helper function + for making translation tables. + In python 2, table is either a string of length 256 or None. If the + table argument is None, no translation is applied and the operation + simply removes the characters in deletechars. :func:`string.maketrans` + is a helper function for making translation tables. + deletechars : str, optional (python 2) + A string of characters to delete. This argument is only valid + in python 2. + + Returns + ------- + translated : Series/Index of objects + """ + if deletechars is None: + f = lambda x: x.translate(table) + else: + from pandas import compat + if compat.PY3: + raise ValueError("deletechars is not a valid argument for " + "str.translate in python 3. 
You should simply " + "specify character deletions in the table argument") + f = lambda x: x.translate(table, deletechars) + return _na_map(f, arr) + + def str_get(arr, i): """ Extract element from lists, tuples, or strings in each element in the @@ -1283,6 +1321,11 @@ def get_dummies(self, sep='|'): result = str_get_dummies(self.series, sep) return self._wrap_result(result) + @copy(str_translate) + def translate(self, table, deletechars=None): + result = str_translate(self.series, table, deletechars) + return self._wrap_result(result) + count = _pat_wrapper(str_count, flags=True) startswith = _pat_wrapper(str_startswith, na=True) endswith = _pat_wrapper(str_endswith, na=True) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 8cd8ac9f66a1f..9011e6c64b097 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -700,6 +700,12 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_str, empty.str.capitalize()) tm.assert_series_equal(empty_str, empty.str.swapcase()) tm.assert_series_equal(empty_str, empty.str.normalize('NFC')) + if compat.PY3: + table = str.maketrans('a', 'b') + else: + import string + table = string.maketrans('a', 'b') + tm.assert_series_equal(empty_str, empty.str.translate(table)) def test_empty_str_methods_to_frame(self): empty_str = empty = Series(dtype=str) @@ -1039,6 +1045,37 @@ def test_pad_fillchar(self): with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): result = values.str.pad(5, fillchar=5) + def test_translate(self): + for klass in [Series, Index]: + s = klass(['abcdefg', 'abcc', 'cdddfg', 'cdefggg']) + if not compat.PY3: + import string + table = string.maketrans('abc', 'cde') + else: + table = str.maketrans('abc', 'cde') + result = s.str.translate(table) + expected = klass(['cdedefg', 'cdee', 'edddfg', 'edefggg']) + tm.assert_array_equal(result, expected) + + # use of deletechars is python 2 only + if not compat.PY3: + result = 
s.str.translate(table, deletechars='fg') + expected = klass(['cdede', 'cdee', 'eddd', 'ede']) + tm.assert_array_equal(result, expected) + + result = s.str.translate(None, deletechars='fg') + expected = klass(['abcde', 'abcc', 'cddd', 'cde']) + tm.assert_array_equal(result, expected) + else: + with tm.assertRaisesRegexp(ValueError, "deletechars is not a valid argument"): + result = s.str.translate(table, deletechars='fg') + + # Series with non-string values + s = Series(['a', 'b', 'c', 1.2]) + expected = Series(['c', 'd', 'e', np.nan]) + result = s.str.translate(table) + tm.assert_array_equal(result, expected) + def test_center_ljust_rjust(self): values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])