From fa57c0de8e53fa55c913287e9e65f8ab51773571 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 15 Apr 2014 20:14:54 -0400 Subject: [PATCH] BUG: properly rename single group match in Series.str.extract() --- doc/source/release.rst | 2 ++ pandas/core/strings.py | 9 ++++++++- pandas/tests/test_strings.py | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 2fea35a887f34..fb4f06ac03ff9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -374,6 +374,8 @@ Bug Fixes - Bug in ``groupby.get_group`` where a datetlike wasn't always accepted (:issue:`5267`) - Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` affects to NaT (:issue:`5546`) - Bug in arithmetic operations affecting to NaT (:issue:`6873`) +- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single + group match wasn't renamed to the group name pandas 0.13.1 ------------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6add1767a05d6..7bcc534a34a1f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -387,6 +387,13 @@ def f(x): return _na_map(f, arr, na) +def _get_single_group_name(rx): + try: + return list(rx.groupindex.keys()).pop() + except IndexError: + return None + + def str_extract(arr, pat, flags=0): """ Find groups in each string using passed regular expression @@ -452,7 +459,7 @@ def f(x): return empty_row if regex.groups == 1: result = Series([f(val)[0] for val in arr], - name=regex.groupindex.get(1), + name=_get_single_group_name(regex), index=arr.index) else: names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 2721edcc89e59..412f2c62e55f2 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -556,6 +556,11 @@ def test_extract(self): exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number']) 
tm.assert_frame_equal(result, exp) + # single group renames series properly + s = Series(['A1', 'A2']) + result = s.str.extract(r'(?P<uno>A)\d') + tm.assert_equal(result.name, 'uno') + # GH6348 # not passing index to the extractor def check_index(index):