From efae4c717d61e80bce501d38bdc2da531e267561 Mon Sep 17 00:00:00 2001 From: csmcallister Date: Thu, 23 Aug 2018 17:14:59 -0500 Subject: [PATCH] BUG:reorder type check/conversion so wide_to_long handles str arg for stubnames. GH22468 DOC:Updating whatsnew (#22468) TST:test bug fix and old functionality (#22468) CLN:complying with PEP8 issues (#22468) TST: Moved wide_to_long test to test_melt.py and fixed linting issues (#22468) CLN: Adjusted indentation for linting (#22468) CLN: Adjusted spacing for linting (#22468) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/reshape/melt.py | 6 +++--- pandas/tests/reshape/test_melt.py | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3a44b0260153c..244f6883a2f97 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -790,6 +790,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) +- Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index f4b96c8f1ca49..26221143c0cdf 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -409,14 +409,14 @@ def melt_stub(df, stub, i, j, value_vars, sep): return newdf.set_index(i + [j]) - if any(col in stubnames for col in df.columns): - raise ValueError("stubname can't be identical to a column name") 
- if not is_list_like(stubnames): stubnames = [stubnames] else: stubnames = list(stubnames) + if any(col in stubnames for col in df.columns): + raise ValueError("stubname can't be identical to a column name") + if not is_list_like(i): i = [i] else: diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 81570de7586de..e83a2cb483de7 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -640,3 +640,24 @@ def test_float_suffix(self): result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected) + + def test_col_substring_of_stubname(self): + # GH22468 + # Don't raise ValueError when a column name is a substring + # of a stubname that's been passed as a string + wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, + 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, + 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, + 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, + 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67} + } + wide_df = pd.DataFrame.from_dict(wide_data) + expected = pd.wide_to_long(wide_df, + stubnames=['PA'], + i=['node_id', 'A'], + j='time') + result = pd.wide_to_long(wide_df, + stubnames='PA', + i=['node_id', 'A'], + j='time') + tm.assert_frame_equal(result, expected)