diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 618d7454c67fe..9d559acfa59e7 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -802,6 +802,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) +- Bug in :func:`pandas.wide_to_long` where a ``ValueError`` was incorrectly raised when a string is passed to the ``stubnames`` argument and a column name is a substring of that stubname (:issue:`22468`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index f4b96c8f1ca49..26221143c0cdf 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -409,14 +409,14 @@ def melt_stub(df, stub, i, j, value_vars, sep): return newdf.set_index(i + [j]) - if any(col in stubnames for col in df.columns): - raise ValueError("stubname can't be identical to a column name") - if not is_list_like(stubnames): stubnames = [stubnames] else: stubnames = list(stubnames) + if any(col in stubnames for col in df.columns): + raise ValueError("stubname can't be identical to a column name") + if not is_list_like(i): i = [i] else: diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 81570de7586de..e83a2cb483de7 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -640,3 +640,24 @@ def test_float_suffix(self): result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected) + + def test_col_substring_of_stubname(self): + # GH22468 + # Don't
raise ValueError when a column name is a substring + # of a stubname that's been passed as a string + wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, + 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, + 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, + 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, + 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67} + } + wide_df = pd.DataFrame.from_dict(wide_data) + expected = pd.wide_to_long(wide_df, + stubnames=['PA'], + i=['node_id', 'A'], + j='time') + result = pd.wide_to_long(wide_df, + stubnames='PA', + i=['node_id', 'A'], + j='time') + tm.assert_frame_equal(result, expected)