Skip to content

Commit 4c12848

Browse files
committed
Merge pull request #3649 from hoechenberger/master
ENH: Allow for custom variable/value column names when melt()'ing
2 parents 79cda50 + f36d7a8 commit 4c12848

File tree

4 files changed

+92
-17
lines changed

4 files changed

+92
-17
lines changed

doc/source/reshaping.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,9 @@ Reshaping by Melt
200200
The ``melt`` function found in ``pandas.core.reshape`` is useful to massage a
201201
DataFrame into a format where one or more columns are identifier variables,
202202
while all other columns, considered measured variables, are "pivoted" to the
203-
row axis, leaving just two non-identifier columns, "variable" and "value".
203+
row axis, leaving just two non-identifier columns, "variable" and "value". The
204+
names of those columns can be customized by supplying the ``var_name`` and
205+
``value_name`` parameters.
204206

205207
For instance,
206208

@@ -212,6 +214,7 @@ For instance,
212214
'weight' : [130, 150]})
213215
cheese
214216
melt(cheese, id_vars=['first', 'last'])
217+
melt(cheese, id_vars=['first', 'last'], var_name='quantity')
215218
216219
Combining with stats and GroupBy
217220
--------------------------------

doc/source/v0.11.1.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ Enhancements
140140
import os
141141
os.remove('mi.csv')
142142

143+
- ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name``
144+
to specify custom names for the "variable" and "value" columns of the returned DataFrame.
145+
143146
Bug Fixes
144147
~~~~~~~~~
145148

pandas/core/reshape.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -600,16 +600,19 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
600600
return result
601601

602602

603-
def melt(frame, id_vars=None, value_vars=None):
603+
def melt(frame, id_vars=None, value_vars=None,
604+
var_name='variable', value_name='value'):
604605
"""
605606
"Unpivots" a DataFrame from wide format to long format, optionally leaving
606607
id variables set
607608
608609
Parameters
609610
----------
610611
frame : DataFrame
611-
id_vars :
612-
value_vars :
612+
id_vars : tuple, list, or ndarray
613+
value_vars : tuple, list, or ndarray
614+
var_name : scalar, default 'variable' (name of the column holding the melted column labels)
615+
value_name : scalar, default 'value' (name of the column holding the melted values)
613616
614617
Examples
615618
--------
@@ -621,9 +624,16 @@ def melt(frame, id_vars=None, value_vars=None):
621624
622625
>>> melt(df, id_vars=['A'], value_vars=['B'])
623626
A variable value
624-
a B 1
625-
b B 3
626-
c B 5
627+
a B 1
628+
b B 3
629+
c B 5
630+
631+
>>> melt(df, id_vars=['A'], value_vars=['B'],
632+
... var_name='myVarname', value_name='myValname')
633+
A myVarname myValname
634+
a B 1
635+
b B 3
636+
c B 5
627637
"""
628638
# TODO: what about the existing index?
629639
if id_vars is not None:
@@ -648,11 +658,11 @@ def melt(frame, id_vars=None, value_vars=None):
648658
for col in id_vars:
649659
mdata[col] = np.tile(frame.pop(col).values, K)
650660

651-
mcolumns = id_vars + ['variable', 'value']
661+
mcolumns = id_vars + [var_name, value_name]
652662

653-
mdata['value'] = frame.values.ravel('F')
654-
655-
mdata['variable'] = np.asarray(frame.columns).repeat(N)
663+
mdata[value_name] = frame.values.ravel('F')
664+
mdata[var_name] = np.asarray(frame.columns).repeat(N)
665+
656666
return DataFrame(mdata, columns=mcolumns)
657667

658668

pandas/tests/test_reshape.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,73 @@ def test_melt():
2525
df['id1'] = (df['A'] > 0).astype(int)
2626
df['id2'] = (df['B'] > 0).astype(int)
2727

28-
molten1 = melt(df)
29-
molten2 = melt(df, id_vars=['id1'])
30-
molten3 = melt(df, id_vars=['id1', 'id2'])
31-
molten4 = melt(df, id_vars=['id1', 'id2'],
28+
var_name = 'var'
29+
value_name = 'val'
30+
31+
# Default column names
32+
result = melt(df)
33+
result1 = melt(df, id_vars=['id1'])
34+
result2 = melt(df, id_vars=['id1', 'id2'])
35+
result3 = melt(df, id_vars=['id1', 'id2'],
3236
value_vars='A')
33-
molten5 = melt(df, id_vars=['id1', 'id2'],
37+
result4 = melt(df, id_vars=['id1', 'id2'],
3438
value_vars=['A', 'B'])
35-
39+
40+
expected4 = DataFrame({'id1': df['id1'].tolist() * 2,
41+
'id2': df['id2'].tolist() * 2,
42+
'variable': ['A']*10 + ['B']*10,
43+
'value': df['A'].tolist() + df['B'].tolist()},
44+
columns=['id1', 'id2', 'variable', 'value'])
45+
tm.assert_frame_equal(result4, expected4)
46+
47+
# Supply custom name for the 'variable' column
48+
result5 = melt(df, var_name=var_name)
49+
result6 = melt(df, id_vars=['id1'], var_name=var_name)
50+
result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name)
51+
result8 = melt(df, id_vars=['id1', 'id2'],
52+
value_vars='A', var_name=var_name)
53+
result9 = melt(df, id_vars=['id1', 'id2'],
54+
value_vars=['A', 'B'], var_name=var_name)
55+
56+
expected9 = DataFrame({'id1': df['id1'].tolist() * 2,
57+
'id2': df['id2'].tolist() * 2,
58+
var_name: ['A']*10 + ['B']*10,
59+
'value': df['A'].tolist() + df['B'].tolist()},
60+
columns=['id1', 'id2', var_name, 'value'])
61+
tm.assert_frame_equal(result9, expected9)
62+
63+
# Supply custom name for the 'value' column
64+
result10 = melt(df, value_name=value_name)
65+
result11 = melt(df, id_vars=['id1'], value_name=value_name)
66+
result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name)
67+
result13 = melt(df, id_vars=['id1', 'id2'],
68+
value_vars='A', value_name=value_name)
69+
result14 = melt(df, id_vars=['id1', 'id2'],
70+
value_vars=['A', 'B'], value_name=value_name)
71+
72+
expected14 = DataFrame({'id1': df['id1'].tolist() * 2,
73+
'id2': df['id2'].tolist() * 2,
74+
'variable': ['A']*10 + ['B']*10,
75+
value_name: df['A'].tolist() + df['B'].tolist()},
76+
columns=['id1', 'id2', 'variable', value_name])
77+
tm.assert_frame_equal(result14, expected14)
78+
79+
# Supply custom names for the 'variable' and 'value' columns
80+
result15 = melt(df, var_name=var_name, value_name=value_name)
81+
result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name)
82+
result17 = melt(df, id_vars=['id1', 'id2'],
83+
var_name=var_name, value_name=value_name)
84+
result18 = melt(df, id_vars=['id1', 'id2'],
85+
value_vars='A', var_name=var_name, value_name=value_name)
86+
result19 = melt(df, id_vars=['id1', 'id2'],
87+
value_vars=['A', 'B'], var_name=var_name, value_name=value_name)
88+
89+
expected19 = DataFrame({'id1': df['id1'].tolist() * 2,
90+
'id2': df['id2'].tolist() * 2,
91+
var_name: ['A']*10 + ['B']*10,
92+
value_name: df['A'].tolist() + df['B'].tolist()},
93+
columns=['id1', 'id2', var_name, value_name])
94+
tm.assert_frame_equal(result19, expected19)
3695

3796
def test_convert_dummies():
3897
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',

0 commit comments

Comments
 (0)