Skip to content

Commit 02f2c42

Browse files
committed
ENH MultiIndex columns with melt
1 parent 164b1ce commit 02f2c42

File tree

5 files changed

+72
-18
lines changed

5 files changed

+72
-18
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ pandas 0.12
7777
to specify custom column names of the returned DataFrame (:issue:`3649`),
7878
thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name``
7979
is not None, then this will be used as the ``var_name`` (:issue:`4144`).
80+
``melt`` now also supports MultiIndex columns.
8081
- clipboard functions use pyperclip (no dependencies on Windows, alternative
8182
dependencies offered for Linux) (:issue:`3837`).
8283
- Plotting functions now raise a ``TypeError`` before trying to plot anything

pandas/core/index.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1653,7 +1653,9 @@ def get_level_values(self, level):
16531653
num = self._get_level_number(level)
16541654
unique_vals = self.levels[num] # .values
16551655
labels = self.labels[num]
1656-
return unique_vals.take(labels)
1656+
values = unique_vals.take(labels)
1657+
values.name = self.names[num]
1658+
return values
16571659

16581660
def format(self, space=2, sparsify=None, adjoin=True, names=False,
16591661
na_rep='NaN', formatter=None):

pandas/core/reshape.py

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -617,24 +617,42 @@ def melt(frame, id_vars=None, value_vars=None,
617617
618618
Examples
619619
--------
620+
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
621+
...                    'B': {0: 1, 1: 3, 2: 5},
622+
...                    'C': {0: 2, 1: 4, 2: 6}})
623+
620624
>>> df
621-
A B C
622-
a 1 2
623-
b 3 4
624-
c 5 6
625+
A B C
626+
0 a 1 2
627+
1 b 3 4
628+
2 c 5 6
625629
626630
>>> melt(df, id_vars=['A'], value_vars=['B'])
627-
A variable value
628-
a B 1
629-
b B 3
630-
c B 5
631+
A variable value
632+
0 a B 1
633+
1 b B 3
634+
2 c B 5
631635
632636
>>> melt(df, id_vars=['A'], value_vars=['B'],
633637
... var_name='myVarname', value_name='myValname')
634-
A myVarname myValname
635-
a B 1
636-
b B 3
637-
c B 5
638+
A myVarname myValname
639+
0 a B 1
640+
1 b B 3
641+
2 c B 5
642+
643+
>>> df.columns = [list('ABC'), list('DEF')]
644+
645+
>>> melt(df, col_level=0, id_vars=['A'], value_vars=['B'])
646+
A variable value
647+
0 a B 1
648+
1 b B 3
649+
2 c B 5
650+
651+
>>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')])
652+
(A, D) variable_0 variable_1 value
653+
0 a B E 1
654+
1 b B E 3
655+
2 c B E 5
638656
639657
"""
640658
# TODO: what about the existing index?
@@ -653,11 +671,17 @@ def melt(frame, id_vars=None, value_vars=None,
653671
else:
654672
frame = frame.copy()
655673

656-
if col_level: # allow list?
674+
if col_level is not None: # allow list or other?
657675
frame.columns = frame.columns.get_level_values(col_level) # frame is a copy
658676

659677
if var_name is None:
660-
var_name = frame.columns.name if frame.columns.name is not None else 'variable'
678+
if isinstance(frame.columns, MultiIndex):
679+
if len(frame.columns.names) == len(set(frame.columns.names)):
680+
var_name = frame.columns.names
681+
else:
682+
var_name = ['variable_%s' % i for i in range(len(frame.columns.names))]
683+
else:
684+
var_name = frame.columns.name if frame.columns.name is not None else 'variable'
661685

662686
N, K = frame.shape
663687
K -= len(id_vars)
@@ -666,11 +690,18 @@ def melt(frame, id_vars=None, value_vars=None,
666690
for col in id_vars:
667691
mdata[col] = np.tile(frame.pop(col).values, K)
668692

669-
mcolumns = id_vars + [var_name, value_name]
693+
if isinstance(var_name, list):
694+
mcolumns = id_vars + var_name + [value_name]
695+
else:
696+
mcolumns = id_vars + [var_name, value_name]
670697

671698
mdata[value_name] = frame.values.ravel('F')
672-
mdata[var_name] = np.asarray(frame.columns).repeat(N)
673-
699+
if isinstance(frame.columns, MultiIndex):
700+
for i, col in enumerate(var_name):
701+
mdata[col] = np.asarray(frame.columns.get_level_values(i)).repeat(N)
702+
else: # assume isinstance(frame.columns, Index):
703+
mdata[var_name] = np.asarray(frame.columns).repeat(N)
704+
674705
return DataFrame(mdata, columns=mcolumns)
675706

676707

pandas/tests/test_index.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,8 @@ def test_get_level_values(self):
10291029
expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux']
10301030
self.assert_(np.array_equal(result, expected))
10311031

1032+
self.assertEquals(result.name, 'first')
1033+
10321034
result = self.index.get_level_values('first')
10331035
expected = self.index.get_level_values(0)
10341036
self.assert_(np.array_equal(result, expected))

pandas/tests/test_reshape.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import nose
1111

1212
from pandas import DataFrame
13+
import pandas as pd
1314

1415
from numpy import nan
1516
import numpy as np
@@ -30,6 +31,12 @@ def setUp(self):
3031
self.var_name = 'var'
3132
self.value_name = 'val'
3233

34+
self.df1 = pd.DataFrame([[ 1.067683, -1.110463, 0.20867 ],
35+
[-1.321405, 0.368915, -1.055342],
36+
[-0.807333, 0.08298 , -0.873361]])
37+
self.df1.columns = [list('ABC'), list('abc')]
38+
self.df1.columns.names = ['CAP', 'low']
39+
3340
def test_default_col_names(self):
3441
result = melt(self.df)
3542
self.assertEqual(result.columns.tolist(), ['variable', 'value'])
@@ -128,6 +135,17 @@ def test_custom_var_and_value_name(self):
128135
result20 = melt(self.df)
129136
self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
130137

138+
def test_col_level(self):
139+
res1 = melt(self.df1, col_level=0)
140+
res2 = melt(self.df1, col_level='CAP')
141+
self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
142+
self.assertEqual(res2.columns.tolist(), ['CAP', 'value'])
143+
144+
def test_multiindex(self):
145+
res = pd.melt(self.df1)
146+
self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value'])
147+
148+
131149
class TestConvertDummies(unittest.TestCase):
132150
def test_convert_dummies(self):
133151
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',

0 commit comments

Comments
 (0)