Skip to content

Commit f4246fb

Browse files
committed
Merge pull request #4150 from hayd/melt_multi
ENH: Melt with MultiIndex columns
2 parents 56009bd + 92fdeff commit f4246fb

File tree

5 files changed

+100
-33
lines changed

5 files changed

+100
-33
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ pandas 0.12
7878
to specify custom column names of the returned DataFrame (:issue:`3649`),
7979
thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name``
8080
is not None, then this will be used as the ``var_name`` (:issue:`4144`).
81+
Also adds support for MultiIndex columns.
8182
- clipboard functions use pyperclip (no dependencies on Windows, alternative
8283
dependencies offered for Linux) (:issue:`3837`).
8384
- Plotting functions now raise a ``TypeError`` before trying to plot anything

pandas/core/index.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1653,7 +1653,9 @@ def get_level_values(self, level):
16531653
num = self._get_level_number(level)
16541654
unique_vals = self.levels[num] # .values
16551655
labels = self.labels[num]
1656-
return unique_vals.take(labels)
1656+
values = unique_vals.take(labels)
1657+
values.name = self.names[num]
1658+
return values
16571659

16581660
def format(self, space=2, sparsify=None, adjoin=True, names=False,
16591661
na_rep='NaN', formatter=None):

pandas/core/reshape.py

Lines changed: 76 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,20 @@
55

66
import numpy as np
77

8+
import six
9+
810
from pandas.core.series import Series
911
from pandas.core.frame import DataFrame
1012

1113
from pandas.core.categorical import Categorical
1214
from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote,
13-
_maybe_upcast, isnull)
15+
isnull)
1416
from pandas.core.groupby import (get_group_index, _compress_group_index,
1517
decons_group_index)
1618
import pandas.core.common as com
1719
import pandas.algos as algos
18-
from pandas import lib
1920

20-
from pandas.core.index import MultiIndex, Index
21+
from pandas.core.index import MultiIndex
2122

2223

2324
class ReshapeError(Exception):
@@ -35,21 +36,26 @@ class _Unstacker(object):
3536
3637
Examples
3738
--------
39+
>>> import pandas as pd
40+
>>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
41+
... ('two', 'a'), ('two', 'b')])
42+
>>> s = pd.Series(np.arange(1.0, 5.0), index=index)
3843
>>> s
39-
one a 1.
40-
one b 2.
41-
two a 3.
42-
two b 4.
44+
one a 1
45+
b 2
46+
two a 3
47+
b 4
48+
dtype: float64
4349
4450
>>> s.unstack(level=-1)
4551
a b
46-
one 1. 2.
47-
two 3. 4.
52+
one 1 2
53+
two 3 4
4854
4955
>>> s.unstack(level=0)
5056
one two
51-
a 1. 2.
52-
b 3. 4.
57+
a 1 2
58+
b 3 4
5359
5460
Returns
5561
-------
@@ -159,7 +165,7 @@ def get_result(self):
159165
values[j] = orig_values[i]
160166
else:
161167
index = index.take(self.unique_groups)
162-
168+
163169
return DataFrame(values, index=index, columns=columns)
164170

165171
def get_new_values(self):
@@ -601,7 +607,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
601607

602608

603609
def melt(frame, id_vars=None, value_vars=None,
604-
var_name=None, value_name='value'):
610+
var_name=None, value_name='value', col_level=None):
605611
"""
606612
"Unpivots" a DataFrame from wide format to long format, optionally leaving
607613
id variables set
@@ -613,27 +619,47 @@ def melt(frame, id_vars=None, value_vars=None,
613619
value_vars : tuple, list, or ndarray
614620
var_name : scalar, if None uses frame.columns.name or 'variable'
615621
value_name : scalar, default 'value'
622+
col_level : scalar, if columns are a MultiIndex then use this level to melt
616623
617624
Examples
618625
--------
626+
>>> import pandas as pd
627+
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
628+
... 'B': {0: 1, 1: 3, 2: 5},
629+
... 'C': {0: 2, 1: 4, 2: 6}})
630+
619631
>>> df
620-
A B C
621-
a 1 2
622-
b 3 4
623-
c 5 6
632+
A B C
633+
0 a 1 2
634+
1 b 3 4
635+
2 c 5 6
624636
625637
>>> melt(df, id_vars=['A'], value_vars=['B'])
626-
A variable value
627-
a B 1
628-
b B 3
629-
c B 5
630-
638+
A variable value
639+
0 a B 1
640+
1 b B 3
641+
2 c B 5
642+
631643
>>> melt(df, id_vars=['A'], value_vars=['B'],
632644
... var_name='myVarname', value_name='myValname')
633-
A myVarname myValname
634-
a B 1
635-
b B 3
636-
c B 5
645+
A myVarname myValname
646+
0 a B 1
647+
1 b B 3
648+
2 c B 5
649+
650+
>>> df.columns = [list('ABC'), list('DEF')]
651+
652+
>>> melt(df, col_level=0, id_vars=['A'], value_vars=['B'])
653+
A variable value
654+
0 a B 1
655+
1 b B 3
656+
2 c B 5
657+
658+
>>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')])
659+
(A, D) variable_0 variable_1 value
660+
0 a B E 1
661+
1 b B E 3
662+
2 c B E 5
637663
638664
"""
639665
# TODO: what about the existing index?
@@ -652,8 +678,21 @@ def melt(frame, id_vars=None, value_vars=None,
652678
else:
653679
frame = frame.copy()
654680

681+
if col_level is not None: # allow list or other?
682+
frame.columns = frame.columns.get_level_values(col_level) # frame is a copy
683+
655684
if var_name is None:
656-
var_name = frame.columns.name if frame.columns.name is not None else 'variable'
685+
if isinstance(frame.columns, MultiIndex):
686+
if len(frame.columns.names) == len(set(frame.columns.names)):
687+
var_name = frame.columns.names
688+
else:
689+
var_name = ['variable_%s' % i for i in
690+
xrange(len(frame.columns.names))]
691+
else:
692+
var_name = [frame.columns.name if frame.columns.name is not None
693+
else 'variable']
694+
if isinstance(var_name, six.string_types):
695+
var_name = [var_name]
657696

658697
N, K = frame.shape
659698
K -= len(id_vars)
@@ -662,11 +701,13 @@ def melt(frame, id_vars=None, value_vars=None,
662701
for col in id_vars:
663702
mdata[col] = np.tile(frame.pop(col).values, K)
664703

665-
mcolumns = id_vars + [var_name, value_name]
704+
mcolumns = id_vars + var_name + [value_name]
666705

667706
mdata[value_name] = frame.values.ravel('F')
668-
mdata[var_name] = np.asarray(frame.columns).repeat(N)
669-
707+
for i, col in enumerate(var_name):
708+
# asanyarray will keep the columns as an Index
709+
mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N)
710+
670711
return DataFrame(mdata, columns=mcolumns)
671712

672713

@@ -683,13 +724,16 @@ def lreshape(data, groups, dropna=True, label=None):
683724
684725
Examples
685726
--------
727+
>>> import pandas as pd
728+
>>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
729+
... 'team': ['Red Sox', 'Yankees'],
730+
... 'year1': [2007, 2008], 'year2': [2008, 2008]})
686731
>>> data
687732
hr1 hr2 team year1 year2
688733
0 514 545 Red Sox 2007 2008
689734
1 573 526 Yankees 2007 2008
690735
691-
>>> pd.lreshape(data, {'year': ['year1', 'year2'],
692-
'hr': ['hr1', 'hr2']})
736+
>>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
693737
team hr year
694738
0 Red Sox 514 2007
695739
1 Yankees 573 2007

pandas/tests/test_index.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,8 @@ def test_get_level_values(self):
10291029
expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux']
10301030
self.assert_(np.array_equal(result, expected))
10311031

1032+
self.assertEquals(result.name, 'first')
1033+
10321034
result = self.index.get_level_values('first')
10331035
expected = self.index.get_level_values(0)
10341036
self.assert_(np.array_equal(result, expected))

pandas/tests/test_reshape.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import nose
1111

1212
from pandas import DataFrame
13+
import pandas as pd
1314

1415
from numpy import nan
1516
import numpy as np
@@ -30,6 +31,12 @@ def setUp(self):
3031
self.var_name = 'var'
3132
self.value_name = 'val'
3233

34+
self.df1 = pd.DataFrame([[ 1.067683, -1.110463, 0.20867 ],
35+
[-1.321405, 0.368915, -1.055342],
36+
[-0.807333, 0.08298 , -0.873361]])
37+
self.df1.columns = [list('ABC'), list('abc')]
38+
self.df1.columns.names = ['CAP', 'low']
39+
3340
def test_default_col_names(self):
3441
result = melt(self.df)
3542
self.assertEqual(result.columns.tolist(), ['variable', 'value'])
@@ -128,6 +135,17 @@ def test_custom_var_and_value_name(self):
128135
result20 = melt(self.df)
129136
self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
130137

138+
def test_col_level(self):
139+
res1 = melt(self.df1, col_level=0)
140+
res2 = melt(self.df1, col_level='CAP')
141+
self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
142+
self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
143+
144+
def test_multiindex(self):
145+
res = pd.melt(self.df1)
146+
self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value'])
147+
148+
131149
class TestConvertDummies(unittest.TestCase):
132150
def test_convert_dummies(self):
133151
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',

0 commit comments

Comments
 (0)