5
5
6
6
import numpy as np
7
7
8
+ import six
9
+
8
10
from pandas .core .series import Series
9
11
from pandas .core .frame import DataFrame
10
12
11
13
from pandas .core .categorical import Categorical
12
14
from pandas .core .common import (notnull , _ensure_platform_int , _maybe_promote ,
13
- _maybe_upcast , isnull )
15
+ isnull )
14
16
from pandas .core .groupby import (get_group_index , _compress_group_index ,
15
17
decons_group_index )
16
18
import pandas .core .common as com
17
19
import pandas .algos as algos
18
- from pandas import lib
19
20
20
- from pandas .core .index import MultiIndex , Index
21
+ from pandas .core .index import MultiIndex
21
22
22
23
23
24
class ReshapeError (Exception ):
@@ -35,21 +36,26 @@ class _Unstacker(object):
35
36
36
37
Examples
37
38
--------
39
+ >>> import pandas as pd
40
+ >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
41
+ ... ('two', 'a'), ('two', 'b')])
42
+ >>> s = pd.Series(np.arange(1.0, 5.0), index=index)
38
43
>>> s
39
- one a 1.
40
- one b 2.
41
- two a 3.
42
- two b 4.
44
+ one a 1
45
+ b 2
46
+ two a 3
47
+ b 4
48
+ dtype: float64
43
49
44
50
>>> s.unstack(level=-1)
45
51
a b
46
- one 1. 2.
47
- two 3. 4.
52
+ one 1 2
53
+ two 3 4
48
54
49
55
>>> s.unstack(level=0)
50
56
one two
51
- a 1. 2.
52
- b 3. 4.
57
+ a 1 3
58
+ b 2 4
53
59
54
60
Returns
55
61
-------
@@ -159,7 +165,7 @@ def get_result(self):
159
165
values [j ] = orig_values [i ]
160
166
else :
161
167
index = index .take (self .unique_groups )
162
-
168
+
163
169
return DataFrame (values , index = index , columns = columns )
164
170
165
171
def get_new_values (self ):
@@ -601,7 +607,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
601
607
602
608
603
609
def melt (frame , id_vars = None , value_vars = None ,
604
- var_name = None , value_name = 'value' ):
610
+ var_name = None , value_name = 'value' , col_level = None ):
605
611
"""
606
612
"Unpivots" a DataFrame from wide format to long format, optionally leaving
607
613
id variables set
@@ -613,27 +619,47 @@ def melt(frame, id_vars=None, value_vars=None,
613
619
value_vars : tuple, list, or ndarray
614
620
var_name : scalar, if None uses frame.columns.name or 'variable'
615
621
value_name : scalar, default 'value'
622
+ col_level : scalar, if columns are a MultiIndex then use this level to melt
616
623
617
624
Examples
618
625
--------
626
+ >>> import pandas as pd
627
+ >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
628
+ ... 'B': {0: 1, 1: 3, 2: 5},
629
+ ... 'C': {0: 2, 1: 4, 2: 6}})
630
+
619
631
>>> df
620
- A B C
621
- a 1 2
622
- b 3 4
623
- c 5 6
632
+ A B C
633
+ 0 a 1 2
634
+ 1 b 3 4
635
+ 2 c 5 6
624
636
625
637
>>> melt(df, id_vars=['A'], value_vars=['B'])
626
- A variable value
627
- a B 1
628
- b B 3
629
- c B 5
630
-
638
+ A variable value
639
+ 0 a B 1
640
+ 1 b B 3
641
+ 2 c B 5
642
+
631
643
>>> melt(df, id_vars=['A'], value_vars=['B'],
632
644
... var_name='myVarname', value_name='myValname')
633
- A myVarname myValname
634
- a B 1
635
- b B 3
636
- c B 5
645
+ A myVarname myValname
646
+ 0 a B 1
647
+ 1 b B 3
648
+ 2 c B 5
649
+
650
+ >>> df.columns = [list('ABC'), list('DEF')]
651
+
652
+ >>> melt(df, col_level=0, id_vars=['A'], value_vars=['B'])
653
+ A variable value
654
+ 0 a B 1
655
+ 1 b B 3
656
+ 2 c B 5
657
+
658
+ >>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')])
659
+ (A, D) variable_0 variable_1 value
660
+ 0 a B E 1
661
+ 1 b B E 3
662
+ 2 c B E 5
637
663
638
664
"""
639
665
# TODO: what about the existing index?
@@ -652,8 +678,21 @@ def melt(frame, id_vars=None, value_vars=None,
652
678
else :
653
679
frame = frame .copy ()
654
680
681
+ if col_level is not None : # allow list or other?
682
+ frame .columns = frame .columns .get_level_values (col_level ) # frame is a copy
683
+
655
684
if var_name is None :
656
- var_name = frame .columns .name if frame .columns .name is not None else 'variable'
685
+ if isinstance (frame .columns , MultiIndex ):
686
+ if len (frame .columns .names ) == len (set (frame .columns .names )):
687
+ var_name = frame .columns .names
688
+ else :
689
+ var_name = ['variable_%s' % i for i in
690
+ xrange (len (frame .columns .names ))]
691
+ else :
692
+ var_name = [frame .columns .name if frame .columns .name is not None
693
+ else 'variable' ]
694
+ if isinstance (var_name , six .string_types ):
695
+ var_name = [var_name ]
657
696
658
697
N , K = frame .shape
659
698
K -= len (id_vars )
@@ -662,11 +701,13 @@ def melt(frame, id_vars=None, value_vars=None,
662
701
for col in id_vars :
663
702
mdata [col ] = np .tile (frame .pop (col ).values , K )
664
703
665
- mcolumns = id_vars + [ var_name , value_name ]
704
+ mcolumns = id_vars + var_name + [ value_name ]
666
705
667
706
mdata [value_name ] = frame .values .ravel ('F' )
668
- mdata [var_name ] = np .asarray (frame .columns ).repeat (N )
669
-
707
+ for i , col in enumerate (var_name ):
708
+ # asanyarray will keep the columns as an Index
709
+ mdata [col ] = np .asanyarray (frame .columns .get_level_values (i )).repeat (N )
710
+
670
711
return DataFrame (mdata , columns = mcolumns )
671
712
672
713
@@ -683,13 +724,16 @@ def lreshape(data, groups, dropna=True, label=None):
683
724
684
725
Examples
685
726
--------
727
+ >>> import pandas as pd
728
+ >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
729
+ ... 'team': ['Red Sox', 'Yankees'],
730
+ ... 'year1': [2007, 2007], 'year2': [2008, 2008]})
686
731
>>> data
687
732
hr1 hr2 team year1 year2
688
733
0 514 545 Red Sox 2007 2008
689
734
1 573 526 Yankees 2007 2008
690
735
691
- >>> pd.lreshape(data, {'year': ['year1', 'year2'],
692
- 'hr': ['hr1', 'hr2']})
736
+ >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
693
737
team hr year
694
738
0 Red Sox 514 2007
695
739
1 Yankees 573 2007
0 commit comments