@@ -786,6 +786,82 @@ def lreshape(data, groups, dropna=True, label=None):
786
786
787
787
return DataFrame (mdata , columns = id_cols + pivot_cols )
788
788
789
def wide_to_long(df, stubnames, i, j):
    """
    Wide panel to long format. Less flexible but more user-friendly than melt.

    Parameters
    ----------
    df : DataFrame
        The wide-format DataFrame
    stubnames : list
        A non-empty list of stub names. The wide format variables are
        assumed to start with the stub names. Stub names are matched
        literally (not as regular expressions) and the list is not modified.
    i : str
        The name of the id variable.
    j : str
        The name of the subobservation variable. It is filled with whatever
        follows the stub name in each wide column name, converted to int.

    Returns
    -------
    DataFrame
        A DataFrame that contains each stub name as a variable as well as
        variables for i and j. The result is indexed by ``[i, j]``.

    Raises
    ------
    IndexError
        If `stubnames` is empty.
    ValueError
        If the part of a wide column name that follows its stub cannot be
        converted to an integer.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> np.random.seed(123)
    >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
    ...                    "A1980" : {0 : "d", 1 : "e", 2 : "f"},
    ...                    "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
    ...                    "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
    ...                    "X"     : dict(zip(range(3), np.random.randn(3)))
    ...                   })
    >>> df["id"] = df.index
    >>> df
      A1970 A1980  B1970  B1980         X  id
    0     a     d    2.5    3.2 -1.085631   0
    1     b     e    1.2    1.3  0.997345   1
    2     c     f    0.7    0.1  0.282978   2
    >>> wide_to_long(df, ["A", "B"], i="id", j="year")
                    X  A    B
    id year
    0  1970 -1.085631  a  2.5
    1  1970  0.997345  b  1.2
    2  1970  0.282978  c  0.7
    0  1980 -1.085631  d  3.2
    1  1980  0.997345  e  1.3
    2  1980  0.282978  f  0.1

    Notes
    -----
    All extra variables are treated as extra id variables. This simply uses
    `pandas.melt` under the hood, but is hard-coded to "do the right thing"
    in a typical case.
    """
    # Work on a private copy so the caller's list is never mutated.
    stubs = list(stubnames)

    def starts_with(label, prefix):
        # Literal prefix test; str() lets non-string labels be compared by
        # their text form instead of raising.
        return str(label).startswith(prefix)

    def melt_stub(stub, id_vars):
        # Melt every column belonging to one stub into a single value column
        # named after the stub, with the numeric suffix stored in column j.
        value_vars = [col for col in df.columns if starts_with(col, stub)]
        melted = melt(df, id_vars=id_vars, value_vars=value_vars,
                      value_name=stub, var_name=j)
        # Every melted name starts with the stub, so slicing by its length
        # removes exactly the leading prefix and nothing else.
        melted[j] = melted[j].str[len(stub):].astype(int)
        return melted

    # Everything that does not start with any stub is carried as an id column.
    stub_prefixes = tuple(stubs)
    id_vars = [col for col in df.columns
               if not starts_with(col, stub_prefixes)]
    if i not in id_vars:
        id_vars += [i]

    # Melt the first stub, then outer-merge each remaining stub onto it so
    # that every (id columns, j) combination ends up on one row.
    newdf = melt_stub(stubs[0], id_vars)
    for stub in stubs[1:]:
        new = melt_stub(stub, id_vars)
        newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False)
    return newdf.set_index([i, j])
789
865
790
866
def convert_dummies (data , cat_variables , prefix_sep = '_' ):
791
867
"""
0 commit comments