@@ -42,9 +42,11 @@ class DataFrame(Picklable, Groupable):
42
42
----------
43
43
data : dict
44
44
Mapping of column name --> array or Series/TimeSeries objects
45
- index : array-like
46
- Specific index to use for the Frame, Series will be conformed to this
47
- if you provide it.
45
+ index : array-like, optional
46
+ Specific index to use for the Frame, Series will be conformed
47
+ to this if you provide it. If not input, index will be
48
+ inferred from input Series
49
+ columns : array-like, optional
48
50
49
51
Notes
50
52
-----
@@ -56,12 +58,12 @@ class DataFrame(Picklable, Groupable):
56
58
--------
57
59
DataMatrix: more efficient version of DataFrame for most operations
58
60
59
- Example usage
60
- -------------
61
+ Example
62
+ -------
61
63
>>> d = {'col1' : ts1, 'col2' : ts2}
62
64
>>> df = DataFrame(data=d, index=someIndex)
63
65
"""
64
- def __init__ (self , data = None , index = None ):
66
+ def __init__ (self , data = None , index = None , columns = None ):
65
67
self ._series = {}
66
68
if data is not None and len (data ) > 0 :
67
69
if index is None :
@@ -75,7 +77,7 @@ def __init__(self, data=None, index=None):
75
77
76
78
for k , v in data .iteritems ():
77
79
if isinstance (v , Series ):
78
- # Forces homogoneity and copies data
80
+ # Forces homogeneity and copies data
79
81
self ._series [k ] = v .reindex (self .index )
80
82
else :
81
83
# Copies data and checks length
@@ -169,8 +171,8 @@ def fromDict(cls, inputDict=None, castFloat=True, **kwds):
169
171
170
172
def toDict (self ):
171
173
"""
172
- Simpler pseudo-inverse operation of dictToDataFrame , NaN values will be
173
- included in the resulting dict-tree.
174
+ Simpler pseudo-inverse operation of DataFrame.fromDict , NaN
175
+ values will be included in the resulting dict-tree.
174
176
175
177
Return
176
178
------
@@ -316,9 +318,9 @@ def __setitem__(self, key, value):
316
318
317
319
def __delitem__(self, key):
    """
    Remove the column stored under `key` from this DataFrame.

    Parameters
    ----------
    key : column name

    Note
    ----
    The entry is removed with ``del`` on the internal column mapping,
    so a key that is not present is reported by that mapping instead
    of being silently ignored.
    """
    column_store = self._series
    del column_store[key]
322
324
323
325
def pop (self , item ):
324
326
"""
@@ -611,16 +613,16 @@ def append(self, otherFrame):
611
613
612
614
def asfreq (self , freq , fillMethod = None ):
613
615
"""
614
- Convert all TimeSeries inside to specified frequency using DateOffset
615
- objects. Optionally provide fill method to pad/backfill/interpolate
616
- missing values.
616
+ Convert all TimeSeries inside to specified frequency using
617
+ DateOffset objects. Optionally provide fill method to pad or
618
+ backfill missing values.
617
619
618
620
Parameters
619
621
----------
620
622
offset : DateOffset object, or string in {'WEEKDAY', 'EOM'}
621
623
DateOffset object or subclass (e.g. monthEnd)
622
624
623
- fillMethod : {'backfill', 'pad', 'interpolate', None}
625
+ fillMethod : {'backfill', 'pad', None}
624
626
Method to use for filling holes in new inde
625
627
"""
626
628
if isinstance (freq , datetools .DateOffset ):
@@ -886,38 +888,53 @@ def pivot(self, index=None, columns=None, values=None):
886
888
887
889
return _slow_pivot (self [index ], self [columns ], self [values ])
888
890
889
def reindex(self, index=None, columns=None, fillMethod=None):
    """
    Conform the frame to a new index and/or a new set of columns.

    Parameters
    ----------
    index : array-like, optional
        preferably an Index object (to avoid duplicating data).
        The index is left untouched when None.
    columns : array-like, optional
        Columns of the result. Left untouched when None.
    fillMethod : {'backfill', 'pad', None}
        Method to use for filling data holes using the index.
        Compared case-insensitively; only used when `index` is given.

    Returns
    -------
    y : same type as calling instance
        When both `index` and `columns` are None the calling object
        itself is returned.

    Raises
    ------
    Exception
        If fillMethod is not one of the recognized values.
    """
    # Normalize once so the helpers only ever see 'BACKFILL', 'PAD' or ''.
    method = (fillMethod or '').upper()
    if method not in ('BACKFILL', 'PAD', ''):
        raise Exception("Don't recognize fillMethod: %s" % method)

    result = self

    # Rows first, then columns; each step works on the previous result.
    if index is not None:
        result = result._reindex_index(index, method)

    if columns is not None:
        result = result._reindex_columns(columns)

    return result
921
+
922
+ def _reindex_index (self , index , method ):
923
+ if self .index .equals (index ):
901
924
return self .copy ()
902
925
903
- if len (newIndex ) == 0 :
926
+ if len (index ) == 0 :
904
927
return DataFrame (index = NULL_INDEX )
905
928
906
- if not isinstance (newIndex , Index ):
907
- newIndex = Index (newIndex )
929
+ if not isinstance (index , Index ):
930
+ index = Index (index )
908
931
909
932
if len (self .index ) == 0 :
910
- return DataFrame (index = newIndex )
933
+ return DataFrame (index = index )
911
934
912
- oldMap = self .index .indexMap
913
- newMap = newIndex .indexMap
914
-
915
- fillMethod = fillMethod .upper () if fillMethod else ''
916
- if fillMethod not in ['BACKFILL' , 'PAD' , '' ]:
917
- raise Exception ("Don't recognize fillMethod: %s" % fillMethod )
918
-
919
- fillVec , mask = tseries .getFillVec (self .index , newIndex , oldMap ,
920
- newMap , fillMethod )
935
+ fillVec , mask = tseries .getFillVec (self .index , index ,
936
+ self .index .indexMap ,
937
+ index .indexMap , method )
921
938
922
939
# Maybe this is a bit much? Wish I had unit tests...
923
940
typeHierarchy = [
@@ -938,14 +955,26 @@ def reindex(self, newIndex, fillMethod=None):
938
955
newSeries = {}
939
956
for col , series in self .iteritems ():
940
957
series = series .view (np .ndarray )
941
- for type , dest in typeHierarchy :
942
- if issubclass (series .dtype .type , type ):
958
+ for klass , dest in typeHierarchy :
959
+ if issubclass (series .dtype .type , klass ):
943
960
new = series .take (fillVec ).astype (dest )
944
961
new [- mask ] = missingValue [dest ]
945
962
newSeries [col ] = new
946
963
break
947
964
948
- return DataFrame (newSeries , index = newIndex )
965
+ return DataFrame (newSeries , index = index )
966
+
967
def _reindex_columns(self, columns):
    """
    Return a frame holding exactly the requested columns.

    Parameters
    ----------
    columns : array-like
        Column names wanted in the result.

    Returns
    -------
    y : DataFrame
        An empty frame on the current index when `columns` is empty.
        Otherwise the result of ``self.filterItems(columns)``, with
        every requested column it lacks added and set to NaN.
    """
    if len(columns) == 0:
        return DataFrame(index=self.index)

    result = self.filterItems(columns)

    for name in columns:
        if name in result:
            continue
        # Requested column is absent from the filtered frame: add it
        # as an all-NaN column.
        result[name] = NaN

    return result
949
978
950
979
@property
951
980
def T (self ):
@@ -1000,7 +1029,7 @@ def shift(self, periods, offset=None, timeRule=None):
1000
1029
for col , series in self .iteritems ()])
1001
1030
return DataFrame (data = newValues , index = newIndex )
1002
1031
1003
def apply(self, func, axis=0):
    """
    Applies func to columns (axis=0) or rows (axis=1) of this DataFrame
    and returns either a DataFrame (if the function produces another
    series) or a Series (if the function produces an aggregate value).

    Parameters
    ----------
    func : function
        Function to apply to each column (axis=0) or to each column of
        the transposed frame, i.e. each row (axis=1)
    axis : {0, 1}
        0 applies func to each column, 1 to each row

    Returns
    -------
    y : DataFrame or Series
        A frame without columns is returned unchanged. Otherwise a
        DataFrame when the results are iterable (have ``__iter__``),
        else a Series built from the {label : result} dict. With
        axis=1 a DataFrame result is laid out like the transposed
        frame (one column per original row), matching ``self.T.apply``.

    Raises
    ------
    Exception
        If axis is neither 0 nor 1

    Note
    ----
    Functions altering the index are not supported (yet)
    """
    if not len(self.cols()):
        return self

    if axis == 0:
        target = self
    elif axis == 1:
        target = self.T
    else:
        # Fail with a clear message instead of hitting an unbound
        # 'target' below; same wording as the axis checks in sum/skew.
        raise Exception('Must have 0<= axis <= 1')

    results = dict((k, func(target[k])) for k in target.columns)

    if hasattr(list(results.values())[0], '__iter__'):
        # Each result is aligned with the index of the frame it was
        # computed from. For axis=1 that is the transposed frame, whose
        # index is the original column labels -- not self.index.
        return DataFrame(data=results, index=target.index)
    else:
        return Series.fromDict(results)
1040
1068
1041
1069
def tapply(self, func):
    """
    Apply func to the transposed DataFrame, results as per apply

    Shorthand for ``self.apply(func, axis=1)``.
    """
    row_axis = 1
    return self.apply(func, axis=row_axis)
1046
1074
1047
1075
def applymap (self , func ):
1048
1076
"""
@@ -1323,8 +1351,8 @@ def plot(self, kind='line', **kwds):
1323
1351
Plot the DataFrame's series with the index on the x-axis using
1324
1352
matplotlib / pylab.
1325
1353
1326
- Params
1327
- ------
1354
+ Parameters
1355
+ ----------
1328
1356
kind : {'line', 'bar', 'hist'}
1329
1357
Default: line for TimeSeries, hist for Series
1330
1358
@@ -1414,10 +1442,7 @@ def sum(self, axis=0, asarray=False):
1414
1442
theCount = self .count (axis )
1415
1443
theSum [theCount == 0 ] = NaN
1416
1444
except Exception :
1417
- if axis == 0 :
1418
- theSum = self .apply (np .sum )
1419
- else :
1420
- theSum = self .tapply (np .sum )
1445
+ theSum = self .apply (np .sum , axis = axis )
1421
1446
1422
1447
if asarray :
1423
1448
return theSum
@@ -1428,6 +1453,27 @@ def sum(self, axis=0, asarray=False):
1428
1453
else :
1429
1454
raise Exception ('Must have 0<= axis <= 1' )
1430
1455
1456
def cumsum(self, axis=0):
    """
    Return cumulative sum over requested axis as DataFrame

    NaN values are counted as zero, so the running total carries
    through missing entries instead of turning into NaN.

    Parameters
    ----------
    axis : {0, 1}
        Passed through to ``self.apply``: 0 accumulates along each
        column, 1 along each row

    Returns
    -------
    y : DataFrame
    """
    def _running_total(values):
        # np.array makes a copy, so zeroing NaNs below never touches
        # the data held by the frame.
        arr = np.array(values)
        if issubclass(arr.dtype.type, np.int_):
            # This integer dtype cannot hold NaN; nothing to clean up.
            return arr.cumsum()
        arr[np.isnan(arr)] = 0
        return arr.cumsum()

    return self.apply(_running_total, axis=axis)
1476
+
1431
1477
def product (self , axis = 0 , asarray = False ):
1432
1478
"""
1433
1479
Return array or Series of products over requested axis.
@@ -1664,22 +1710,6 @@ def skew(self, axis=0, asarray=False):
1664
1710
else :
1665
1711
raise Exception ('Must have 0<= axis <= 1' )
1666
1712
1667
- def _withColumns (self , newCols ):
1668
- """
1669
- Utility method, force values matrix to have particular columns
1670
- Can make this as cute as we like
1671
- """
1672
- if len (newCols ) == 0 :
1673
- return DataFrame (index = self .index )
1674
-
1675
- newFrame = self .filterItems (newCols )
1676
-
1677
- for col in newCols :
1678
- if col not in newFrame :
1679
- newFrame [col ] = NaN
1680
-
1681
- return newFrame
1682
-
1683
1713
def _pfixed (s , space , nanRep = None ):
1684
1714
if isinstance (s , float ):
1685
1715
fstring = '%-' + str (space - 4 ) + 'g'
0 commit comments