1
1
# pylint: disable-msg=E1101
2
2
# pylint: disable-msg=E1103
3
- # pylint: disable-msg=W0212,W0703
3
+ # pylint: disable-msg=W0212,W0703,W0622
4
4
5
+ from cStringIO import StringIO
5
6
import operator
7
+ import sys
6
8
7
9
from numpy import NaN
8
10
import numpy as np
@@ -260,11 +262,14 @@ def __repr__(self):
260
262
"""
261
263
Return a string representation for a particular DataFrame
262
264
"""
263
- if len (self .index ) < 1000 and len (self ._series ) < 10 :
264
- return self .toString (to_stdout = False )
265
+ buf = StringIO ()
266
+ if len (self .index ) < 500 and len (self ._series ) < 10 :
267
+ self .toString (buffer = buf )
265
268
else :
266
- output = str (self .__class__ ) + '\n '
267
- return output + self .info (to_stdout = False )
269
+ buf .write (str (self .__class__ ) + '\n ' )
270
+ self .info (buffer = buf )
271
+
272
+ return buf .getvalue ()
268
273
269
274
def __getitem__ (self , item ):
270
275
"""
@@ -313,7 +318,7 @@ def __delitem__(self, key):
313
318
"""
314
319
Delete column from DataFrame (only deletes the reference)
315
320
"""
316
- r = self ._series .pop (key , None )
321
+ self ._series .pop (key , None )
317
322
318
323
def pop (self , item ):
319
324
"""
@@ -408,7 +413,6 @@ def _combineFrame(self, other, func):
408
413
409
414
for col , series in other .iteritems ():
410
415
if col not in self :
411
- cls = series .__class__
412
416
newColumns [col ] = series .fromValue (np .NaN , index = newIndex )
413
417
414
418
return DataFrame (data = newColumns , index = newIndex )
@@ -514,54 +518,60 @@ def toDataMatrix(self):
514
518
515
519
return DataMatrix (self ._series , index = self .index )
516
520
517
def toString(self, buffer=sys.stdout, verbose=False,
             colSpace=15, nanRep=None, formatters=None,
             float_format=None):
    """
    Write a column-aligned text table of this DataFrame to `buffer`.

    Every cell is produced by `_pfixed(value, width)`; the header row
    comes first, followed by one row per index entry, with columns in
    sorted order.

    Parameters
    ----------
    buffer : file-like object, default sys.stdout
        Destination of the output. The default is bound once, when the
        method is defined, so a later reassignment of sys.stdout is not
        picked up unless `buffer` is passed explicitly.
    verbose : boolean, default False
        If True, `colSpace` is replaced by the length of the longest
        column label plus 4.
    colSpace : int, default 15
        Width handed to `_pfixed` for column headers and data cells.
    nanRep : optional
        Forwarded unchanged to `_pfixed` for every data cell.
    formatters : dict, optional
        Maps a column label to a one-argument callable applied to each
        value of that column. Columns without an entry use `str`.
        The dict is only read, never modified.
    float_format : optional
        Accepted for interface compatibility; not applied yet.

    Returns
    -------
    None
    """
    series = self._series
    columns = sorted(series.keys())

    # Only ever read from this mapping: missing columns fall back to `str`
    # at lookup time, so the caller's dict is never written to.
    formatters = formatters or {}

    # TODO: apply float_format to float columns.

    if len(columns) == 0 or len(self.index) == 0:
        print >> buffer, 'Empty DataFrame'
        print >> buffer, repr(self.index)
        return

    idxSpace = max([len(str(idx)) for idx in self.index]) + 4
    if verbose:
        # str() keeps non-string labels (ints, dates, ...) from raising
        # TypeError in len(), matching how the index width is computed.
        colSpace = max([len(str(c)) for c in self.columns]) + 4

    head = _pfixed('', idxSpace)
    for h in columns:
        head += _pfixed(h, colSpace)
    print >> buffer, head

    for idx in self.index:
        ot = _pfixed(idx, idxSpace)
        for k in columns:
            formatter = formatters.get(k, str)
            # NOTE(review): the value is already formatted (a string by
            # default) when _pfixed receives it; if _pfixed decides on
            # nanRep by inspecting the raw value, nanRep may have no
            # effect here -- confirm against _pfixed.
            ot += _pfixed(formatter(series[k][idx]),
                          colSpace, nanRep=nanRep)
        print >> buffer, ot
541
555
542
def info(self, buffer=sys.stdout):
    """
    Concise summary of a DataFrame, used in __repr__ when very large.

    Writes the number of index entries with the smallest and largest
    index value, then one line per column (in sorted order) giving its
    count of non-null values.

    Parameters
    ----------
    buffer : file-like object, default sys.stdout
        Destination of the output. The default is bound once, when the
        method is defined.

    Returns
    -------
    None
    """
    if len(self._series) == 0:
        print >> buffer, 'DataFrame is empty!'
        print >> buffer, repr(self.index)
        # Stop here: with no columns the width computation below would
        # call max() on an empty list and raise ValueError.
        return

    if len(self.index) > 0:
        print >> buffer, 'Index: %s entries, %s to %s' % (len(self.index),
                                                          min(self.index),
                                                          max(self.index))
    else:
        # min()/max() raise ValueError on an empty sequence; a frame with
        # columns but no rows still needs a printable summary.
        print >> buffer, 'Index: 0 entries'
    print >> buffer, 'Data columns:'

    series = self._series
    columns = sorted(self.cols())
    # Label width: longest column label plus 4.
    space = max([len(str(k)) for k in columns]) + 4
    for k in columns:
        count = notnull(series[k]).sum()
        print >> buffer, _pfixed(k, space) + '%d non-null values' % count
565
575
566
576
def rows (self ):
567
577
"""Alias for the frame's index"""
@@ -586,7 +596,7 @@ def append(self, otherFrame):
586
596
"""
587
597
newIndex = np .concatenate ((self .index , otherFrame .index ))
588
598
newValues = {}
589
-
599
+
590
600
for column , series in self .iteritems ():
591
601
if column in otherFrame :
592
602
newValues [column ] = series .append (otherFrame [column ])
@@ -793,7 +803,7 @@ def getTS(self, colName=None, fromDate=None, toDate=None, nPeriods=None):
793
803
else :
794
804
return self .reindex (dateRange )
795
805
796
- def truncate (self , before = None , after = None , periods = None ):
806
+ def truncate (self , before = None , after = None ):
797
807
"""Function truncate a sorted DataFrame before and/or after
798
808
some particular dates.
799
809
@@ -803,13 +813,13 @@ def truncate(self, before=None, after=None, periods=None):
803
813
Truncate before date
804
814
after : date
805
815
Truncate after date
806
-
816
+
807
817
Returns
808
818
-------
809
819
DataFrame
810
820
"""
811
821
beg_slice , end_slice = self ._getIndices (before , after )
812
-
822
+
813
823
return self [beg_slice :end_slice ]
814
824
815
825
def _getIndices (self , before , after ):
@@ -833,8 +843,8 @@ def _getIndices(self, before, after):
833
843
end_slice = self .index .indexMap [after ] + 1
834
844
835
845
return beg_slice , end_slice
836
-
837
- def getXS (self , key , subset = None , asOf = False ):
846
+
847
+ def getXS (self , key , subset = None ):
838
848
"""
839
849
Returns a row from the DataFrame as a Series object.
840
850
@@ -843,9 +853,6 @@ def getXS(self, key, subset=None, asOf=False):
843
853
key : some index contained in the index
844
854
subset : iterable (list, array, set, etc.), optional
845
855
columns to be included
846
- asOf : boolean, optional
847
- Whether to use asOf values for TimeSeries objects
848
- Won't do anything for Series objects.
849
856
850
857
Note
851
858
----
@@ -1050,7 +1057,7 @@ def applymap(self, func):
1050
1057
"""
1051
1058
results = {}
1052
1059
for col , series in self .iteritems ():
1053
- results [col ] = map ( func , series )
1060
+ results [col ] = [ func ( v ) for v in series ]
1054
1061
return DataFrame (data = results , index = self .index )
1055
1062
1056
1063
def tgroupby (self , keyfunc , applyfunc ):
0 commit comments