Skip to content

Commit e1083b5

Browse files
committed
No longer weighting untransformed data in PanelOLS. Changed DataFrame.toString and DataMatrix.toString to take a buffer instead of a to_stdout flag.
git-svn-id: http://pandas.googlecode.com/svn/trunk@94 d5231056-7de3-11de-ac95-d976489f1ece
1 parent 23ad9d2 commit e1083b5

File tree

6 files changed

+159
-144
lines changed

6 files changed

+159
-144
lines changed

pandas/core/frame.py

Lines changed: 56 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# pylint: disable-msg=E1101
22
# pylint: disable-msg=E1103
3-
# pylint: disable-msg=W0212,W0703
3+
# pylint: disable-msg=W0212,W0703,W0622
44

5+
from cStringIO import StringIO
56
import operator
7+
import sys
68

79
from numpy import NaN
810
import numpy as np
@@ -260,11 +262,14 @@ def __repr__(self):
260262
"""
261263
Return a string representation for a particular DataFrame
262264
"""
263-
if len(self.index) < 1000 and len(self._series) < 10:
264-
return self.toString(to_stdout=False)
265+
buf = StringIO()
266+
if len(self.index) < 500 and len(self._series) < 10:
267+
self.toString(buffer=buf)
265268
else:
266-
output = str(self.__class__) + '\n'
267-
return output + self.info(to_stdout=False)
269+
buf.write(str(self.__class__) + '\n')
270+
self.info(buffer=buf)
271+
272+
return buf.getvalue()
268273

269274
def __getitem__(self, item):
270275
"""
@@ -313,7 +318,7 @@ def __delitem__(self, key):
313318
"""
314319
Delete column from DataFrame (only deletes the reference)
315320
"""
316-
r = self._series.pop(key, None)
321+
self._series.pop(key, None)
317322

318323
def pop(self, item):
319324
"""
@@ -408,7 +413,6 @@ def _combineFrame(self, other, func):
408413

409414
for col, series in other.iteritems():
410415
if col not in self:
411-
cls = series.__class__
412416
newColumns[col] = series.fromValue(np.NaN, index=newIndex)
413417

414418
return DataFrame(data=newColumns, index=newIndex)
@@ -514,54 +518,60 @@ def toDataMatrix(self):
514518

515519
return DataMatrix(self._series, index=self.index)
516520

517-
def toString(self, to_stdout=True, verbose=False, colSpace=15, nanRep=None):
521+
def toString(self, buffer=sys.stdout, verbose=False,
522+
colSpace=15, nanRep=None, formatters=None,
523+
float_format=None):
518524
"""Output a tab-separated version of this DataFrame"""
519525
series = self._series
520-
skeys = sorted(series.keys())
521-
if len(skeys) == 0 or len(self.index) == 0:
522-
output = 'Empty DataFrame\n'
523-
output += self.index.__repr__()
526+
columns = sorted(series.keys())
527+
formatters = formatters or {}
528+
529+
530+
# TODO
531+
532+
float_format = float_format or str
533+
for c in columns:
534+
if c not in formatters:
535+
formatters[c] = str # float_format if c in self.columns else str
536+
537+
if len(columns) == 0 or len(self.index) == 0:
538+
print >> buffer, 'Empty DataFrame'
539+
print >> buffer, repr(self.index)
524540
else:
525541
idxSpace = max([len(str(idx)) for idx in self.index]) + 4
526542
head = _pfixed('', idxSpace)
527543
if verbose:
528544
colSpace = max([len(c) for c in self.columns]) + 4
529-
for h in skeys:
545+
for h in columns:
530546
head += _pfixed(h, colSpace)
531-
output = head + '\n'
547+
print >> buffer, head
532548
for idx in self.index:
533549
ot = _pfixed(idx, idxSpace)
534-
for k in skeys:
535-
ot += _pfixed(series[k][idx], colSpace, nanRep=nanRep)
536-
output += ot + '\n'
537-
if to_stdout:
538-
print output
539-
else:
540-
return output
550+
for k in columns:
551+
formatter = formatters.get(k, str)
552+
ot += _pfixed(formatter(series[k][idx]),
553+
colSpace, nanRep=nanRep)
554+
print >> buffer, ot
541555

542-
def info(self, to_stdout=True):
556+
def info(self, buffer=sys.stdout):
543557
"""Concise summary of a DataFrame, used in __repr__ when very large."""
544558
if len(self._series) == 0:
545-
output = 'DataFrame is empty!\n'
546-
output += self.index.__repr__()
547-
return output
548-
549-
output = 'Index: %s entries, %s to %s\n' % (len(self.index),
550-
min(self.index),
551-
max(self.index))
552-
output += 'Columns:\n'
559+
print >> buffer, 'DataFrame is empty!'
560+
print >> buffer, repr(self.index)
561+
562+
print >> buffer, 'Index: %s entries, %s to %s' % (len(self.index),
563+
min(self.index),
564+
max(self.index))
565+
print >> buffer, 'Data columns:'
566+
553567
series = self._series
554-
skeys = sorted(self.cols())
555-
space = max([len(str(k)) for k in skeys]) + 4
556-
for k in skeys:
568+
columns = sorted(self.cols())
569+
space = max([len(str(k)) for k in columns]) + 4
570+
for k in columns:
557571
out = _pfixed(k, space)
558572
N = notnull(series[k]).sum()
559-
out += '%d non-null values\n' % N
560-
output += out
561-
if to_stdout:
562-
print output
563-
else:
564-
return output
573+
out += '%d non-null values' % N
574+
print >> buffer, out
565575

566576
def rows(self):
567577
"""Alias for the frame's index"""
@@ -586,7 +596,7 @@ def append(self, otherFrame):
586596
"""
587597
newIndex = np.concatenate((self.index, otherFrame.index))
588598
newValues = {}
589-
599+
590600
for column, series in self.iteritems():
591601
if column in otherFrame:
592602
newValues[column] = series.append(otherFrame[column])
@@ -793,7 +803,7 @@ def getTS(self, colName=None, fromDate=None, toDate=None, nPeriods=None):
793803
else:
794804
return self.reindex(dateRange)
795805

796-
def truncate(self, before=None, after=None, periods=None):
806+
def truncate(self, before=None, after=None):
797807
"""Function truncate a sorted DataFrame before and/or after
798808
some particular dates.
799809
@@ -803,13 +813,13 @@ def truncate(self, before=None, after=None, periods=None):
803813
Truncate before date
804814
after : date
805815
Truncate after date
806-
816+
807817
Returns
808818
-------
809819
DataFrame
810820
"""
811821
beg_slice, end_slice = self._getIndices(before, after)
812-
822+
813823
return self[beg_slice:end_slice]
814824

815825
def _getIndices(self, before, after):
@@ -833,8 +843,8 @@ def _getIndices(self, before, after):
833843
end_slice = self.index.indexMap[after] + 1
834844

835845
return beg_slice, end_slice
836-
837-
def getXS(self, key, subset=None, asOf=False):
846+
847+
def getXS(self, key, subset=None):
838848
"""
839849
Returns a row from the DataFrame as a Series object.
840850
@@ -843,9 +853,6 @@ def getXS(self, key, subset=None, asOf=False):
843853
key : some index contained in the index
844854
subset : iterable (list, array, set, etc.), optional
845855
columns to be included
846-
asOf : boolean, optional
847-
Whether to use asOf values for TimeSeries objects
848-
Won't do anything for Series objects.
849856
850857
Note
851858
----
@@ -1050,7 +1057,7 @@ def applymap(self, func):
10501057
"""
10511058
results = {}
10521059
for col, series in self.iteritems():
1053-
results[col] = map(func, series)
1060+
results[col] = [func(v) for v in series]
10541061
return DataFrame(data=results, index=self.index)
10551062

10561063
def tgroupby(self, keyfunc, applyfunc):

0 commit comments

Comments
 (0)