Skip to content

Commit 77ad265

Browse files
committed
simpler column reindexing in DataFrame and DataMatrix, some cleanup for release
git-svn-id: http://pandas.googlecode.com/svn/trunk@96 d5231056-7de3-11de-ac95-d976489f1ece
1 parent b55f3a1 commit 77ad265

File tree

8 files changed

+250
-218
lines changed

8 files changed

+250
-218
lines changed

pandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# pylint: disable-msg=W0614,W0401,W0611
1+
# pylint: disable-msg=W0614,W0401,W0611,W0622
22

33
__docformat__ = 'restructuredtext'
44

pandas/core/frame.py

Lines changed: 100 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ class DataFrame(Picklable, Groupable):
4242
----------
4343
data : dict
4444
Mapping of column name --> array or Series/TimeSeries objects
45-
index : array-like
46-
Specific index to use for the Frame, Series will be conformed to this
47-
if you provide it.
45+
index : array-like, optional
46+
Specific index to use for the Frame, Series will be conformed
47+
to this if you provide it. If not input, index will be
48+
inferred from input Series
49+
columns : array-like, optional
4850
4951
Notes
5052
-----
@@ -56,12 +58,12 @@ class DataFrame(Picklable, Groupable):
5658
--------
5759
DataMatrix: more efficient version of DataFrame for most operations
5860
59-
Example usage
60-
-------------
61+
Example
62+
-------
6163
>>> d = {'col1' : ts1, 'col2' : ts2}
6264
>>> df = DataFrame(data=d, index=someIndex)
6365
"""
64-
def __init__(self, data=None, index=None):
66+
def __init__(self, data=None, index=None, columns=None):
6567
self._series = {}
6668
if data is not None and len(data) > 0:
6769
if index is None:
@@ -75,7 +77,7 @@ def __init__(self, data=None, index=None):
7577

7678
for k, v in data.iteritems():
7779
if isinstance(v, Series):
78-
# Forces homogoneity and copies data
80+
# Forces homogeneity and copies data
7981
self._series[k] = v.reindex(self.index)
8082
else:
8183
# Copies data and checks length
@@ -169,8 +171,8 @@ def fromDict(cls, inputDict=None, castFloat=True, **kwds):
169171

170172
def toDict(self):
171173
"""
172-
Simpler pseudo-inverse operation of dictToDataFrame, NaN values will be
173-
included in the resulting dict-tree.
174+
Simpler pseudo-inverse operation of DataFrame.fromDict, NaN
175+
values will be included in the resulting dict-tree.
174176
175177
Return
176178
------
@@ -316,9 +318,9 @@ def __setitem__(self, key, value):
316318

317319
def __delitem__(self, key):
318320
"""
319-
Delete column from DataFrame (only deletes the reference)
321+
Delete column from DataFrame
320322
"""
321-
self._series.pop(key, None)
323+
del self._series[key]
322324

323325
def pop(self, item):
324326
"""
@@ -611,16 +613,16 @@ def append(self, otherFrame):
611613

612614
def asfreq(self, freq, fillMethod=None):
613615
"""
614-
Convert all TimeSeries inside to specified frequency using DateOffset
615-
objects. Optionally provide fill method to pad/backfill/interpolate
616-
missing values.
616+
Convert all TimeSeries inside to specified frequency using
617+
DateOffset objects. Optionally provide fill method to pad or
618+
backfill missing values.
617619
618620
Parameters
619621
----------
620622
freq : DateOffset object, or string in {'WEEKDAY', 'EOM'}
621623
DateOffset object or subclass (e.g. monthEnd)
622624
623-
fillMethod : {'backfill', 'pad', 'interpolate', None}
625+
fillMethod : {'backfill', 'pad', None}
624626
Method to use for filling holes in new index
625627
"""
626628
if isinstance(freq, datetools.DateOffset):
@@ -886,38 +888,53 @@ def pivot(self, index=None, columns=None, values=None):
886888

887889
return _slow_pivot(self[index], self[columns], self[values])
888890

889-
def reindex(self, newIndex, fillMethod=None):
891+
def reindex(self, index=None, columns=None, fillMethod=None):
890892
"""
891893
Reindex data inside, optionally filling according to some rule.
892894
893895
Parameters
894896
----------
895-
newIndex : array-like
897+
index : array-like, optional
896898
preferably an Index object (to avoid duplicating data)
897-
fillMethod : {'backfill', 'pad', 'interpolate', None}
898-
Method to use for filling holes in reindexed DataFrame
899+
columns : array-like, optional
900+
fillMethod : {'backfill', 'pad', None}
901+
Method to use for filling data holes using the index
902+
903+
Returns
904+
-------
905+
y : same type as calling instance
899906
"""
900-
if self.index.equals(newIndex):
907+
fillMethod = fillMethod.upper() if fillMethod else ''
908+
909+
if fillMethod not in ['BACKFILL', 'PAD', '']:
910+
raise Exception("Don't recognize fillMethod: %s" % fillMethod)
911+
912+
frame = self
913+
914+
if index is not None:
915+
frame = frame._reindex_index(index, fillMethod)
916+
917+
if columns is not None:
918+
frame = frame._reindex_columns(columns)
919+
920+
return frame
921+
922+
def _reindex_index(self, index, method):
923+
if self.index.equals(index):
901924
return self.copy()
902925

903-
if len(newIndex) == 0:
926+
if len(index) == 0:
904927
return DataFrame(index=NULL_INDEX)
905928

906-
if not isinstance(newIndex, Index):
907-
newIndex = Index(newIndex)
929+
if not isinstance(index, Index):
930+
index = Index(index)
908931

909932
if len(self.index) == 0:
910-
return DataFrame(index=newIndex)
933+
return DataFrame(index=index)
911934

912-
oldMap = self.index.indexMap
913-
newMap = newIndex.indexMap
914-
915-
fillMethod = fillMethod.upper() if fillMethod else ''
916-
if fillMethod not in ['BACKFILL', 'PAD', '']:
917-
raise Exception("Don't recognize fillMethod: %s" % fillMethod)
918-
919-
fillVec, mask = tseries.getFillVec(self.index, newIndex, oldMap,
920-
newMap, fillMethod)
935+
fillVec, mask = tseries.getFillVec(self.index, index,
936+
self.index.indexMap,
937+
index.indexMap, method)
921938

922939
# Maybe this is a bit much? Wish I had unit tests...
923940
typeHierarchy = [
@@ -938,14 +955,26 @@ def reindex(self, newIndex, fillMethod=None):
938955
newSeries = {}
939956
for col, series in self.iteritems():
940957
series = series.view(np.ndarray)
941-
for type, dest in typeHierarchy:
942-
if issubclass(series.dtype.type, type):
958+
for klass, dest in typeHierarchy:
959+
if issubclass(series.dtype.type, klass):
943960
new = series.take(fillVec).astype(dest)
944961
new[-mask] = missingValue[dest]
945962
newSeries[col] = new
946963
break
947964

948-
return DataFrame(newSeries, index=newIndex)
965+
return DataFrame(newSeries, index=index)
966+
967+
def _reindex_columns(self, columns):
968+
if len(columns) == 0:
969+
return DataFrame(index=self.index)
970+
971+
newFrame = self.filterItems(columns)
972+
973+
for col in columns:
974+
if col not in newFrame:
975+
newFrame[col] = NaN
976+
977+
return newFrame
949978

950979
@property
951980
def T(self):
@@ -1000,7 +1029,7 @@ def shift(self, periods, offset=None, timeRule=None):
10001029
for col, series in self.iteritems()])
10011030
return DataFrame(data = newValues, index= newIndex)
10021031

1003-
def apply(self, func):
1032+
def apply(self, func, axis=0):
10041033
"""
10051034
Applies func to columns (Series) of this DataFrame and returns either
10061035
a DataFrame (if the function produces another series) or a Series
@@ -1011,6 +1040,7 @@ def apply(self, func):
10111040
----------
10121041
func : function
10131042
Function to apply to each column
1043+
axis : {0, 1}, default 0 (0: apply func to each column; 1: apply func to each row)
10141044
10151045
Example
10161046
-------
@@ -1019,30 +1049,28 @@ def apply(self, func):
10191049
10201050
Note
10211051
----
1022-
Do NOT use functions that might toy with the index.
1052+
Functions altering the index are not supported (yet)
10231053
"""
10241054
if not len(self.cols()):
10251055
return self
10261056

1027-
results = {}
1028-
for col, series in self.iteritems():
1029-
result = func(series)
1030-
results[col] = result
1057+
if axis == 0:
1058+
target = self
1059+
elif axis == 1:
1060+
target = self.T
1061+
1062+
results = dict([(k, func(target[k])) for k in target.columns])
10311063

10321064
if hasattr(results.values()[0], '__iter__'):
10331065
return DataFrame(data=results, index=self.index)
10341066
else:
1035-
keyArray = np.asarray(sorted(set(results.keys())), dtype=object)
1036-
newIndex = Index(keyArray)
1037-
1038-
arr = np.array([results[idx] for idx in newIndex])
1039-
return Series(arr, index=newIndex)
1067+
return Series.fromDict(results)
10401068

10411069
def tapply(self, func):
10421070
"""
10431071
Apply func to the transposed DataFrame, results as per apply
10441072
"""
1045-
return self.T.apply(func)
1073+
return self.apply(func, axis=1)
10461074

10471075
def applymap(self, func):
10481076
"""
@@ -1323,8 +1351,8 @@ def plot(self, kind='line', **kwds):
13231351
Plot the DataFrame's series with the index on the x-axis using
13241352
matplotlib / pylab.
13251353
1326-
Params
1327-
------
1354+
Parameters
1355+
----------
13281356
kind : {'line', 'bar', 'hist'}
13291357
Default: line for TimeSeries, hist for Series
13301358
@@ -1414,10 +1442,7 @@ def sum(self, axis=0, asarray=False):
14141442
theCount = self.count(axis)
14151443
theSum[theCount == 0] = NaN
14161444
except Exception:
1417-
if axis == 0:
1418-
theSum = self.apply(np.sum)
1419-
else:
1420-
theSum = self.tapply(np.sum)
1445+
theSum = self.apply(np.sum, axis=axis)
14211446

14221447
if asarray:
14231448
return theSum
@@ -1428,6 +1453,27 @@ def sum(self, axis=0, asarray=False):
14281453
else:
14291454
raise Exception('Must have 0<= axis <= 1')
14301455

1456+
def cumsum(self, axis=0):
1457+
"""
1458+
Return cumulative sum over requested axis as DataFrame
1459+
1460+
Parameters
1461+
----------
1462+
axis : {0, 1}
1463+
0 for row-wise, 1 for column-wise
1464+
1465+
Returns
1466+
-------
1467+
y : DataFrame
1468+
"""
1469+
def get_cumsum(y):
1470+
y = np.array(y)
1471+
if not issubclass(y.dtype.type, np.int_):
1472+
y[np.isnan(y)] = 0
1473+
return y.cumsum()
1474+
1475+
return self.apply(get_cumsum, axis=axis)
1476+
14311477
def product(self, axis=0, asarray=False):
14321478
"""
14331479
Return array or Series of products over requested axis.
@@ -1664,22 +1710,6 @@ def skew(self, axis=0, asarray=False):
16641710
else:
16651711
raise Exception('Must have 0<= axis <= 1')
16661712

1667-
def _withColumns(self, newCols):
1668-
"""
1669-
Utility method, force values matrix to have particular columns
1670-
Can make this as cute as we like
1671-
"""
1672-
if len(newCols) == 0:
1673-
return DataFrame(index=self.index)
1674-
1675-
newFrame = self.filterItems(newCols)
1676-
1677-
for col in newCols:
1678-
if col not in newFrame:
1679-
newFrame[col] = NaN
1680-
1681-
return newFrame
1682-
16831713
def _pfixed(s, space, nanRep=None):
16841714
if isinstance(s, float):
16851715
fstring = '%-' + str(space-4) + 'g'

0 commit comments

Comments
 (0)