Closed
Description
Note, though, that read_csv can't read these (timedelta values) back in yet.
See this (apparently a timedelta utility module) for reference: https://github.com/playpauseandstop/kikola/blob/master/kikola/utils/timedelta.py
In [58]: data
Out[58]: ' completed deadline\n15-07-2013 23:10 15-07-2013 23:15\n16-07-2013 00:20 16-07-2013 00:15\n16-07-2013 00:20 16-07-2013 00:15\n16-07-2013 21:04 16-07-2013 21:30\n16-07-2013 21:58 16-07-2013 22:00\n16-07-2013 23:21 16-07-2013 23:15\n16-07-2013 23:21 16-07-2013 23:15\n17-07-2013 00:19 17-07-2013 00:15\n17-07-2013 00:19 17-07-2013 00:15\n17-07-2013 21:18 17-07-2013 21:30\n17-07-2013 22:07 17-07-2013 22:00'
In [59]: df = read_csv(StringIO(data),index_col=None,skiprows=1,header=None,parse_dates=[[0,1],[2,3]],sep='\s+')
In [60]: df.columns = ['completed','deadline']
In [61]: df['delta'] = df['completed']-df['deadline']
In [62]: df
Out[62]:
completed deadline delta
0 2013-07-15 23:10:00 2013-07-15 23:15:00 -00:05:00
1 2013-07-16 00:20:00 2013-07-16 00:15:00 00:05:00
2 2013-07-16 00:20:00 2013-07-16 00:15:00 00:05:00
3 2013-07-16 21:04:00 2013-07-16 21:30:00 -00:26:00
4 2013-07-16 21:58:00 2013-07-16 22:00:00 -00:02:00
5 2013-07-16 23:21:00 2013-07-16 23:15:00 00:06:00
6 2013-07-16 23:21:00 2013-07-16 23:15:00 00:06:00
7 2013-07-17 00:19:00 2013-07-17 00:15:00 00:04:00
8 2013-07-17 00:19:00 2013-07-17 00:15:00 00:04:00
9 2013-07-17 21:18:00 2013-07-17 21:30:00 -00:12:00
10 2013-07-17 22:07:00 2013-07-17 22:00:00 00:07:00
In [63]: df.dtypes
Out[63]:
completed datetime64[ns]
deadline datetime64[ns]
delta timedelta64[ns]
dtype: object
In [64]: df.to_csv('test.csv')
In [65]: !cat test.csv
,completed,deadline,delta
0,2013-07-15 23:10:00,2013-07-15 23:15:00,-300000000000
1,2013-07-16 00:20:00,2013-07-16 00:15:00,300000000000
2,2013-07-16 00:20:00,2013-07-16 00:15:00,300000000000
3,2013-07-16 21:04:00,2013-07-16 21:30:00,-1560000000000
4,2013-07-16 21:58:00,2013-07-16 22:00:00,-120000000000
5,2013-07-16 23:21:00,2013-07-16 23:15:00,360000000000
6,2013-07-16 23:21:00,2013-07-16 23:15:00,360000000000
7,2013-07-17 00:19:00,2013-07-17 00:15:00,240000000000
8,2013-07-17 00:19:00,2013-07-17 00:15:00,240000000000
9,2013-07-17 21:18:00,2013-07-17 21:30:00,-720000000000
10,2013-07-17 22:07:00,2013-07-17 22:00:00,420000000000
Workaround (though this should really be handled in the datetimeblock itself):
In [66]: df['delta'] = df['delta'].apply(pd.lib.repr_timedelta64)
In [67]: df.to_csv('test.csv')
In [68]: !cat test.csv
,completed,deadline,delta
0,2013-07-15 23:10:00,2013-07-15 23:15:00,-00:05:00
1,2013-07-16 00:20:00,2013-07-16 00:15:00,00:05:00
2,2013-07-16 00:20:00,2013-07-16 00:15:00,00:05:00
3,2013-07-16 21:04:00,2013-07-16 21:30:00,-00:26:00
4,2013-07-16 21:58:00,2013-07-16 22:00:00,-00:02:00
5,2013-07-16 23:21:00,2013-07-16 23:15:00,00:06:00
6,2013-07-16 23:21:00,2013-07-16 23:15:00,00:06:00
7,2013-07-17 00:19:00,2013-07-17 00:15:00,00:04:00
8,2013-07-17 00:19:00,2013-07-17 00:15:00,00:04:00
9,2013-07-17 21:18:00,2013-07-17 21:30:00,-00:12:00
10,2013-07-17 22:07:00,2013-07-17 22:00:00,00:07:00