@@ -975,8 +975,8 @@ one can use the ExcelWriter class, as in the following example:
975
975
HDF5 (PyTables)
976
976
---------------
977
977
978
- ``HDFStore `` is a dict-like object which reads and writes pandas to the
979
- high performance HDF5 format using the excellent `PyTables
978
+ ``HDFStore `` is a dict-like object which reads and writes pandas objects in
979
+ the high performance HDF5 format using the excellent `PyTables
980
980
<http://www.pytables.org/> `__ library.
981
981
982
982
.. ipython :: python
@@ -1041,7 +1041,8 @@ Closing a Store
1041
1041
# closing a store
1042
1042
store.close()
1043
1043
1044
- # Working with, and automatically closing the store with the context manager
1044
+ # Working with, and automatically closing the store with the context
1045
+ # manager
1045
1046
with get_store(' store.h5' ) as store:
1046
1047
store.keys()
1047
1048
@@ -1137,7 +1138,7 @@ defaults to `nan`.
1137
1138
df_mixed[' datetime64' ] = Timestamp(' 20010102' )
1138
1139
df_mixed.ix[3 :5 ,[' A' ,' B' ,' string' ,' datetime64' ]] = np.nan
1139
1140
1140
- store.append(' df_mixed' , df_mixed, min_itemsize = { ' values' : 50 })
1141
+ store.append(' df_mixed' , df_mixed, min_itemsize = {' values' : 50 })
1141
1142
df_mixed1 = store.select(' df_mixed' )
1142
1143
df_mixed1
1143
1144
df_mixed1.get_dtype_counts()
@@ -1159,7 +1160,7 @@ storing/selecting from homogenous index DataFrames.
1159
1160
[0 , 1 , 2 , 0 , 1 , 1 , 2 , 0 , 1 , 2 ]],
1160
1161
names = [' foo' , ' bar' ])
1161
1162
df_mi = DataFrame(np.random.randn(10 , 3 ), index = index,
1162
- columns = [' A' , ' B' , ' C' ])
1163
+ columns = [' A' , ' B' , ' C' ])
1163
1164
df_mi
1164
1165
1165
1166
store.append(' df_mi' ,df_mi)
@@ -1192,10 +1193,10 @@ terms.
1192
1193
- ``dict(field = 'index', op = '>', value = '20121114') ``
1193
1194
- ``('index', '>', '20121114') ``
1194
1195
- ``'index > 20121114' ``
1195
- - ``('index', '>', datetime(2012,11,14)) ``
1196
- - ``('index', ['20121114','20121115']) ``
1196
+ - ``('index', '>', datetime(2012, 11, 14)) ``
1197
+ - ``('index', ['20121114', '20121115']) ``
1197
1198
- ``('major_axis', '=', Timestamp('2012/11/14')) ``
1198
- - ``('minor_axis', ['A','B']) ``
1199
+ - ``('minor_axis', ['A', 'B']) ``
1199
1200
1200
1201
Queries are built up using a list of ``Terms `` (currently only
1201
1202
**anding ** of terms is supported). An example query for a panel might be
@@ -1207,15 +1208,15 @@ greater than the date 20000102 and the minor_axis must be A or B`
1207
1208
1208
1209
store.append(' wp' ,wp)
1209
1210
store
1210
- store.select(' wp' ,[ Term(' major_axis>20000102' ), Term(' minor_axis' , ' =' , [' A' ,' B' ]) ])
1211
+ store.select(' wp' , [ Term(' major_axis>20000102' ), Term(' minor_axis' , ' =' , [' A' , ' B' ]) ])
1211
1212
1212
1213
The ``columns `` keyword can be supplied to select to filter a list of
1213
1214
the return columns; this is equivalent to passing a
1214
1215
``Term('columns',list_of_columns_to_filter) ``
1215
1216
1216
1217
.. ipython :: python
1217
1218
1218
- store.select(' df' , columns = [' A' ,' B' ])
1219
+ store.select(' df' , columns = [' A' , ' B' ])
1219
1220
1220
1221
Start and Stop parameters can be specified to limit the total search
1221
1222
space. These are in terms of the total number of rows in a table.
@@ -1226,7 +1227,9 @@ space. These are in terms of the total number of rows in a table.
1226
1227
wp.to_frame()
1227
1228
1228
1229
# limiting the search
1229
- store.select(' wp' ,[ Term(' major_axis>20000102' ), Term(' minor_axis' , ' =' , [' A' ,' B' ]) ], start = 0 , stop = 10 )
1230
+ store.select(' wp' ,[ Term(' major_axis>20000102' ),
1231
+ Term(' minor_axis' , ' =' , [' A' ,' B' ]) ],
1232
+ start = 0 , stop = 10 )
1230
1233
1231
1234
1232
1235
Indexing
@@ -1273,11 +1276,11 @@ be data_columns
1273
1276
df_dc
1274
1277
1275
1278
# on-disk operations
1276
- store.append(' df_dc' , df_dc, data_columns = [' B' ,' C' ,' string' ,' string2' ])
1277
- store.select(' df_dc' ,[ Term(' B>0' ) ])
1279
+ store.append(' df_dc' , df_dc, data_columns = [' B' , ' C' , ' string' , ' string2' ])
1280
+ store.select(' df_dc' , [ Term(' B>0' ) ])
1278
1281
1279
1282
# getting creative
1280
- store.select(' df_dc' ,[ ' B > 0' , ' C > 0' , ' string == foo' ])
1283
+ store.select(' df_dc' , [ ' B > 0' , ' C > 0' , ' string == foo' ])
1281
1284
1282
1285
# this is the in-memory version of this type of selection
1283
1286
df_dc[(df_dc.B > 0 ) & (df_dc.C > 0 ) & (df_dc.string == ' foo' )]
@@ -1303,8 +1306,8 @@ very quickly. Note ``nan`` are excluded from the result set.
1303
1306
1304
1307
.. ipython :: python
1305
1308
1306
- store.unique(' df_dc' ,' index' )
1307
- store.unique(' df_dc' ,' string' )
1309
+ store.unique(' df_dc' , ' index' )
1310
+ store.unique(' df_dc' , ' string' )
1308
1311
1309
1312
**Replicating or **
1310
1313
@@ -1317,7 +1320,7 @@ criteria to the table, and then ``concat`` the results.
1317
1320
crit1 = [ Term(' B>0' ), Term(' C>0' ), Term(' string=foo' ) ]
1318
1321
crit2 = [ Term(' B<0' ), Term(' C>0' ), Term(' string=foo' ) ]
1319
1322
1320
- concat([ store.select(' df_dc' ,c) for c in [ crit1, crit2 ] ])
1323
+ concat([store.select(' df_dc' ,c) for c in [crit1, crit2] ])
1321
1324
1322
1325
**Storer Object **
1323
1326
@@ -1357,16 +1360,17 @@ table (optional) to let it have the remaining columns. The argument
1357
1360
df_mt[' foo' ] = ' bar'
1358
1361
1359
1362
# you can also create the tables individually
1360
- store.append_to_multiple({ ' df1_mt' : [' A' ,' B' ], ' df2_mt' : None },
1361
- df_mt, selector = ' df1_mt' )
1363
+ store.append_to_multiple({' df1_mt' : [' A' , ' B' ], ' df2_mt' : None },
1364
+ df_mt, selector = ' df1_mt' )
1362
1365
store
1363
1366
1364
1367
# individual tables were created
1365
1368
store.select(' df1_mt' )
1366
1369
store.select(' df2_mt' )
1367
1370
1368
1371
# as a multiple
1369
- store.select_as_multiple([' df1_mt' ,' df2_mt' ], where = [ ' A>0' ,' B>0' ], selector = ' df1_mt' )
1372
+ store.select_as_multiple([' df1_mt' ,' df2_mt' ], where = [' A>0' , ' B>0' ],
1373
+ selector = ' df1_mt' )
1370
1374
1371
1375
1372
1376
Delete from a Table
@@ -1431,7 +1435,8 @@ may not be installed (by Python) by default.
1431
1435
1432
1436
Compression for all objects within the file
1433
1437
1434
- - ``store_compressed = HDFStore('store_compressed.h5', complevel=9, complib='blosc') ``
1438
+ - ``store_compressed = HDFStore('store_compressed.h5', complevel=9,
1439
+ complib='blosc') ``
1435
1440
1436
1441
Or on-the-fly compression (this only applies to tables). You can turn
1437
1442
off file compression for a specific table by passing ``complevel=0 ``
@@ -1446,7 +1451,8 @@ beginning. You can use the supplied ``PyTables`` utility
1446
1451
``ptrepack ``. In addition, ``ptrepack `` can change compression levels
1447
1452
after the fact.
1448
1453
1449
- - ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5 ``
1454
+ - ``ptrepack --chunkshape=auto --propindexes --complevel=9
1455
+ --complib=blosc in.h5 out.h5 ``
1450
1456
1451
1457
Furthermore ``ptrepack in.h5 out.h5 `` will *repack * the file to allow
1452
1458
you to reuse previously deleted space. Alternatively, one can simply
@@ -1515,12 +1521,13 @@ conversion may not be necessary in future versions of pandas)
1515
1521
.. ipython :: python
1516
1522
1517
1523
import datetime
1518
- df = DataFrame(dict (datelike = Series([datetime.datetime(2001 ,1 ,1 ),datetime.datetime(2001 ,1 ,2 ),np.nan])))
1524
+ df = DataFrame(dict (datelike = Series([datetime.datetime(2001 , 1 , 1 ),
1525
+ datetime.datetime(2001 , 1 , 2 ), np.nan])))
1519
1526
df
1520
1527
df.dtypes
1521
1528
1522
1529
# to convert
1523
- df[' datelike' ] = Series(df[' datelike' ].values,dtype = ' M8[ns]' )
1530
+ df[' datelike' ] = Series(df[' datelike' ].values, dtype = ' M8[ns]' )
1524
1531
df
1525
1532
df.dtypes
1526
1533
@@ -1537,7 +1544,7 @@ format store like this:
1537
1544
.. ipython :: python
1538
1545
1539
1546
store_export = HDFStore(' export.h5' )
1540
- store_export.append('df_dc',df_dc,data_columns=df_dc.columns)
1547
+ store_export.append('df_dc', df_dc, data_columns=df_dc.columns)
1541
1548
store_export
1542
1549
1543
1550
.. ipython :: python
@@ -1629,7 +1636,7 @@ object). This cannot be changed after table creation.
1629
1636
1630
1637
.. ipython :: python
1631
1638
1632
- store.append(' p4d2' , p4d, axes = [' labels' ,' major_axis' ,' minor_axis' ])
1639
+ store.append(' p4d2' , p4d, axes = [' labels' , ' major_axis' , ' minor_axis' ])
1633
1640
store
1634
1641
store.select(' p4d2' , [ Term(' labels=l1' ), Term(' items=Item1' ), Term(' minor_axis=A_big_strings' ) ])
1635
1642
0 commit comments