116
116
dtype : Type name or dict of column -> type, default None
117
117
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
118
118
Use `str` or `object` to preserve and not interpret dtype.
119
- If converters are specified, they will be applied AFTER
120
- dtype conversion.
119
+ If converters are specified, they will be applied INSTEAD
120
+ of dtype conversion.
121
+
122
+ .. versionadded:: 0.20.0 support for the Python parser.
123
+
121
124
%s
122
125
converters : dict, default None
123
126
Dict of functions for converting values in certain columns. Keys can either
@@ -1293,20 +1296,6 @@ def _agg_index(self, index, try_parse_dates=True):
1293
1296
1294
1297
return index
1295
1298
1296
- def _apply_converter (self , values , conv_f , na_values , col_na_values ,
1297
- col_na_fvalues ):
1298
- """ apply converter function to values, respecting NAs """
1299
- try :
1300
- values = lib .map_infer (values , conv_f )
1301
- except ValueError :
1302
- mask = lib .ismember (values , na_values ).view (np .uint8 )
1303
- values = lib .map_infer_mask (values , conv_f , mask )
1304
-
1305
- cvals , na_count = self ._infer_types (
1306
- values , set (col_na_values ) | col_na_fvalues ,
1307
- try_num_bool = False )
1308
- return cvals , na_count
1309
-
1310
1299
def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
1311
1300
converters = None , dtypes = None ):
1312
1301
result = {}
@@ -1324,45 +1313,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
1324
1313
else :
1325
1314
col_na_values , col_na_fvalues = set (), set ()
1326
1315
1327
- if conv_f is not None and cast_type is None :
1328
- # if type is not specified, apply the conversion first, without
1329
- # inference
1330
- cvals , na_count = self ._apply_converter (
1331
- values , conv_f , na_values ,
1332
- col_na_values , col_na_fvalues )
1316
+ if conv_f is not None :
1317
+ # conv_f applied to data before inference
1318
+ # dtype isn't used if a converter is specified
1319
+ try :
1320
+ values = lib .map_infer (values , conv_f )
1321
+ except ValueError :
1322
+ mask = lib .ismember (values , na_values ).view (np .uint8 )
1323
+ values = lib .map_infer_mask (values , conv_f , mask )
1324
+
1325
+ cvals , na_count = self ._infer_types (
1326
+ values , set (col_na_values ) | col_na_fvalues ,
1327
+ try_num_bool = False )
1333
1328
else :
1334
- try_num_bool = True
1335
- if cast_type and is_object_dtype (cast_type ):
1336
- # skip inference if specified dtype is object
1337
- try_num_bool = False
1329
+ # skip inference if specified dtype is object
1330
+ try_num_bool = not (cast_type and is_object_dtype (cast_type ))
1338
1331
1339
1332
# general type inference and conversion
1340
1333
cvals , na_count = self ._infer_types (
1341
1334
values , set (col_na_values ) | col_na_fvalues ,
1342
1335
try_num_bool )
1343
1336
1337
+ # type specified in dtype param
1338
+ if cast_type and not is_dtype_equal (cvals , cast_type ):
1339
+ cvals = self ._cast_types (cvals , cast_type , c )
1340
+
1344
1341
if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1345
1342
cvals = lib .downcast_int64 (
1346
1343
cvals , _parser .na_values ,
1347
1344
self .use_unsigned )
1348
1345
1349
- if cast_type and not is_dtype_equal (cvals , cast_type ):
1350
- # type specificed in dtype param
1351
-
1352
- cvals = self ._cast_types (cvals , cast_type , c )
1353
- # for consistency with c-parser, if a converter and dtype are
1354
- # specified, apply the converter last
1355
- if conv_f is not None :
1356
- values , na_count = self ._apply_converter (
1357
- values , conv_f , na_values ,
1358
- col_na_values , col_na_fvalues )
1359
-
1360
1346
result [c ] = cvals
1361
1347
if verbose and na_count :
1362
1348
print ('Filled %d NA values in column %s' % (na_count , str (c )))
1363
1349
return result
1364
1350
1365
1351
def _infer_types (self , values , na_values , try_num_bool = True ):
1352
+ """
1353
+ Infer types of values, possibly casting
1354
+
1355
+ Parameters
1356
+ ----------
1357
+ values : ndarray
1358
+ na_values : set
1359
+ try_num_bool : bool, default True
1360
+ try to cast values to numeric (first preference) or boolean
1361
+
1362
+ Returns
1363
+ -------
1364
+ converted : ndarray
1365
+ na_count : int
1366
+ """
1367
+
1366
1368
na_count = 0
1367
1369
if issubclass (values .dtype .type , (np .number , np .bool_ )):
1368
1370
mask = lib .ismember (values , na_values )
@@ -1394,7 +1396,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
1394
1396
return result , na_count
1395
1397
1396
1398
def _cast_types (self , values , cast_type , column ):
1397
- """ cast column to type specified in dtypes= param """
1399
+ """
1400
+ Cast values to specified type
1401
+
1402
+ Parameters
1403
+ ----------
1404
+ values : ndarray
1405
+ cast_type : string or np.dtype
1406
+ dtype to cast values to
1407
+ column : string
1408
+ column name - used only for error reporting
1409
+
1410
+ Returns
1411
+ -------
1412
+ converted : ndarray
1413
+ """
1414
+
1398
1415
if is_categorical_dtype (cast_type ):
1399
1416
# XXX this is for consistency with
1400
1417
# c-parser which parses all categories
0 commit comments