Skip to content

Commit 128995c

Browse files
committed
BUG: do not coerce types in parser if converters specified #2184
1 parent 915162e commit 128995c

File tree

2 files changed

+33
-14
lines changed

2 files changed

+33
-14
lines changed

pandas/io/parsers.py

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -847,13 +847,15 @@ def get_chunk(self, rows=None):
847847
if self.parse_dates is not None:
848848
data, columns = self._process_date_conversion(data)
849849

850-
# apply converters
850+
#converters to column names
851+
clean_conv = {}
851852
for col, f in self.converters.iteritems():
852853
if isinstance(col, int) and col not in self.orig_columns:
853854
col = self.orig_columns[col]
854-
data[col] = lib.map_infer(data[col], f)
855+
clean_conv[col] = f
855856

856-
data = _convert_to_ndarrays(data, self.na_values, self.verbose)
857+
data = _convert_to_ndarrays(data, self.na_values, self.verbose,
858+
clean_conv)
857859

858860
if self.index_col is None:
859861
numrows = len(content)
@@ -1141,18 +1143,23 @@ def _get_na_values(col, na_values):
11411143
return na_values
11421144

11431145

1144-
def _convert_to_ndarrays(dct, na_values, verbose=False):
1146+
def _convert_to_ndarrays(dct, na_values, verbose=False, converters=None):
11451147
result = {}
11461148
for c, values in dct.iteritems():
1149+
conv_f = None if converters is None else converters.get(c, None)
11471150
col_na_values = _get_na_values(c, na_values)
1148-
cvals, na_count = _convert_types(values, col_na_values)
1151+
coerce_type = True
1152+
if conv_f is not None:
1153+
values = lib.map_infer(values, conv_f)
1154+
coerce_type = False
1155+
cvals, na_count = _convert_types(values, col_na_values, coerce_type)
11491156
result[c] = cvals
11501157
if verbose and na_count:
11511158
print 'Filled %d NA values in column %s' % (na_count, str(c))
11521159
return result
11531160

11541161

1155-
def _convert_types(values, na_values):
1162+
def _convert_types(values, na_values, try_num_bool=True):
11561163
na_count = 0
11571164
if issubclass(values.dtype.type, (np.number, np.bool_)):
11581165
mask = lib.ismember(values, na_values)
@@ -1163,13 +1170,17 @@ def _convert_types(values, na_values):
11631170
np.putmask(values, mask, np.nan)
11641171
return values, na_count
11651172

1166-
try:
1167-
result = lib.maybe_convert_numeric(values, na_values, False)
1168-
except Exception:
1173+
if try_num_bool:
1174+
try:
1175+
result = lib.maybe_convert_numeric(values, na_values, False)
1176+
except Exception:
1177+
na_count = lib.sanitize_objects(values, na_values, False)
1178+
result = values
1179+
else:
11691180
na_count = lib.sanitize_objects(values, na_values, False)
11701181
result = values
11711182

1172-
if result.dtype == np.object_:
1183+
if result.dtype == np.object_ and try_num_bool:
11731184
result = lib.maybe_convert_bool(values)
11741185

11751186
return result, na_count
@@ -1352,7 +1363,7 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
13521363
skip_footer=skip_footer)
13531364

13541365
def _should_parse(self, i, parse_cols):
1355-
1366+
13561367
def _range2cols(areas):
13571368
"""
13581369
Convert comma separated list of column names and column ranges to a
@@ -1363,10 +1374,10 @@ def _range2cols(areas):
13631374
>>> _range2cols('A,C,Z:AB')
13641375
[0, 2, 25, 26, 27]
13651376
"""
1366-
def _excel2num(x):
1377+
def _excel2num(x):
13671378
"Convert Excel column name like 'AB' to 0-based column index"
13681379
return reduce(lambda s,a: s*26+ord(a)-ord('A')+1, x.upper().strip(), 0)-1
1369-
1380+
13701381
cols = []
13711382
for rng in areas.split(','):
13721383
if ':' in rng:

pandas/io/tests/test_parsers.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1259,6 +1259,14 @@ def test_converters(self):
12591259
expected['D'] = expected['D'].map(converter)
12601260
assert_frame_equal(result, expected)
12611261

1262+
def test_converters_no_implicit_conv(self):
1263+
#GH2184
1264+
data = """000102,1.2,A\n001245,2,B"""
1265+
f = lambda x: x.strip()
1266+
converter = {0: f}
1267+
df = read_csv(StringIO(data), header=None, converters=converter)
1268+
self.assert_(df.X0.dtype == object)
1269+
12621270
def test_converters_euro_decimal_format(self):
12631271
data = """Id;Number1;Number2;Text1;Text2;Number3
12641272
1;1521,1541;187101,9543;ABC;poi;4,738797819
@@ -1277,7 +1285,7 @@ def test_converter_return_string_bug(self):
12771285
1;1521,1541;187101,9543;ABC;poi;4,738797819
12781286
2;121,12;14897,76;DEF;uyt;0,377320872
12791287
3;878,158;108013,434;GHI;rez;2,735694704"""
1280-
f = lambda x : x.replace(",", ".")
1288+
f = lambda x : float(x.replace(",", "."))
12811289
converter = {'Number1':f,'Number2':f, 'Number3':f}
12821290
df2 = read_csv(StringIO(data), sep=';',converters=converter)
12831291
self.assert_(df2['Number1'].dtype == float)

0 commit comments

Comments
 (0)