Skip to content

Commit 7e4d704

Browse files
committed
REF: fix c-parser according to api decisions in master
1 parent 503ccf9 commit 7e4d704

File tree

3 files changed: 36 additions and 32 deletions.

pandas/io/parsers.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1123,26 +1123,15 @@ def read(self, rows=None):
11231123

11241124
def _convert_data(self, data):
11251125
# apply converters
1126-
converted = set()
1126+
clean_conv = {}
1127+
11271128
for col, f in self.converters.iteritems():
11281129
if isinstance(col, int) and col not in self.orig_names:
11291130
col = self.orig_names[col]
1130-
data[col] = lib.map_infer(data[col], f)
1131-
converted.add(col)
1132-
1133-
# do type conversions
1134-
result = {}
1135-
for c, values in data.iteritems():
1136-
if c in converted:
1137-
result[c] = values
1138-
1139-
col_na_values = _get_na_values(c, self.na_values)
1140-
cvals, na_count = _convert_types(values, col_na_values)
1141-
result[c] = cvals
1142-
if self.verbose and na_count:
1143-
print 'Filled %d NA values in column %s' % (na_count, str(c))
1144-
1145-
return result
1131+
clean_conv[col] = f
1132+
1133+
return _convert_to_ndarrays(data, self.na_values, self.verbose,
1134+
clean_conv)
11461135

11471136
def _infer_columns(self):
11481137
names = self.names

pandas/io/tests/test_parsers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1120,7 +1120,7 @@ def test_converters_no_implicit_conv(self):
11201120
data = """000102,1.2,A\n001245,2,B"""
11211121
f = lambda x: x.strip()
11221122
converter = {0: f}
1123-
df = read_csv(StringIO(data), header=None, converters=converter)
1123+
df = self.read_csv(StringIO(data), header=None, converters=converter)
11241124
self.assert_(df.X0.dtype == object)
11251125

11261126
def test_converters_euro_decimal_format(self):

pandas/src/parser.pyx

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from libc.stdio cimport fopen, fclose
55
from libc.stdlib cimport malloc, free
6-
from libc.string cimport strncpy, strlen
6+
from libc.string cimport strncpy, strlen, strcmp
77
cimport libc.stdio as stdio
88

99
from cpython cimport (PyObject, PyBytes_FromString,
@@ -44,6 +44,9 @@ import sys
4444

4545
cdef bint PY3 = (sys.version_info[0] >= 3)
4646

47+
cdef double INF = <double> np.inf
48+
cdef double NEGINF = -INF
49+
4750
cdef extern from "stdint.h":
4851
enum: UINT8_MAX
4952
enum: UINT16_MAX
@@ -458,7 +461,7 @@ cdef class TextReader:
458461

459462
if self.memory_map:
460463
ptr = new_mmap(source)
461-
if ptr == NULL:
464+
if ptr == NULL:
462465
# fall back
463466
ptr = new_file_source(source, self.parser.chunksize)
464467
self.parser.cb_io = &buffer_file_bytes
@@ -1152,6 +1155,8 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start,
11521155

11531156
return result
11541157

1158+
cdef char* cinf = b'inf'
1159+
cdef char* cneginf = b'-inf'
11551160

11561161
cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
11571162
bint na_filter, kh_str_t *na_hashset):
@@ -1182,14 +1187,24 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
11821187
else:
11831188
error = to_double(word, data, parser.sci, parser.decimal)
11841189
if error != 1:
1185-
return None, None
1190+
if strcmp(word, cinf) == 0:
1191+
data[0] = INF
1192+
elif strcmp(word, cneginf) == 0:
1193+
data[0] = NEGINF
1194+
else:
1195+
return None, None
11861196
data += 1
11871197
else:
11881198
for i in range(lines):
11891199
word = COLITER_NEXT(it)
11901200
error = to_double(word, data, parser.sci, parser.decimal)
11911201
if error != 1:
1192-
return None, None
1202+
if strcmp(word, cinf) == 0:
1203+
data[0] = INF
1204+
elif strcmp(word, cneginf) == 0:
1205+
data[0] = NEGINF
1206+
else:
1207+
return None, None
11931208
data += 1
11941209

11951210
return result, na_count
@@ -1492,18 +1507,18 @@ cdef _apply_converter(object f, parser_t *parser, int col,
14921507
c_encoding, errors)
14931508
result[i] = f(val)
14941509

1495-
values = lib.maybe_convert_objects(result)
1510+
return lib.maybe_convert_objects(result)
14961511

1497-
if issubclass(values.dtype.type, (np.number, np.bool_)):
1498-
return values
1512+
# if issubclass(values.dtype.type, (np.number, np.bool_)):
1513+
# return values
14991514

1500-
# XXX
1501-
na_values = set([''])
1502-
try:
1503-
return lib.maybe_convert_numeric(values, na_values, False)
1504-
except Exception:
1505-
na_count = lib.sanitize_objects(values, na_values, False)
1506-
return result
1515+
# # XXX
1516+
# na_values = set([''])
1517+
# try:
1518+
# return lib.maybe_convert_numeric(values, na_values, False)
1519+
# except Exception:
1520+
# na_count = lib.sanitize_objects(values, na_values, False)
1521+
# return result
15071522

15081523
def _to_structured_array(dict columns, object names):
15091524
cdef:

0 commit comments

Comments
 (0)