@@ -12,8 +12,6 @@ from cpython cimport (PyObject, PyBytes_FromString,
12
12
13
13
14
14
cdef extern from " Python.h" :
15
- ctypedef struct FILE
16
-
17
15
object PyUnicode_FromString(char * v)
18
16
19
17
object PyUnicode_Decode(char * v, Py_ssize_t size, char * encoding,
@@ -78,9 +76,14 @@ cdef extern from "parser/parser.h":
78
76
EAT_WHITESPACE
79
77
FINISHED
80
78
79
+ ctypedef void * (* io_callback)(void * src, size_t nbytes, size_t * bytes_read,
80
+ int * status)
81
+ ctypedef int (* io_cleanup)(void * src)
82
+
81
83
ctypedef struct parser_t:
82
84
void * source
83
- char sourcetype # 'M' for mmap, 'F' for FILE, 'A' for array
85
+ io_callback cb_io
86
+ io_cleanup cb_cleanup
84
87
85
88
int chunksize # Number of bytes to prepare for each chunk
86
89
char * data # pointer to data to be processed
@@ -127,8 +130,6 @@ cdef extern from "parser/parser.h":
127
130
int error_bad_lines
128
131
int warn_bad_lines
129
132
130
- int infer_types
131
-
132
133
# floating point options
133
134
char decimal
134
135
char sci
@@ -160,10 +161,6 @@ cdef extern from "parser/parser.h":
160
161
161
162
void parser_set_default_options(parser_t * self )
162
163
163
- int parser_file_source_init(parser_t * self , FILE* fp)
164
- int parser_mmap_init(parser_t * self , FILE* fp)
165
- int parser_rd_source_init(parser_t * self , object source)
166
-
167
164
int parser_consume_rows(parser_t * self , size_t nrows)
168
165
169
166
int parser_trim_buffers(parser_t * self )
@@ -187,7 +184,28 @@ cdef extern from "parser/parser.h":
187
184
inline int to_boolean(char * item, uint8_t * val)
188
185
189
186
190
- DEFAULT_CHUNKSIZE = 1024 * 1024
187
+ cdef extern from " parser/io.h" :
188
+ void * new_mmap(char * fname)
189
+
190
+ void * new_file_source(char * fname, size_t buffer_size)
191
+
192
+ void * new_rd_source(object obj)
193
+
194
+ int del_file_source(void * src)
195
+ int del_mmap(void * src)
196
+ int del_rd_source(void * src)
197
+
198
+ void * buffer_file_bytes(void * source, size_t nbytes,
199
+ size_t * bytes_read, int * status)
200
+
201
+ void * buffer_rd_bytes(void * source, size_t nbytes,
202
+ size_t * bytes_read, int * status)
203
+
204
+ void * buffer_mmap_bytes(void * source, size_t nbytes,
205
+ size_t * bytes_read, int * status)
206
+
207
+
208
+ DEFAULT_CHUNKSIZE = 256 * 1024
191
209
192
210
# common NA values
193
211
# no longer excluding inf representations
@@ -206,12 +224,11 @@ cdef class TextReader:
206
224
207
225
cdef:
208
226
parser_t * parser
209
- object file_handle, should_close
227
+ object file_handle
210
228
bint factorize, na_filter, verbose, has_usecols
211
229
int parser_start
212
230
list clocks
213
231
char * c_encoding
214
- FILE * fp
215
232
216
233
cdef public:
217
234
int leading_cols, table_width, skip_footer, buffer_lines
@@ -330,8 +347,6 @@ cdef class TextReader:
330
347
self .parser.error_bad_lines = 0
331
348
self .parser.warn_bad_lines = 0
332
349
333
- self .should_close = False
334
-
335
350
self .delimiter = delimiter
336
351
self .delim_whitespace = delim_whitespace
337
352
@@ -406,10 +421,6 @@ cdef class TextReader:
406
421
def __dealloc__ (self ):
407
422
parser_free(self .parser)
408
423
409
- def __del__ (self ):
410
- if self .should_close:
411
- fclose(self .fp)
412
-
413
424
def set_error_bad_lines (self , int status ):
414
425
self .parser.error_bad_lines = status
415
426
@@ -423,31 +434,37 @@ cdef class TextReader:
423
434
cdef _setup_parser_source(self , source):
424
435
cdef:
425
436
int status
426
-
427
- self .fp = NULL
437
+ void * ptr
428
438
429
439
if isinstance (source, basestring ):
430
440
if not isinstance (source, bytes):
431
441
source = source.encode(' utf-8' )
432
442
433
- self .should_close = True
434
- self .fp = fopen(source, b' rb' )
435
- stdio.setbuf(self .fp, NULL )
436
-
437
443
if self .memory_map:
438
- status = parser_mmap_init(self .parser, self .fp)
444
+ ptr = new_mmap(source)
445
+ self .parser.cb_io = & buffer_mmap_bytes
446
+ self .parser.cb_cleanup = & del_mmap
439
447
else :
440
- status = parser_file_source_init(self .parser, self .fp)
448
+ ptr = new_file_source(source, self .parser.chunksize)
449
+ self .parser.cb_io = & buffer_file_bytes
450
+ self .parser.cb_cleanup = & del_file_source
441
451
442
- if status ! = 0 :
452
+ if ptr == NULL :
443
453
raise Exception (' Initializing from file failed' )
454
+
455
+ self .parser.source = ptr
456
+
444
457
elif hasattr (source, ' read' ):
445
458
# e.g., StringIO
446
459
447
- status = parser_rd_source_init( self .parser, source)
448
- if status ! = 0 :
460
+ ptr = new_rd_source( source)
461
+ if ptr == NULL :
449
462
raise Exception (' Initializing parser from file-like '
450
463
' object failed' )
464
+
465
+ self .parser.source = ptr
466
+ self .parser.cb_io = & buffer_rd_bytes
467
+ self .parser.cb_cleanup = & del_rd_source
451
468
else :
452
469
raise Exception (' Expected file path name or file-like object,'
453
470
' got %s type' % type (source))
@@ -1185,14 +1202,14 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1185
1202
continue
1186
1203
1187
1204
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
1188
- & error, parser.thousands);
1205
+ & error, parser.thousands)
1189
1206
if error != 0 :
1190
1207
return None , None
1191
1208
else :
1192
1209
for i in range (lines):
1193
1210
word = COLITER_NEXT(it)
1194
1211
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
1195
- & error, parser.thousands);
1212
+ & error, parser.thousands)
1196
1213
if error != 0 :
1197
1214
return None , None
1198
1215
0 commit comments