Skip to content

Commit 268ee21

Browse files
wesmunknown
authored and
unknown
committed
BLD: c-parser branch fixes for win32 platforms
1 parent 0f5fb55 commit 268ee21

File tree

5 files changed

+151
-105
lines changed

5 files changed

+151
-105
lines changed

pandas/io/tests/test_parsers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1735,6 +1735,10 @@ def test_decompression(self):
17351735
os.remove('__tmp__')
17361736
except:
17371737
pass
1738+
1739+
def test_memory_map(self):
    """Smoke test: parsing ``self.csv1`` with ``memory_map=True`` must not raise."""
    # Only successful completion is checked; the parsed frame is discarded.
    self.read_csv(self.csv1, memory_map=True)
17381742

17391743

17401744
class TestParseSQL(unittest.TestCase):

pandas/src/parser.pyx

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ cdef extern from "stdint.h":
4848
enum: UINT8_MAX
4949
enum: UINT16_MAX
5050
enum: UINT32_MAX
51+
enum: UINT64_MAX
5152
enum: INT8_MIN
5253
enum: INT8_MAX
5354
enum: INT16_MIN
@@ -186,13 +187,15 @@ cdef extern from "parser/parser.h":
186187

187188
cdef extern from "parser/io.h":
188189
void *new_mmap(char *fname)
190+
int del_mmap(void *src)
191+
void* buffer_mmap_bytes(void *source, size_t nbytes,
192+
size_t *bytes_read, int *status)
189193

190194
void *new_file_source(char *fname, size_t buffer_size)
191195

192196
void *new_rd_source(object obj)
193197

194198
int del_file_source(void *src)
195-
int del_mmap(void *src)
196199
int del_rd_source(void *src)
197200

198201
void* buffer_file_bytes(void *source, size_t nbytes,
@@ -201,9 +204,6 @@ cdef extern from "parser/io.h":
201204
void* buffer_rd_bytes(void *source, size_t nbytes,
202205
size_t *bytes_read, int *status)
203206

204-
void* buffer_mmap_bytes(void *source, size_t nbytes,
205-
size_t *bytes_read, int *status)
206-
207207

208208
DEFAULT_CHUNKSIZE = 256 * 1024
209209

@@ -294,6 +294,8 @@ cdef class TextReader:
294294
self.clocks = []
295295

296296
self.compression = compression
297+
self.memory_map = memory_map
298+
297299
self._setup_parser_source(source)
298300
parser_set_default_options(self.parser)
299301

@@ -354,7 +356,6 @@ cdef class TextReader:
354356
self.delimiter = delimiter
355357
self.delim_whitespace = delim_whitespace
356358

357-
self.memory_map = memory_map
358359
self.na_values = na_values
359360
self.converters = converters
360361

@@ -457,8 +458,14 @@ cdef class TextReader:
457458

458459
if self.memory_map:
459460
ptr = new_mmap(source)
460-
self.parser.cb_io = &buffer_mmap_bytes
461-
self.parser.cb_cleanup = &del_mmap
461+
if ptr == NULL:
462+
# fall back
463+
ptr = new_file_source(source, self.parser.chunksize)
464+
self.parser.cb_io = &buffer_file_bytes
465+
self.parser.cb_cleanup = &del_file_source
466+
else:
467+
self.parser.cb_io = &buffer_mmap_bytes
468+
self.parser.cb_cleanup = &del_mmap
462469
else:
463470
ptr = new_file_source(source, self.parser.chunksize)
464471
self.parser.cb_io = &buffer_file_bytes
@@ -1436,6 +1443,12 @@ na_values = {
14361443
np.float64 : np.nan,
14371444
np.int64 : INT64_MIN,
14381445
np.int32 : INT32_MIN,
1446+
np.int16 : INT16_MIN,
1447+
np.int8 : INT8_MIN,
1448+
np.uint64 : UINT64_MAX,
1449+
np.uint32 : UINT32_MAX,
1450+
np.uint16 : UINT16_MAX,
1451+
np.uint8 : UINT8_MAX,
14391452
np.bool_ : UINT8_MAX,
14401453
np.object_ : np.nan # oof
14411454
}

pandas/src/parser/io.c

Lines changed: 116 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -38,49 +38,6 @@ void *new_file_source(char *fname, size_t buffer_size) {
3838
* buffer_size is ignored.
3939
*/
4040

41-
void *new_mmap(char *fname)
42-
{
43-
struct stat buf;
44-
int fd;
45-
memory_map *mm;
46-
/* off_t position; */
47-
off_t filesize;
48-
49-
mm = (memory_map *) malloc(sizeof(memory_map));
50-
mm->fp = fopen(fname, "rb");
51-
52-
fd = fileno(mm->fp);
53-
if (fstat(fd, &buf) == -1) {
54-
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno);
55-
return NULL;
56-
}
57-
filesize = buf.st_size; /* XXX This might be 32 bits. */
58-
59-
60-
if (mm == NULL) {
61-
/* XXX Eventually remove this print statement. */
62-
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
63-
return NULL;
64-
}
65-
mm->size = (off_t) filesize;
66-
mm->line_number = 0;
67-
68-
mm->fileno = fd;
69-
mm->position = ftell(mm->fp);
70-
mm->last_pos = (off_t) filesize;
71-
72-
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
73-
if (mm->memmap == NULL) {
74-
/* XXX Eventually remove this print statement. */
75-
fprintf(stderr, "new_file_buffer: mmap() failed.\n");
76-
free(mm);
77-
mm = NULL;
78-
}
79-
80-
return (void*) mm;
81-
}
82-
83-
8441

8542
void* new_rd_source(PyObject *obj) {
8643
rd_source *rds = (rd_source *) malloc(sizeof(rd_source));
@@ -119,59 +76,13 @@ int del_rd_source(void *rds) {
11976
return 0;
12077
}
12178

122-
int del_mmap(void *src)
123-
{
124-
munmap(MM(src)->memmap, MM(src)->size);
125-
126-
fclose(MM(src)->fp);
127-
128-
/*
129-
* With a memory mapped file, there is no need to do
130-
* anything if restore == RESTORE_INITIAL.
131-
*/
132-
/* if (restore == RESTORE_FINAL) { */
133-
/* fseek(FB(fb)->file, FB(fb)->current_pos, SEEK_SET); */
134-
/* } */
135-
free(src);
136-
137-
return 0;
138-
}
139-
14079
/*
14180
14281
IO callbacks
14382
14483
*/
14584

14685

147-
void* buffer_mmap_bytes(void *source, size_t nbytes,
148-
size_t *bytes_read, int *status) {
149-
void *retval;
150-
memory_map *src = MM(source);
151-
152-
if (src->position == src->last_pos) {
153-
*bytes_read = 0;
154-
*status = REACHED_EOF;
155-
return NULL;
156-
}
157-
158-
retval = src->memmap + src->position;
159-
160-
if (src->position + nbytes > src->last_pos) {
161-
// fewer than nbytes remaining
162-
*bytes_read = src->last_pos - src->position;
163-
} else {
164-
*bytes_read = nbytes;
165-
}
166-
167-
*status = 0;
168-
169-
/* advance position in mmap data structure */
170-
src->position += *bytes_read;
171-
172-
return retval;
173-
}
174-
17586
void* buffer_file_bytes(void *source, size_t nbytes,
17687
size_t *bytes_read, int *status) {
17788
file_source *src = FS(source);
@@ -241,3 +152,119 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
241152

242153
return retval;
243154
}
155+
156+
157+
#ifdef HAVE_MMAP
158+
159+
#include <sys/stat.h>
160+
#include <sys/mman.h>
161+
162+
void *new_mmap(char *fname)
163+
{
164+
struct stat buf;
165+
int fd;
166+
memory_map *mm;
167+
/* off_t position; */
168+
off_t filesize;
169+
170+
mm = (memory_map *) malloc(sizeof(memory_map));
171+
mm->fp = fopen(fname, "rb");
172+
173+
fd = fileno(mm->fp);
174+
if (fstat(fd, &buf) == -1) {
175+
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno);
176+
return NULL;
177+
}
178+
filesize = buf.st_size; /* XXX This might be 32 bits. */
179+
180+
181+
if (mm == NULL) {
182+
/* XXX Eventually remove this print statement. */
183+
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
184+
return NULL;
185+
}
186+
mm->size = (off_t) filesize;
187+
mm->line_number = 0;
188+
189+
mm->fileno = fd;
190+
mm->position = ftell(mm->fp);
191+
mm->last_pos = (off_t) filesize;
192+
193+
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
194+
if (mm->memmap == NULL) {
195+
/* XXX Eventually remove this print statement. */
196+
fprintf(stderr, "new_file_buffer: mmap() failed.\n");
197+
free(mm);
198+
mm = NULL;
199+
}
200+
201+
return (void*) mm;
202+
}
203+
204+
205+
int del_mmap(void *src)
206+
{
207+
munmap(MM(src)->memmap, MM(src)->size);
208+
209+
fclose(MM(src)->fp);
210+
211+
/*
212+
* With a memory mapped file, there is no need to do
213+
* anything if restore == RESTORE_INITIAL.
214+
*/
215+
/* if (restore == RESTORE_FINAL) { */
216+
/* fseek(FB(fb)->file, FB(fb)->current_pos, SEEK_SET); */
217+
/* } */
218+
free(src);
219+
220+
return 0;
221+
}
222+
223+
void* buffer_mmap_bytes(void *source, size_t nbytes,
224+
size_t *bytes_read, int *status) {
225+
void *retval;
226+
memory_map *src = MM(source);
227+
228+
if (src->position == src->last_pos) {
229+
*bytes_read = 0;
230+
*status = REACHED_EOF;
231+
return NULL;
232+
}
233+
234+
retval = src->memmap + src->position;
235+
236+
if (src->position + nbytes > src->last_pos) {
237+
// fewer than nbytes remaining
238+
*bytes_read = src->last_pos - src->position;
239+
} else {
240+
*bytes_read = nbytes;
241+
}
242+
243+
*status = 0;
244+
245+
/* advance position in mmap data structure */
246+
src->position += *bytes_read;
247+
248+
return retval;
249+
}
250+
251+
#else
252+
253+
/* kludgy */
254+
255+
/*
 * Stub used when HAVE_MMAP is not defined: memory mapping is unavailable,
 * so always report failure by returning NULL.
 */
void *new_mmap(char *fname) {
    (void) fname;  /* unused */
    return NULL;
}
258+
259+
/*
 * Stub used when HAVE_MMAP is not defined: there is never a mapping to
 * release, so do nothing and return 0.
 */
int del_mmap(void *src) {
    (void) src;  /* unused */
    return 0;
}
262+
263+
/* don't use this! */
264+
265+
void* buffer_mmap_bytes(void *source, size_t nbytes,
266+
size_t *bytes_read, int *status) {
267+
return NULL;
268+
}
269+
270+
#endif

pandas/src/parser/io.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ typedef struct _file_source {
2929

3030
#define FS(source) ((file_source *)source)
3131

32-
33-
#include <sys/stat.h>
34-
#include <sys/mman.h>
32+
#if !defined(_WIN32)
33+
#define HAVE_MMAP
34+
#endif
3535

3636
typedef struct _memory_map {
3737

@@ -54,6 +54,13 @@ typedef struct _memory_map {
5454

5555
#define MM(src) ((memory_map*) src)
5656

57+
void *new_mmap(char *fname);
58+
59+
int del_mmap(void *src);
60+
61+
void* buffer_mmap_bytes(void *source, size_t nbytes,
62+
size_t *bytes_read, int *status);
63+
5764

5865
typedef struct _rd_source {
5966
PyObject* obj;
@@ -63,14 +70,11 @@ typedef struct _rd_source {
6370

6471
#define RDS(source) ((rd_source *)source)
6572

66-
void *new_mmap(char *fname);
67-
6873
void *new_file_source(char *fname, size_t buffer_size);
6974

7075
void *new_rd_source(PyObject *obj);
7176

7277
int del_file_source(void *src);
73-
int del_mmap(void *src);
7478
int del_rd_source(void *src);
7579

7680
void* buffer_file_bytes(void *source, size_t nbytes,
@@ -79,6 +83,3 @@ void* buffer_file_bytes(void *source, size_t nbytes,
7983
void* buffer_rd_bytes(void *source, size_t nbytes,
8084
size_t *bytes_read, int *status);
8185

82-
void* buffer_mmap_bytes(void *source, size_t nbytes,
83-
size_t *bytes_read, int *status);
84-

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ def initialize_options(self):
259259
'np_datetime_strings.c',
260260
'period.c',
261261
'parser.c',
262+
'io.c',
262263
'str_to.c']
263264

264265
for root, dirs, files in list(os.walk('pandas')):

0 commit comments

Comments
 (0)