Skip to content

Commit c8bb881

Browse files
committed
Skiprows callable for C engine
[ci skip]
1 parent b2ffe2d commit c8bb881

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

pandas/parser.pyx

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ cdef extern from "parser/tokenizer.h":
178178
int header_end # header row end
179179

180180
void *skipset
181+
PyObject *skipfunc
181182
int64_t skip_first_N_rows
182183
int skipfooter
183184
double (*converter)(const char *, char **, char, char, char, int) nogil
@@ -606,9 +607,11 @@ cdef class TextReader:
606607
cdef _make_skiprow_set(self):
607608
if isinstance(self.skiprows, (int, np.integer)):
608609
parser_set_skipfirstnrows(self.parser, self.skiprows)
609-
else:
610+
elif not callable(self.skiprows):
610611
for i in self.skiprows:
611612
parser_add_skiprow(self.parser, i)
613+
else:
614+
self.parser.skipfunc = <PyObject *> self.skiprows
612615

613616
cdef _setup_parser_source(self, source):
614617
cdef:

pandas/src/parser/tokenizer.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ void parser_set_default_options(parser_t *self) {
124124
self->thousands = '\0';
125125

126126
self->skipset = NULL;
127+
self->skipfunc = NULL;
127128
self->skip_first_N_rows = -1;
128129
self->skip_footer = 0;
129130
}
@@ -679,7 +680,9 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
679680
}
680681

681682
int skip_this_line(parser_t *self, int64_t rownum) {
682-
if (self->skipset != NULL) {
683+
if (self->skipfunc != NULL) {
684+
return PyObject_IsTrue(PyObject_CallFunction(self->skipfunc, "i", rownum));
685+
} else if (self->skipset != NULL) {
683686
return (kh_get_int64((kh_int64_t *)self->skipset, self->file_lines) !=
684687
((kh_int64_t *)self->skipset)->n_buckets);
685688
} else {

pandas/src/parser/tokenizer.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,7 @@ typedef struct parser_t {
198198
int header_end; // header row end
199199

200200
void *skipset;
201+
PyObject *skipfunc;
201202
int64_t skip_first_N_rows;
202203
int skip_footer;
203204
double (*converter)(const char *, char **, char, char, char, int);

0 commit comments

Comments
 (0)