@@ -34,6 +34,7 @@ cimport numpy as cnp
34
34
from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t
35
35
cnp.import_array()
36
36
37
+ cimport pandas._libs.util as util
37
38
from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN
38
39
import pandas._libs.lib as lib
39
40
@@ -279,73 +280,61 @@ cdef class TextReader:
279
280
280
281
cdef public:
281
282
int64_t leading_cols, table_width, skipfooter, buffer_lines
282
- object allow_leading_cols
283
- object delimiter, converters, delim_whitespace
283
+ bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory
284
+ bint delim_whitespace
285
+ object delimiter, converters
284
286
object na_values
285
- object memory_map
286
287
object header, orig_header, names, header_start, header_end
287
288
object index_col
288
- object low_memory
289
289
object skiprows
290
290
object dtype
291
291
object encoding
292
292
object compression
293
- object mangle_dupe_cols
294
293
object usecols
295
294
list dtype_cast_order
296
295
set unnamed_cols
297
296
set noconvert
298
297
299
298
def __cinit__ (self , source ,
300
299
delimiter = b' ,' ,
301
-
302
300
header = 0 ,
303
301
header_start = 0 ,
304
302
header_end = 0 ,
305
303
index_col = None ,
306
304
names = None ,
307
-
308
- memory_map = False ,
305
+ bint memory_map = False ,
309
306
tokenize_chunksize = DEFAULT_CHUNKSIZE,
310
- delim_whitespace = False ,
311
-
307
+ bint delim_whitespace = False ,
312
308
compression = None ,
313
-
314
309
converters = None ,
315
-
316
- skipinitialspace = False ,
310
+ bint skipinitialspace = False ,
317
311
escapechar = None ,
318
- doublequote = True ,
312
+ bint doublequote = True ,
319
313
quotechar = b' "' ,
320
314
quoting = 0 ,
321
315
lineterminator = None ,
322
-
323
316
encoding = None ,
324
-
325
317
comment = None ,
326
318
decimal = b' .' ,
327
319
thousands = None ,
328
-
329
320
dtype = None ,
330
321
usecols = None ,
331
- error_bad_lines = True ,
332
- warn_bad_lines = True ,
333
-
334
- na_filter = True ,
322
+ bint error_bad_lines = True ,
323
+ bint warn_bad_lines = True ,
324
+ bint na_filter = True ,
335
325
na_values = None ,
336
326
na_fvalues = None ,
337
- keep_default_na = True ,
338
-
327
+ bint keep_default_na = True ,
339
328
true_values = None ,
340
329
false_values = None ,
341
- allow_leading_cols = True ,
342
- low_memory = False ,
330
+ bint allow_leading_cols = True ,
331
+ bint low_memory = False ,
343
332
skiprows = None ,
344
333
skipfooter = 0 ,
345
- verbose = False ,
346
- mangle_dupe_cols = True ,
334
+ bint verbose = False ,
335
+ bint mangle_dupe_cols = True ,
347
336
float_precision = None ,
348
- skip_blank_lines = True ):
337
+ bint skip_blank_lines = True ):
349
338
350
339
# set encoding for native Python and C library
351
340
if encoding is not None :
@@ -591,7 +580,7 @@ cdef class TextReader:
591
580
self .parser.quotechar = ord (quote_char)
592
581
593
582
cdef _make_skiprow_set(self ):
594
- if isinstance (self .skiprows, ( int , np.integer) ):
583
+ if util.is_integer_object (self .skiprows):
595
584
parser_set_skipfirstnrows(self .parser, self .skiprows)
596
585
elif not callable (self .skiprows):
597
586
for i in self .skiprows:
@@ -683,15 +672,14 @@ cdef class TextReader:
683
672
# header is now a list of lists, so field_count should use header[0]
684
673
685
674
cdef:
686
- Py_ssize_t i, start, field_count, passed_count, unnamed_count
675
+ Py_ssize_t i, start, field_count, passed_count, unnamed_count, level
687
676
char * word
688
677
object name, old_name
689
678
uint64_t hr, data_line = 0
690
679
char * errors = " strict"
691
680
StringPath path = _string_path(self .c_encoding)
692
-
693
- header = []
694
- unnamed_cols = set ()
681
+ list header = []
682
+ set unnamed_cols = set ()
695
683
696
684
if self .parser.header_start >= 0 :
697
685
@@ -847,7 +835,7 @@ cdef class TextReader:
847
835
cdef _read_low_memory(self , rows):
848
836
cdef:
849
837
size_t rows_read = 0
850
- chunks = []
838
+ list chunks = []
851
839
852
840
if rows is None :
853
841
while True :
@@ -2038,12 +2026,11 @@ def _concatenate_chunks(list chunks):
2038
2026
cdef:
2039
2027
list names = list (chunks[0 ].keys())
2040
2028
object name
2041
- list warning_columns
2029
+ list warning_columns = []
2042
2030
object warning_names
2043
2031
object common_type
2044
2032
2045
2033
result = {}
2046
- warning_columns = list ()
2047
2034
for name in names:
2048
2035
arrs = [chunk.pop(name) for chunk in chunks]
2049
2036
# Check each arr for consistent types.
@@ -2147,7 +2134,7 @@ def _maybe_encode(values):
2147
2134
2148
2135
2149
2136
def sanitize_objects (ndarray[object] values , set na_values ,
2150
- convert_empty = True ):
2137
+ bint convert_empty = True ):
2151
2138
"""
2152
2139
Convert specified values, including the given set na_values and empty
2153
2140
strings if convert_empty is True, to np.nan.
@@ -2156,7 +2143,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
2156
2143
----------
2157
2144
values : ndarray[object]
2158
2145
na_values : set
2159
- convert_empty : bool ( default True)
2146
+ convert_empty : bool, default True
2160
2147
"""
2161
2148
cdef:
2162
2149
Py_ssize_t i, n
0 commit comments