diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8187fb508291a..e4aeb7ad69792 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -34,6 +34,7 @@ cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t cnp.import_array() +cimport pandas._libs.util as util from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN import pandas._libs.lib as lib @@ -279,18 +280,16 @@ cdef class TextReader: cdef public: int64_t leading_cols, table_width, skipfooter, buffer_lines - object allow_leading_cols - object delimiter, converters, delim_whitespace + bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory + bint delim_whitespace + object delimiter, converters object na_values - object memory_map object header, orig_header, names, header_start, header_end object index_col - object low_memory object skiprows object dtype object encoding object compression - object mangle_dupe_cols object usecols list dtype_cast_order set unnamed_cols @@ -298,54 +297,44 @@ cdef class TextReader: def __cinit__(self, source, delimiter=b',', - header=0, header_start=0, header_end=0, index_col=None, names=None, - - memory_map=False, + bint memory_map=False, tokenize_chunksize=DEFAULT_CHUNKSIZE, - delim_whitespace=False, - + bint delim_whitespace=False, compression=None, - converters=None, - - skipinitialspace=False, + bint skipinitialspace=False, escapechar=None, - doublequote=True, + bint doublequote=True, quotechar=b'"', quoting=0, lineterminator=None, - encoding=None, - comment=None, decimal=b'.', thousands=None, - dtype=None, usecols=None, - error_bad_lines=True, - warn_bad_lines=True, - - na_filter=True, + bint error_bad_lines=True, + bint warn_bad_lines=True, + bint na_filter=True, na_values=None, na_fvalues=None, - keep_default_na=True, - + bint keep_default_na=True, true_values=None, false_values=None, - allow_leading_cols=True, - low_memory=False, + bint allow_leading_cols=True, + bint low_memory=False, skiprows=None, skipfooter=0, - verbose=False, - mangle_dupe_cols=True, + bint verbose=False, + bint mangle_dupe_cols=True, float_precision=None, - skip_blank_lines=True): + bint skip_blank_lines=True): # set encoding for native Python and C library if encoding is not None: @@ -591,7 +580,7 @@ cdef class TextReader: self.parser.quotechar = ord(quote_char) cdef _make_skiprow_set(self): - if isinstance(self.skiprows, (int, np.integer)): + if util.is_integer_object(self.skiprows): parser_set_skipfirstnrows(self.parser, self.skiprows) elif not callable(self.skiprows): for i in self.skiprows: @@ -683,15 +672,14 @@ cdef class TextReader: # header is now a list of lists, so field_count should use header[0] cdef: - Py_ssize_t i, start, field_count, passed_count, unnamed_count + Py_ssize_t i, start, field_count, passed_count, unnamed_count, level char *word object name, old_name uint64_t hr, data_line = 0 char *errors = "strict" StringPath path = _string_path(self.c_encoding) - - header = [] - unnamed_cols = set() + list header = [] + set unnamed_cols = set() if self.parser.header_start >= 0: @@ -847,7 +835,7 @@ cdef class TextReader: cdef _read_low_memory(self, rows): cdef: size_t rows_read = 0 - chunks = [] + list chunks = [] if rows is None: while True: @@ -2038,12 +2026,11 @@ def _concatenate_chunks(list chunks): cdef: list names = list(chunks[0].keys()) object name - list warning_columns + list warning_columns = [] object warning_names object common_type result = {} - warning_columns = list() for name in names: arrs = [chunk.pop(name) for chunk in chunks] # Check each arr for consistent types. @@ -2147,7 +2134,7 @@ def _maybe_encode(values): def sanitize_objects(ndarray[object] values, set na_values, - convert_empty=True): + bint convert_empty=True): """ Convert specified values, including the given set na_values and empty strings if convert_empty is True, to np.nan. @@ -2156,7 +2143,7 @@ def sanitize_objects(ndarray[object] values, set na_values, ---------- values : ndarray[object] na_values : set - convert_empty : bool (default True) + convert_empty : bool, default True """ cdef: Py_ssize_t i, n