Skip to content

CLN: Added static types #33126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 3, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 25 additions & 38 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ cimport numpy as cnp
from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t
cnp.import_array()

cimport pandas._libs.util as util
from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN
import pandas._libs.lib as lib

Expand Down Expand Up @@ -279,73 +280,61 @@ cdef class TextReader:

cdef public:
int64_t leading_cols, table_width, skipfooter, buffer_lines
object allow_leading_cols
object delimiter, converters, delim_whitespace
bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory
bint delim_whitespace
object delimiter, converters
object na_values
object memory_map
object header, orig_header, names, header_start, header_end
object index_col
object low_memory
object skiprows
object dtype
object encoding
object compression
object mangle_dupe_cols
object usecols
list dtype_cast_order
set unnamed_cols
set noconvert

def __cinit__(self, source,
delimiter=b',',

header=0,
header_start=0,
header_end=0,
index_col=None,
names=None,

memory_map=False,
bint memory_map=False,
tokenize_chunksize=DEFAULT_CHUNKSIZE,
delim_whitespace=False,

bint delim_whitespace=False,
compression=None,

converters=None,

skipinitialspace=False,
bint skipinitialspace=False,
escapechar=None,
doublequote=True,
bint doublequote=True,
quotechar=b'"',
quoting=0,
lineterminator=None,

encoding=None,

comment=None,
decimal=b'.',
thousands=None,

dtype=None,
usecols=None,
error_bad_lines=True,
warn_bad_lines=True,

na_filter=True,
bint error_bad_lines=True,
bint warn_bad_lines=True,
bint na_filter=True,
na_values=None,
na_fvalues=None,
keep_default_na=True,

bint keep_default_na=True,
true_values=None,
false_values=None,
allow_leading_cols=True,
low_memory=False,
bint allow_leading_cols=True,
bint low_memory=False,
skiprows=None,
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
bint verbose=False,
bint mangle_dupe_cols=True,
float_precision=None,
skip_blank_lines=True):
bint skip_blank_lines=True):

# set encoding for native Python and C library
if encoding is not None:
Expand Down Expand Up @@ -591,7 +580,7 @@ cdef class TextReader:
self.parser.quotechar = ord(quote_char)

cdef _make_skiprow_set(self):
if isinstance(self.skiprows, (int, np.integer)):
if util.is_integer_object(self.skiprows):
parser_set_skipfirstnrows(self.parser, self.skiprows)
elif not callable(self.skiprows):
for i in self.skiprows:
Expand Down Expand Up @@ -683,15 +672,14 @@ cdef class TextReader:
# header is now a list of lists, so field_count should use header[0]

cdef:
Py_ssize_t i, start, field_count, passed_count, unnamed_count
Py_ssize_t i, start, field_count, passed_count, unnamed_count, level
char *word
object name, old_name
uint64_t hr, data_line = 0
char *errors = "strict"
StringPath path = _string_path(self.c_encoding)

header = []
unnamed_cols = set()
list header = []
set unnamed_cols = set()

if self.parser.header_start >= 0:

Expand Down Expand Up @@ -847,7 +835,7 @@ cdef class TextReader:
cdef _read_low_memory(self, rows):
cdef:
size_t rows_read = 0
chunks = []
list chunks = []

if rows is None:
while True:
Expand Down Expand Up @@ -2038,12 +2026,11 @@ def _concatenate_chunks(list chunks):
cdef:
list names = list(chunks[0].keys())
object name
list warning_columns
list warning_columns = []
object warning_names
object common_type

result = {}
warning_columns = list()
for name in names:
arrs = [chunk.pop(name) for chunk in chunks]
# Check each arr for consistent types.
Expand Down Expand Up @@ -2147,7 +2134,7 @@ def _maybe_encode(values):


def sanitize_objects(ndarray[object] values, set na_values,
convert_empty=True):
bint convert_empty=True):
"""
Convert specified values, including the given set na_values and empty
strings if convert_empty is True, to np.nan.
Expand All @@ -2156,7 +2143,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
----------
values : ndarray[object]
na_values : set
convert_empty : bool (default True)
convert_empty : bool, default True
"""
cdef:
Py_ssize_t i, n
Expand Down