86
86
def read_csv (filepath_or_buffer , sep = ',' , header = 0 , index_col = None , names = None ,
87
87
skiprows = None , na_values = None , parse_dates = False ,
88
88
date_parser = None , nrows = None , iterator = False , chunksize = None ,
89
- skip_footer = 0 , converters = None , verbose = False , delimiter = None ):
89
+ skip_footer = 0 , converters = None , verbose = False , delimiter = None ,
90
+ encoding = None ):
90
91
if hasattr (filepath_or_buffer , 'read' ):
91
92
f = filepath_or_buffer
92
93
else :
@@ -111,7 +112,8 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
111
112
chunksize = chunksize ,
112
113
skip_footer = skip_footer ,
113
114
converters = converters ,
114
- verbose = verbose )
115
+ verbose = verbose ,
116
+ encoding = encoding )
115
117
116
118
if nrows is not None :
117
119
return parser .get_chunk (nrows )
@@ -124,14 +126,15 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
124
126
def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
               names=None, skiprows=None, na_values=None, parse_dates=False,
               date_parser=None, nrows=None, iterator=False, chunksize=None,
               skip_footer=0, converters=None, verbose=False, delimiter=None,
               encoding=None):
    """
    Read a delimited file by delegating to ``read_csv``.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``read_csv``; the only difference is that ``sep`` defaults to a tab
    character instead of a comma.

    Parameters
    ----------
    filepath_or_buffer, sep, header, index_col, names, skiprows, na_values,
    parse_dates, date_parser, nrows, iterator, chunksize, skip_footer,
    converters, verbose, delimiter, encoding
        Passed through to ``read_csv`` under the same keyword names.

    Returns
    -------
    Whatever ``read_csv`` returns for the given arguments.
    """
    # NOTE: ``encoding`` must be forwarded as given by the caller; passing a
    # literal None here would silently discard the requested encoding.
    return read_csv(filepath_or_buffer, sep=sep, header=header,
                    skiprows=skiprows, index_col=index_col,
                    na_values=na_values, date_parser=date_parser,
                    names=names, parse_dates=parse_dates,
                    nrows=nrows, iterator=iterator, chunksize=chunksize,
                    skip_footer=skip_footer, converters=converters,
                    verbose=verbose, delimiter=delimiter,
                    encoding=encoding)
135
138
136
139
def read_clipboard (** kwargs ): # pragma: no cover
137
140
"""
@@ -194,7 +197,8 @@ class TextParser(object):
194
197
def __init__ (self , f , delimiter = None , names = None , header = 0 ,
195
198
index_col = None , na_values = None , parse_dates = False ,
196
199
date_parser = None , chunksize = None , skiprows = None ,
197
- skip_footer = 0 , converters = None , verbose = False ):
200
+ skip_footer = 0 , converters = None , verbose = False ,
201
+ encoding = None ):
198
202
"""
199
203
Workhorse function for processing nested list into DataFrame
200
204
@@ -210,6 +214,8 @@ def __init__(self, f, delimiter=None, names=None, header=0,
210
214
self .date_parser = date_parser
211
215
self .chunksize = chunksize
212
216
self .passed_names = names is not None
217
+ self .encoding = encoding
218
+
213
219
214
220
if com .is_integer (skiprows ):
215
221
skiprows = range (skiprows )
@@ -261,9 +267,20 @@ def _make_reader(self, f):
261
267
self .pos += 1
262
268
sniffed = csv .Sniffer ().sniff (line )
263
269
dia .delimiter = sniffed .delimiter
264
- self .buf .extend (list (com .UnicodeReader (StringIO (line ),
265
- dialect = dia )))
266
- reader = com .UnicodeReader (f , dialect = dia )
270
+ if self .encoding is not None :
271
+ self .buf .extend (list (
272
+ com .UnicodeReader (StringIO (line ),
273
+ dialect = dia ,
274
+ encoding = self .encoding )))
275
+ else :
276
+ self .buf .extend (list (csv .reader (StringIO (line ),
277
+ dialect = dia )))
278
+
279
+ if self .encoding is not None :
280
+ reader = com .UnicodeReader (f , dialect = dia ,
281
+ encoding = self .encoding )
282
+ else :
283
+ reader = csv .reader (f , dialect = dia )
267
284
else :
268
285
reader = (re .split (sep , line .strip ()) for line in f )
269
286
0 commit comments