@@ -73,9 +73,14 @@ class BaseFile(object):
73
73
""" Class for identifying the type of reader
74
74
"""
75
75
76
- def __init__ (self , try_engine = False ):
76
+ def __init__ (self , engine , extensions , io_class , open_workbook ,
77
+ try_engine = False ):
78
+ self .engine = engine
79
+ self .extensions = extensions
80
+ self .io_class = io_class
81
+ self .open_workbook = open_workbook
77
82
if try_engine :
78
- self .has_engine ()
83
+ self .load_engine ()
79
84
80
85
def is_ext (self , path ):
81
86
"""Verify if the path's extension is supported by the reader
@@ -94,26 +99,29 @@ def is_type(self, io):
94
99
else :
95
100
return False
96
101
97
- def has_engine (self ):
98
- """Verify if the engine is installed
102
+ def load_engine (self ):
103
+ """Load the engine if installed
99
104
"""
100
105
try :
101
- self .load_engine ()
106
+ self ._load_engine ()
102
107
_readers [self .engine ] = True
103
108
except ImportError :
104
109
_readers [self .engine ] = False
110
+ except AttributeError :
111
+ _readers [self .engine ] = False
112
+ msg = 'Excel engine "%s" is not implemented' % self .engine
113
+ raise NotImplementedError (msg )
105
114
106
115
107
116
class XLRDFile (BaseFile ):
108
117
109
- def __init__ (self , ** kwargs ):
110
- self .engine = 'xlrd'
111
- self .extensions = ['xls' , 'xlsx' , 'xlsm' ]
112
- self .io_class = type (None )
113
- self .open_workbook = None
114
- super (XLRDFile , self ).__init__ (** kwargs )
118
+ def __init__ (self , try_engine = False ):
119
+ # engine, extensions, are defined here, but io_class and open_workbook
120
+ # are only defined when importing the engine
121
+ args = ('xlrd' , ['xls' , 'xlsx' , 'xlsm' ], type (None ), None )
122
+ super (XLRDFile , self ).__init__ (* args , try_engine = try_engine )
115
123
116
- def load_engine (self ):
124
+ def _load_engine (self ):
117
125
import xlrd # throw an ImportError if we need to
118
126
ver = tuple (map (int , xlrd .__VERSION__ .split ("." )[:2 ]))
119
127
if ver < (0 , 9 ): # pragma: no cover
@@ -126,14 +134,13 @@ def load_engine(self):
126
134
127
135
class EZODFFile (BaseFile ):
128
136
129
- def __init__ (self , ** kwargs ):
130
- self .engine = 'ezodf'
131
- self .extensions = ['ods' ]
132
- self .io_class = type (None )
133
- self .open_workbook = None
134
- super (EZODFFile , self ).__init__ (** kwargs )
137
+ def __init__ (self , try_engine = False ):
138
+ # engine, extensions, are defined here, but io_class and open_workbook
139
+ # are only defined when importing the engine
140
+ args = ('ezodf' , ['ods' ], type (None ), None )
141
+ super (EZODFFile , self ).__init__ (* args , try_engine = try_engine )
135
142
136
- def load_engine (self ):
143
+ def _load_engine (self ):
137
144
import ezodf
138
145
self .open_workbook = ezodf .opendoc
139
146
self .io_class = ezodf .document .PackagedDocument
@@ -150,17 +157,17 @@ def read_excel(io, sheetname=0, **kwds):
150
157
and file. For file URLs, a host is expected. For instance, a local
151
158
file could be file://localhost/path/to/workbook.xlsx
152
159
sheetname : string, int, mixed list of strings/ints, or None, default 0
153
-
154
- Strings are used for sheet names, Integers are used in zero-indexed sheet
155
- positions.
156
-
160
+
161
+ Strings are used for sheet names, Integers are used in zero-indexed sheet
162
+ positions.
163
+
157
164
Lists of strings/integers are used to request multiple sheets.
158
-
165
+
159
166
Specify None to get all sheets.
160
-
167
+
161
168
str|int -> DataFrame is returned.
162
169
list|None -> Dict of DataFrames is returned, with keys representing sheets.
163
-
170
+
164
171
Available Cases
165
172
166
173
* Defaults to 0 -> 1st sheet as a DataFrame
@@ -293,19 +300,19 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
293
300
Parameters
294
301
----------
295
302
sheetname : string, int, mixed list of strings/ints, or None, default 0
296
-
297
- Strings are used for sheet names, Integers are used in zero-indexed sheet
298
- positions.
299
-
303
+
304
+ Strings are used for sheet names, Integers are used in zero-indexed sheet
305
+ positions.
306
+
300
307
Lists of strings/integers are used to request multiple sheets.
301
-
308
+
302
309
Specify None to get all sheets.
303
-
310
+
304
311
str|int -> DataFrame is returned.
305
312
list|None -> Dict of DataFrames is returned, with keys representing sheets.
306
-
313
+
307
314
Available Cases
308
-
315
+
309
316
* Defaults to 0 -> 1st sheet as a DataFrame
310
317
* 1 -> 2nd sheet as a DataFrame
311
318
* "Sheet1" -> 1st sheet as a DataFrame
@@ -429,7 +436,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
429
436
def _parse_cell (cell_contents ,cell_typ ):
430
437
"""converts the contents of the cell into a pandas
431
438
appropriate object"""
432
-
439
+
433
440
if cell_typ == XL_CELL_DATE :
434
441
if xlrd_0_9_3 :
435
442
# Use the newer xlrd datetime handling.
@@ -472,9 +479,9 @@ def _parse_cell(cell_contents,cell_typ):
472
479
xlrd_0_9_3 = True
473
480
else :
474
481
xlrd_0_9_3 = False
475
-
482
+
476
483
ret_dict = False
477
-
484
+
478
485
#Keep sheetname to maintain backwards compatibility.
479
486
if isinstance (sheetname , list ):
480
487
sheets = sheetname
@@ -484,38 +491,38 @@ def _parse_cell(cell_contents,cell_typ):
484
491
ret_dict = True
485
492
else :
486
493
sheets = [sheetname ]
487
-
494
+
488
495
#handle same-type duplicates.
489
496
sheets = list (set (sheets ))
490
-
497
+
491
498
output = {}
492
-
499
+
493
500
for asheetname in sheets :
494
501
if verbose :
495
502
print ("Reading sheet %s" % asheetname )
496
-
503
+
497
504
if isinstance (asheetname , compat .string_types ):
498
505
sheet = self .book .sheet_by_name (asheetname )
499
- else : # assume an integer if not a string
500
- sheet = self .book .sheet_by_index (asheetname )
501
-
506
+ else : # assume an integer if not a string
507
+ sheet = self .book .sheet_by_index (asheetname )
508
+
502
509
data = []
503
510
should_parse = {}
504
-
511
+
505
512
for i in range (sheet .nrows ):
506
513
row = []
507
514
for j , (value , typ ) in enumerate (zip (sheet .row_values (i ),
508
515
sheet .row_types (i ))):
509
516
if parse_cols is not None and j not in should_parse :
510
517
should_parse [j ] = self ._should_parse (j , parse_cols )
511
-
518
+
512
519
if parse_cols is None or should_parse [j ]:
513
520
row .append (_parse_cell (value ,typ ))
514
521
data .append (row )
515
-
522
+
516
523
if header is not None :
517
524
data [header ] = _trim_excel_header (data [header ])
518
-
525
+
519
526
parser = TextParser (data , header = header , index_col = index_col ,
520
527
has_index_names = has_index_names ,
521
528
na_values = na_values ,
@@ -526,14 +533,14 @@ def _parse_cell(cell_contents,cell_typ):
526
533
skip_footer = skip_footer ,
527
534
chunksize = chunksize ,
528
535
** kwds )
529
-
536
+
530
537
output [asheetname ] = parser .read ()
531
-
538
+
532
539
if ret_dict :
533
540
return output
534
541
else :
535
542
return output [asheetname ]
536
-
543
+
537
544
538
545
def _parse_ods (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
539
546
index_col = None , has_index_names = None , parse_cols = None ,
0 commit comments