Skip to content

Commit 71e13b4

Browse files
author
davidovitch
committed
Assign parameters in the BaseFile class instead of in the reader classes; remove leading/trailing whitespace elsewhere
2 parents 9256281 + 8a37f14 commit 71e13b4

File tree

1 file changed

+58
-51
lines changed

1 file changed

+58
-51
lines changed

pandas/io/excel.py

Lines changed: 58 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,14 @@ class BaseFile(object):
7373
""" Class for identifying the type of reader
7474
"""
7575

76-
def __init__(self, try_engine=False):
76+
def __init__(self, engine, extensions, io_class, open_workbook,
77+
try_engine=False):
78+
self.engine = engine
79+
self.extensions = extensions
80+
self.io_class = io_class
81+
self.open_workbook = open_workbook
7782
if try_engine:
78-
self.has_engine()
83+
self.load_engine()
7984

8085
def is_ext(self, path):
8186
"""Verify if the path's extension is supported by the reader
@@ -94,26 +99,29 @@ def is_type(self, io):
9499
else:
95100
return False
96101

97-
def has_engine(self):
98-
"""Verify if the engine is installed
102+
def load_engine(self):
103+
"""Load the engine if installed
99104
"""
100105
try:
101-
self.load_engine()
106+
self._load_engine()
102107
_readers[self.engine] = True
103108
except ImportError:
104109
_readers[self.engine] = False
110+
except AttributeError:
111+
_readers[self.engine] = False
112+
msg = 'Excel engine "%s" is not implemented' % self.engine
113+
raise NotImplementedError(msg)
105114

106115

107116
class XLRDFile(BaseFile):
108117

109-
def __init__(self, **kwargs):
110-
self.engine = 'xlrd'
111-
self.extensions = ['xls', 'xlsx', 'xlsm']
112-
self.io_class = type(None)
113-
self.open_workbook = None
114-
super(XLRDFile, self).__init__(**kwargs)
118+
def __init__(self, try_engine=False):
119+
# engine and extensions are defined here, but io_class and open_workbook
120+
# are only defined when importing the engine
121+
args = ('xlrd', ['xls', 'xlsx', 'xlsm'], type(None), None)
122+
super(XLRDFile, self).__init__(*args, try_engine=try_engine)
115123

116-
def load_engine(self):
124+
def _load_engine(self):
117125
import xlrd # throw an ImportError if we need to
118126
ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
119127
if ver < (0, 9): # pragma: no cover
@@ -126,14 +134,13 @@ def load_engine(self):
126134

127135
class EZODFFile(BaseFile):
128136

129-
def __init__(self, **kwargs):
130-
self.engine = 'ezodf'
131-
self.extensions = ['ods']
132-
self.io_class = type(None)
133-
self.open_workbook = None
134-
super(EZODFFile, self).__init__(**kwargs)
137+
def __init__(self, try_engine=False):
138+
# engine and extensions are defined here, but io_class and open_workbook
139+
# are only defined when importing the engine
140+
args = ('ezodf', ['ods'], type(None), None)
141+
super(EZODFFile, self).__init__(*args, try_engine=try_engine)
135142

136-
def load_engine(self):
143+
def _load_engine(self):
137144
import ezodf
138145
self.open_workbook = ezodf.opendoc
139146
self.io_class = ezodf.document.PackagedDocument
@@ -150,17 +157,17 @@ def read_excel(io, sheetname=0, **kwds):
150157
and file. For file URLs, a host is expected. For instance, a local
151158
file could be file://localhost/path/to/workbook.xlsx
152159
sheetname : string, int, mixed list of strings/ints, or None, default 0
153-
154-
Strings are used for sheet names, Integers are used in zero-indexed sheet
155-
positions.
156-
160+
161+
Strings are used for sheet names, Integers are used in zero-indexed sheet
162+
positions.
163+
157164
Lists of strings/integers are used to request multiple sheets.
158-
165+
159166
Specify None to get all sheets.
160-
167+
161168
str|int -> DataFrame is returned.
162169
list|None -> Dict of DataFrames is returned, with keys representing sheets.
163-
170+
164171
Available Cases
165172
166173
* Defaults to 0 -> 1st sheet as a DataFrame
@@ -293,19 +300,19 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
293300
Parameters
294301
----------
295302
sheetname : string, int, mixed list of strings/ints, or None, default 0
296-
297-
Strings are used for sheet names, Integers are used in zero-indexed sheet
298-
positions.
299-
303+
304+
Strings are used for sheet names, Integers are used in zero-indexed sheet
305+
positions.
306+
300307
Lists of strings/integers are used to request multiple sheets.
301-
308+
302309
Specify None to get all sheets.
303-
310+
304311
str|int -> DataFrame is returned.
305312
list|None -> Dict of DataFrames is returned, with keys representing sheets.
306-
313+
307314
Available Cases
308-
315+
309316
* Defaults to 0 -> 1st sheet as a DataFrame
310317
* 1 -> 2nd sheet as a DataFrame
311318
* "Sheet1" -> 1st sheet as a DataFrame
@@ -429,7 +436,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
429436
def _parse_cell(cell_contents,cell_typ):
430437
"""converts the contents of the cell into a pandas
431438
appropriate object"""
432-
439+
433440
if cell_typ == XL_CELL_DATE:
434441
if xlrd_0_9_3:
435442
# Use the newer xlrd datetime handling.
@@ -472,9 +479,9 @@ def _parse_cell(cell_contents,cell_typ):
472479
xlrd_0_9_3 = True
473480
else:
474481
xlrd_0_9_3 = False
475-
482+
476483
ret_dict = False
477-
484+
478485
#Keep sheetname to maintain backwards compatibility.
479486
if isinstance(sheetname, list):
480487
sheets = sheetname
@@ -484,38 +491,38 @@ def _parse_cell(cell_contents,cell_typ):
484491
ret_dict = True
485492
else:
486493
sheets = [sheetname]
487-
494+
488495
#handle same-type duplicates.
489496
sheets = list(set(sheets))
490-
497+
491498
output = {}
492-
499+
493500
for asheetname in sheets:
494501
if verbose:
495502
print("Reading sheet %s" % asheetname)
496-
503+
497504
if isinstance(asheetname, compat.string_types):
498505
sheet = self.book.sheet_by_name(asheetname)
499-
else: # assume an integer if not a string
500-
sheet = self.book.sheet_by_index(asheetname)
501-
506+
else: # assume an integer if not a string
507+
sheet = self.book.sheet_by_index(asheetname)
508+
502509
data = []
503510
should_parse = {}
504-
511+
505512
for i in range(sheet.nrows):
506513
row = []
507514
for j, (value, typ) in enumerate(zip(sheet.row_values(i),
508515
sheet.row_types(i))):
509516
if parse_cols is not None and j not in should_parse:
510517
should_parse[j] = self._should_parse(j, parse_cols)
511-
518+
512519
if parse_cols is None or should_parse[j]:
513520
row.append(_parse_cell(value,typ))
514521
data.append(row)
515-
522+
516523
if header is not None:
517524
data[header] = _trim_excel_header(data[header])
518-
525+
519526
parser = TextParser(data, header=header, index_col=index_col,
520527
has_index_names=has_index_names,
521528
na_values=na_values,
@@ -526,14 +533,14 @@ def _parse_cell(cell_contents,cell_typ):
526533
skip_footer=skip_footer,
527534
chunksize=chunksize,
528535
**kwds)
529-
536+
530537
output[asheetname] = parser.read()
531-
538+
532539
if ret_dict:
533540
return output
534541
else:
535542
return output[asheetname]
536-
543+
537544

538545
def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
539546
index_col=None, has_index_names=None, parse_cols=None,

0 commit comments

Comments
 (0)