Assign parameters in the BaseFile class instead of in the reader classes; remove leading/trailing whitespace elsewhere

davidovitch · davidovitch · commit 71e13b492961 · 2015-03-10T20:04:16.000+01:00
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -73,9 +73,14 @@ class BaseFile(object):
     """ Class for identifying the type of reader
     """
 
-    def __init__(self, try_engine=False):
+    def __init__(self, engine, extensions, io_class, open_workbook,
+                 try_engine=False):
+        self.engine = engine
+        self.extensions = extensions
+        self.io_class = io_class
+        self.open_workbook = open_workbook
         if try_engine:
-            self.has_engine()
+            self.load_engine()
 
     def is_ext(self, path):
         """Verify if the path's extension is supported by the reader
@@ -94,26 +99,29 @@ def is_type(self, io):
         else:
             return False
 
-    def has_engine(self):
-        """Verify if the engine is installed
+    def load_engine(self):
+        """Load the engine if installed
         """
         try:
-            self.load_engine()
+            self._load_engine()
             _readers[self.engine] = True
         except ImportError:
             _readers[self.engine] = False
+        except AttributeError:
+            _readers[self.engine] = False
+            msg = 'Excel engine "%s" is not implemented' % self.engine
+            raise NotImplementedError(msg)
 
 
 class XLRDFile(BaseFile):
 
-    def __init__(self, **kwargs):
-        self.engine = 'xlrd'
-        self.extensions = ['xls', 'xlsx', 'xlsm']
-        self.io_class = type(None)
-        self.open_workbook = None
-        super(XLRDFile, self).__init__(**kwargs)
+    def __init__(self, try_engine=False):
+        # engine and extensions are defined here; io_class and open_workbook
+        # are only defined when the engine is imported
+        args = ('xlrd', ['xls', 'xlsx', 'xlsm'], type(None), None)
+        super(XLRDFile, self).__init__(*args, try_engine=try_engine)
 
-    def load_engine(self):
+    def _load_engine(self):
         import xlrd  # throw an ImportError if we need to
         ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
         if ver < (0, 9):  # pragma: no cover
@@ -126,14 +134,13 @@ def load_engine(self):
 
 class EZODFFile(BaseFile):
 
-    def __init__(self, **kwargs):
-        self.engine = 'ezodf'
-        self.extensions = ['ods']
-        self.io_class = type(None)
-        self.open_workbook = None
-        super(EZODFFile, self).__init__(**kwargs)
+    def __init__(self, try_engine=False):
+        # engine and extensions are defined here; io_class and open_workbook
+        # are only defined when the engine is imported
+        args = ('ezodf', ['ods'], type(None), None)
+        super(EZODFFile, self).__init__(*args, try_engine=try_engine)
 
-    def load_engine(self):
+    def _load_engine(self):
         import ezodf
         self.open_workbook = ezodf.opendoc
         self.io_class = ezodf.document.PackagedDocument
@@ -150,17 +157,17 @@ def read_excel(io, sheetname=0, **kwds):
         and file. For file URLs, a host is expected. For instance, a local
         file could be file://localhost/path/to/workbook.xlsx
     sheetname : string, int, mixed list of strings/ints, or None, default 0
-        
-        Strings are used for sheet names, Integers are used in zero-indexed sheet 
-        positions. 
-        
+
+        Strings are used for sheet names, Integers are used in zero-indexed sheet
+        positions.
+
         Lists of strings/integers are used to request multiple sheets.
-        
+
         Specify None to get all sheets.
-        
+
         str|int -> DataFrame is returned.
         list|None -> Dict of DataFrames is returned, with keys representing sheets.
-               
+
         Available Cases
 
         * Defaults to 0 -> 1st sheet as a DataFrame
@@ -293,19 +300,19 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         Parameters
         ----------
         sheetname : string, int, mixed list of strings/ints, or None, default 0
-            
-            Strings are used for sheet names, Integers are used in zero-indexed sheet 
-            positions. 
-            
+
+            Strings are used for sheet names, Integers are used in zero-indexed sheet
+            positions.
+
             Lists of strings/integers are used to request multiple sheets.
-            
+
             Specify None to get all sheets.
-            
+
             str|int -> DataFrame is returned.
             list|None -> Dict of DataFrames is returned, with keys representing sheets.
-                   
+
             Available Cases
-    
+
             * Defaults to 0 -> 1st sheet as a DataFrame
             * 1 -> 2nd sheet as a DataFrame
             * "Sheet1" -> 1st sheet as a DataFrame
@@ -429,7 +436,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         def _parse_cell(cell_contents,cell_typ):
             """converts the contents of the cell into a pandas
                appropriate object"""
-               
+
             if cell_typ == XL_CELL_DATE:
                 if xlrd_0_9_3:
                     # Use the newer xlrd datetime handling.
@@ -472,9 +479,9 @@ def _parse_cell(cell_contents,cell_typ):
             xlrd_0_9_3 = True
         else:
             xlrd_0_9_3 = False
-        
+
         ret_dict = False
-        
+
         #Keep sheetname to maintain backwards compatibility.
         if isinstance(sheetname, list):
             sheets = sheetname
@@ -484,38 +491,38 @@ def _parse_cell(cell_contents,cell_typ):
             ret_dict = True
         else:
             sheets = [sheetname]
-        
+
         #handle same-type duplicates.
         sheets = list(set(sheets))
-        
+
         output = {}
-        
+
         for asheetname in sheets:
             if verbose:
                 print("Reading sheet %s" % asheetname)
-            
+
             if isinstance(asheetname, compat.string_types):
                 sheet = self.book.sheet_by_name(asheetname)
-            else:  # assume an integer if not a string    
-                sheet = self.book.sheet_by_index(asheetname)   
-            
+            else:  # assume an integer if not a string
+                sheet = self.book.sheet_by_index(asheetname)
+
             data = []
             should_parse = {}
-            
+
             for i in range(sheet.nrows):
                 row = []
                 for j, (value, typ) in enumerate(zip(sheet.row_values(i),
                                                      sheet.row_types(i))):
                     if parse_cols is not None and j not in should_parse:
                         should_parse[j] = self._should_parse(j, parse_cols)
-    
+
                     if parse_cols is None or should_parse[j]:
                         row.append(_parse_cell(value,typ))
                 data.append(row)
-    
+
             if header is not None:
                 data[header] = _trim_excel_header(data[header])
-    
+
             parser = TextParser(data, header=header, index_col=index_col,
                                 has_index_names=has_index_names,
                                 na_values=na_values,
@@ -526,14 +533,14 @@ def _parse_cell(cell_contents,cell_typ):
                                 skip_footer=skip_footer,
                                 chunksize=chunksize,
                                 **kwds)
-            
+
             output[asheetname] = parser.read()
-            
+
         if ret_dict:
             return output
         else:
             return output[asheetname]
-        
+
 
     def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                    index_col=None, has_index_names=None, parse_cols=None,