read multiple sheets for ods, small PEP8 changes

davidovitch · davidovitch · commit 109757cde49e · 2015-03-10T21:22:57.000+01:00
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -433,7 +433,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
 
         epoch1904 = self.book.datemode
 
-        def _parse_cell(cell_contents,cell_typ):
+        def _parse_cell(cell_contents, cell_typ):
             """converts the contents of the cell into a pandas
                appropriate object"""
 
@@ -482,7 +482,7 @@ def _parse_cell(cell_contents,cell_typ):
 
         ret_dict = False
 
-        #Keep sheetname to maintain backwards compatibility.
+        # Keep sheetname to maintain backwards compatibility.
         if isinstance(sheetname, list):
             sheets = sheetname
             ret_dict = True
@@ -492,7 +492,7 @@ def _parse_cell(cell_contents,cell_typ):
         else:
             sheets = [sheetname]
 
-        #handle same-type duplicates.
+        # handle same-type duplicates.
         sheets = list(set(sheets))
 
         output = {}
@@ -517,7 +517,7 @@ def _parse_cell(cell_contents,cell_typ):
                         should_parse[j] = self._should_parse(j, parse_cols)
 
                     if parse_cols is None or should_parse[j]:
-                        row.append(_parse_cell(value,typ))
+                        row.append(_parse_cell(value, typ))
                 data.append(row)
 
             if header is not None:
@@ -541,68 +541,95 @@ def _parse_cell(cell_contents,cell_typ):
         else:
             return output[asheetname]
 
-
     def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                    index_col=None, has_index_names=None, parse_cols=None,
                    parse_dates=False, date_parser=None, na_values=None,
                    thousands=None, chunksize=None, convert_float=True,
-                   **kwds):
-
-        # sheetname can be index or string
-        sheet = self.book.sheets[sheetname]
-
-        data = []
-        should_parse = {}
-        for i in range(sheet.nrows()):
-            row = []
-            for j, cell in enumerate(sheet.row(i)):
-
-                if parse_cols is not None and j not in should_parse:
-                    should_parse[j] = self._should_parse(j, parse_cols)
-
-                if parse_cols is None or should_parse[j]:
-
-                    if isinstance(cell.value, float):
-                        value = cell.value
-                        if convert_float:
-                            # GH5394 - Excel and ODS 'numbers' are always floats
-                            # it's a minimal perf hit and less suprising
-                            # FIXME: this goes wrong when int(cell.value) returns
-                            # a long (>1e18)
-                            val = int(cell.value)
-                            if val == cell.value:
-                                value = val
-                    elif isinstance(cell.value, compat.string_types):
-                        typ = cell.value_type
-#                        if typ == 'string':
-#                            value = cell.value
-                        if typ == 'date' or typ == 'time':
-                            value = self._parse_datetime(cell)
-                        else:
-                            value = cell.value
-                    elif isinstance(cell.value, bool):
-                        value = cell.value
-#                    elif isinstance(cell.value, type(None)):
-#                        value = np.nan
-                    else:
-                        value = np.nan
+                   verbose=False, **kwds):
+
+        def _parse_cell(cell):
+            """converts the contents of the cell into a pandas
+               appropriate object"""
+            if isinstance(cell.value, float):
+                value = cell.value
+                if convert_float:
+                    # GH5394 - Excel and ODS 'numbers' are always floats
+                    # it's a minimal perf hit and less surprising
+                    # FIXME: this goes wrong when int(cell.value) returns
+                    # a long (>1e18)
+                    val = int(cell.value)
+                    if val == cell.value:
+                        value = val
+            elif isinstance(cell.value, compat.string_types):
+                typ = cell.value_type
+#                if typ == 'string':
+#                    value = cell.value
+                if typ == 'date' or typ == 'time':
+                    value = self._parse_datetime(cell)
+                else:
+                    value = cell.value
+            elif isinstance(cell.value, bool):
+                value = cell.value
+#            elif isinstance(cell.value, type(None)):
+#                value = np.nan
+            else:
+                value = np.nan
+            return value
+
+        ret_dict = False
+
+        # Keep sheetname to maintain backwards compatibility.
+        if isinstance(sheetname, list):
+            sheets = sheetname
+            ret_dict = True
+        elif sheetname is None:
+            sheets = self.sheet_names
+            ret_dict = True
+        else:
+            sheets = [sheetname]
+
+        # handle same-type duplicates.
+        sheets = list(set(sheets))
+
+        output = {}
+
+        for asheetname in sheets:
+            if verbose:
+                print("Reading sheet %s" % asheetname)
 
-                    row.append(value)
+            # sheetname can be index or string
+            sheet = self.book.sheets[asheetname]
 
-            data.append(row)
+            data = []
+            should_parse = {}
+            for i in range(sheet.nrows()):
+                row = []
+                for j, cell in enumerate(sheet.row(i)):
+
+                    if parse_cols is not None and j not in should_parse:
+                        should_parse[j] = self._should_parse(j, parse_cols)
 
-        parser = TextParser(data, header=header, index_col=index_col,
-                            has_index_names=has_index_names,
-                            na_values=na_values,
-                            thousands=thousands,
-                            parse_dates=parse_dates,
-                            date_parser=date_parser,
-                            skiprows=skiprows,
-                            skip_footer=skip_footer,
-                            chunksize=chunksize,
-                            **kwds)
+                    if parse_cols is None or should_parse[j]:
+                        row.append(_parse_cell(cell))
+
+                data.append(row)
+
+            parser = TextParser(data, header=header, index_col=index_col,
+                                has_index_names=has_index_names,
+                                na_values=na_values,
+                                thousands=thousands,
+                                parse_dates=parse_dates,
+                                date_parser=date_parser,
+                                skiprows=skiprows,
+                                skip_footer=skip_footer,
+                                chunksize=chunksize,
+                                **kwds)
+            output[asheetname] = parser.read()
 
-        return parser.read()
+        if ret_dict:
+            return output
+        else:
+            return output[asheetname]
 
     def _parse_datetime(self, cell):
         """Parse the date or time from on ods cell to a datetime object.
@@ -616,7 +643,7 @@ def _parse_datetime(self, cell):
         def _value2date(value):
             try:
                 return datetime.datetime.strptime(value, '%Y-%m-%d')
-            except ValueError:#, TypeError):
+            except ValueError:  # , TypeError):
                 return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
 
         # Technically it is not necessary to try to derive the date/time
@@ -650,7 +677,7 @@ def _value2date(value):
             value = _value2date(cell.value)
         elif cell.value_type == 'time':
             try:
-                # FIXME: what if the decimal separator is a comma in the locale?
+                # FIXME: what if the locale's decimal separator is a comma?
                 value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
             except ValueError:
                 value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')
@@ -664,9 +691,9 @@ def _print_ods_cellinfo(self, cell):
         Cell attributes are documented here:
         https://pythonhosted.org/ezodf/tableobjects.html#id2
         """
-        print('   plaintext:', cell.plaintext()) # no formatting
+        print('   plaintext:', cell.plaintext())  # no formatting
         # formatted, but what is difference with value?
-        print('display_form:', cell.display_form) # format, ?=plaintext
+        print('display_form:', cell.display_form)  # format, ?=plaintext
         print('       value:', cell.value)       # data handled
         print('  value_type:', cell.value_type)  # data type
         print('     formula:', cell.formula)