diff --git a/ci/print_versions.py b/ci/print_versions.py index 0df8bb7e28786..a805a43ac256e 100755 --- a/ci/print_versions.py +++ b/ci/print_versions.py @@ -92,6 +92,12 @@ except: print("openpyxl: Not installed") +try: + import xlsxwriter + print("xlsxwriter: %s" % xlsxwriter.__version__) +except: + print("xlsxwriter: Not installed") + try: import xlrd print("xlrd: %s" % xlrd.__VERSION__) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 5038b9e2b6552..b92ccd2754661 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -4,3 +4,4 @@ python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 +xlsxwriter==0.4.2 diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 6a94d48ad7a5f..d35931257d4d3 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -8,6 +8,7 @@ numexpr==2.1 tables==2.3.1 matplotlib==1.1.1 openpyxl==1.6.2 +xlsxwriter==0.4.2 xlrd==0.9.2 patsy==0.1.0 html5lib==1.0b2 diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index 70c398816f23c..e09726b6d93d7 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -2,6 +2,7 @@ python-dateutil pytz==2013b xlwt==0.7.5 openpyxl==1.6.2 +xlsxwriter==0.4.2 xlrd==0.9.2 numpy==1.6.1 cython==0.19.1 diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index e907a2fa828f1..88ba3f4edf723 100644 --- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.2 xlrd==0.9.2 numpy==1.6.2 cython==0.19.1 diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index eb1e725d98040..5ff99cdb1f627 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.2 xlrd==0.9.2 html5lib==1.0b2 numpy==1.7.1 diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 
96f9fd912b664..325573a44409f 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -695,13 +695,13 @@ Writing to an excel file .. ipython:: python - df.to_excel('foo.xlsx', sheet_name='sheet1') + df.to_excel('foo.xlsx', sheet_name='Sheet1') Reading from an excel file .. ipython:: python - pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA']) + pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA']) .. ipython:: python :suppress: diff --git a/doc/source/io.rst b/doc/source/io.rst index 67cbe35144461..7de37bc4c8a7d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1667,7 +1667,7 @@ written. For example: .. code-block:: python - df.to_excel('path_to_file.xlsx', sheet_name='sheet1') + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``openpyxl``. @@ -1680,8 +1680,8 @@ one can use the ExcelWriter class, as in the following example: .. code-block:: python writer = ExcelWriter('path_to_file.xlsx') - df1.to_excel(writer, sheet_name='sheet1') - df2.to_excel(writer, sheet_name='sheet2') + df1.to_excel(writer, sheet_name='Sheet1') + df2.to_excel(writer, sheet_name='Sheet2') writer.save() .. 
_io.hdf5: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e9f28122b43d..cf70bce12c403 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1356,9 +1356,10 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, tupleize_cols=tupleize_cols) formatter.save() - def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', + def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, cols=None, header=True, index=True, - index_label=None, startrow=0, startcol=0): + index_label=None, startrow=0, startcol=0, + engine=None): """ Write DataFrame to a excel sheet @@ -1366,7 +1367,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', ---------- excel_writer : string or ExcelWriter object File path or existing ExcelWriter - sheet_name : string, default 'sheet1' + sheet_name : string, default 'Sheet1' Name of sheet which will contain DataFrame na_rep : string, default '' Missing data representation @@ -1385,6 +1386,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', sequence should be given if the DataFrame uses MultiIndex. startow : upper left cell row to dump data frame startcol : upper left cell column to dump data frame + engine : Excel writer class Notes @@ -1393,14 +1395,14 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', to the existing workbook. 
This can be used to save different DataFrames to one workbook >>> writer = ExcelWriter('output.xlsx') - >>> df1.to_excel(writer,'sheet1') - >>> df2.to_excel(writer,'sheet2') + >>> df1.to_excel(writer,'Sheet1') + >>> df2.to_excel(writer,'Sheet2') >>> writer.save() """ from pandas.io.excel import ExcelWriter need_save = False if isinstance(excel_writer, compat.string_types): - excel_writer = ExcelWriter(excel_writer) + excel_writer = ExcelWriter(excel_writer, engine) need_save = True formatter = fmt.ExcelFormatter(self, diff --git a/pandas/core/panel.py b/pandas/core/panel.py index bca6f985ac689..3b1dfdf9c08d8 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -458,7 +458,7 @@ def to_sparse(self, fill_value=None, kind='block'): default_kind=kind, default_fill_value=fill_value) - def to_excel(self, path, na_rep=''): + def to_excel(self, path, na_rep='', engine=None): """ Write each DataFrame in Panel to a separate excel sheet @@ -468,9 +468,10 @@ def to_excel(self, path, na_rep=''): File path or existing ExcelWriter na_rep : string, default '' Missing data representation + engine : string, Excel writer class """ from pandas.io.excel import ExcelWriter - writer = ExcelWriter(path) + writer = ExcelWriter(path, engine=engine) for item, df in compat.iteritems(self): name = str(item) df.to_excel(writer, name, na_rep=na_rep) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5ff42c5cd12a6..ad7c0b081b9b5 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -12,9 +12,20 @@ from pandas.tseries.period import Period from pandas import json from pandas.compat import map, zip, reduce, range, lrange +from pandas.core import config import pandas.compat as compat from warnings import warn +# Set up the io.excel specific configuration. +writer_engine_doc = """ +: string + The default Excel engine. The options are 'openpyxl' (the default), 'xlwt' + and 'xlsxwriter'. 
+""" +with config.config_prefix('io.excel'): + config.register_option('writer_engine', None, writer_engine_doc, + validator=str) + def read_excel(path_or_buf, sheetname, **kwds): """Read an Excel table into a pandas DataFrame @@ -256,7 +267,7 @@ def to_xls(style_dict, num_format_str=None): import xlwt def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): - """helper wich recursively generate an xlwt easy style string + """helper which recursively generates an xlwt easy style string for example: hstyle = {"font": {"bold": True}, @@ -318,6 +329,37 @@ def to_xlsx(style_dict): return xls_style + @staticmethod + def to_xlsxwriter(workbook, style_dict, num_format_str=None): + """ + Converts a style_dict to an XlsxWriter format object. + Parameters + ---------- + workbook: Reference to the ExcelWriter XlsxWriter workbook. + style_dict: Style dictionary to convert. + num_format_str: Optional number format for the cell format. + """ + if style_dict is None: + return None + + # Create a XlsxWriter format object. + xl_format = workbook.add_format() + + # Map the cell font to XlsxWriter font properties. + if style_dict.get('font'): + font = style_dict['font'] + if font.get('bold'): + xl_format.set_bold() + + # Map the cell borders to XlsxWriter border properties. 
+ if style_dict.get('borders'): + xl_format.set_border() + + if num_format_str is not None: + xl_format.set_num_format(num_format_str) + + return xl_format + def _conv_value(val): # convert value for excel dump @@ -341,22 +383,24 @@ class ExcelWriter(object): path : string Path to xls file """ - def __init__(self, path): - self.use_xlsx = True - if path.endswith('.xls'): - self.use_xlsx = False - import xlwt - self.book = xlwt.Workbook() - self.fm_datetime = xlwt.easyxf( - num_format_str='YYYY-MM-DD HH:MM:SS') - self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') - else: - from openpyxl.workbook import Workbook - self.book = Workbook() # optimized_write=True) - # open pyxl 1.6.1 adds a dummy sheet remove it - if self.book.worksheets: - self.book.remove_sheet(self.book.worksheets[0]) - self.path = path + def __init__(self, path, engine=None, **engine_kwargs): + + if engine is None: + default = config.get_option('io.excel.writer_engine') + if default is not None: + engine = default + elif path.endswith('.xls'): + engine = 'xlwt' + else: + engine = 'openpyxl' + + try: + writer_init = getattr(self, "_init_%s" % engine) + except AttributeError: + raise ValueError("No engine: %s" % engine) + + writer_init(path, **engine_kwargs) + self.sheets = {} self.cur_sheet = None @@ -364,7 +408,10 @@ def save(self): """ Save workbook to disk """ - self.book.save(self.path) + if self.engine == 'xlsxwriter': + self.book.close() + else: + self.book.save(self.path) def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): """ @@ -381,16 +428,20 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): """ if sheet_name is None: sheet_name = self.cur_sheet + if sheet_name is None: # pragma: no cover raise ValueError('Must pass explicit sheet_name or set ' - 'cur_sheet property') - if self.use_xlsx: - self._writecells_xlsx(cells, sheet_name, startrow, startcol) - else: - self._writecells_xls(cells, sheet_name, startrow, startcol) + 'cur_sheet property') - 
def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): + try: + _writecells = getattr(self, "_writecells_%s" % self.engine) + except AttributeError: + raise ValueError("No _writecells_%s() method" % self.engine) + _writecells(cells, sheet_name, startrow, startcol) + + def _writecells_openpyxl(self, cells, sheet_name, startrow, startcol): + # Write the frame cells using openpyxl. from openpyxl.cell import get_column_letter if sheet_name in self.sheets: @@ -426,7 +477,8 @@ def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): cletterend, startrow + cell.mergestart + 1)) - def _writecells_xls(self, cells, sheet_name, startrow, startcol): + def _writecells_xlwt(self, cells, sheet_name, startrow, startcol): + # Write the frame cells using xlwt. if sheet_name in self.sheets: wks = self.sheets[sheet_name] else: @@ -464,3 +516,78 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol): wks.write(startrow + cell.row, startcol + cell.col, val, style) + + def _writecells_xlsxwriter(self, cells, sheet_name, startrow, startcol): + # Write the frame cells using xlsxwriter. 
+ if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_worksheet(sheet_name) + self.sheets[sheet_name] = wks + + style_dict = {} + + for cell in cells: + val = _conv_value(cell.val) + + num_format_str = None + if isinstance(cell.val, datetime.datetime): + num_format_str = "YYYY-MM-DD HH:MM:SS" + elif isinstance(cell.val, datetime.date): + num_format_str = "YYYY-MM-DD" + + stylekey = json.dumps(cell.style) + if num_format_str: + stylekey += num_format_str + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = CellStyleConverter.to_xlsxwriter(self.book, + cell.style, + num_format_str) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.merge_range(startrow + cell.row, + startcol + cell.col, + startrow + cell.mergestart, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) + + def _init_xlwt(self, filename, **engine_kwargs): + # Use the xlwt module as the Excel writer. + import xlwt + + self.engine = 'xlwt' + self.path = filename + self.book = xlwt.Workbook() + self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') + + def _init_openpyxl(self, filename, **engine_kwargs): + # Use the openpyxl module as the Excel writer. + from openpyxl.workbook import Workbook + + self.engine = 'openpyxl' + self.path = filename + # Create workbook object with default optimized_write=True. + self.book = Workbook() + # Openpyxl 1.6.1 adds a dummy sheet. We remove it. + if self.book.worksheets: + self.book.remove_sheet(self.book.worksheets[0]) + + def _init_xlsxwriter(self, filename, **engine_kwargs): + # Use the xlsxwriter module as the Excel writer. 
+ import xlsxwriter + + options = dict(engine_kwargs) + + options.setdefault('default_date_format', 'YYYY-MM-DD HH:MM:SS') + + self.engine = 'xlsxwriter' + self.book = xlsxwriter.Workbook(filename, options) diff --git a/pandas/io/tests/data/xw_frame01.xlsx b/pandas/io/tests/data/xw_frame01.xlsx new file mode 100644 index 0000000000000..afc3cbc0eee09 Binary files /dev/null and b/pandas/io/tests/data/xw_frame01.xlsx differ diff --git a/pandas/io/tests/data/xw_frame02.xlsx b/pandas/io/tests/data/xw_frame02.xlsx new file mode 100644 index 0000000000000..0c4c70210b665 Binary files /dev/null and b/pandas/io/tests/data/xw_frame02.xlsx differ diff --git a/pandas/io/tests/data/xw_frame03.xlsx b/pandas/io/tests/data/xw_frame03.xlsx new file mode 100644 index 0000000000000..46c1d437ae3d5 Binary files /dev/null and b/pandas/io/tests/data/xw_frame03.xlsx differ diff --git a/pandas/io/tests/data/xw_frame04.xlsx b/pandas/io/tests/data/xw_frame04.xlsx new file mode 100644 index 0000000000000..c817d3f7bd803 Binary files /dev/null and b/pandas/io/tests/data/xw_frame04.xlsx differ diff --git a/pandas/io/tests/data/xw_frame05.xlsx b/pandas/io/tests/data/xw_frame05.xlsx new file mode 100644 index 0000000000000..7e18d7282df8e Binary files /dev/null and b/pandas/io/tests/data/xw_frame05.xlsx differ diff --git a/pandas/io/tests/data/xw_frame06.xlsx b/pandas/io/tests/data/xw_frame06.xlsx new file mode 100644 index 0000000000000..f16c4375ebd79 Binary files /dev/null and b/pandas/io/tests/data/xw_frame06.xlsx differ diff --git a/pandas/io/tests/data/xw_frame07.xlsx b/pandas/io/tests/data/xw_frame07.xlsx new file mode 100644 index 0000000000000..1ef363edd887d Binary files /dev/null and b/pandas/io/tests/data/xw_frame07.xlsx differ diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 3f41be6ae64c6..76f3a5fd3c8f8 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -35,6 +35,8 @@ from numpy.testing.decorators import slow from 
pandas.parser import OverflowError +from pandas.core import config + def _skip_if_no_xlrd(): try: @@ -60,6 +62,13 @@ def _skip_if_no_openpyxl(): raise nose.SkipTest('openpyxl not installed, skipping') +def _skip_if_no_xlsxwriter(): + try: + import xlsxwriter + except ImportError: + raise nose.SkipTest('xlsxwriter not installed, skipping') + + def _skip_if_no_excelsuite(): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -239,6 +248,7 @@ def check_excel_sheet_by_name_raise(self, ext): def test_excel_sheet_by_name_raise(self): _skip_if_no_xlrd() _skip_if_no_xlwt() + _skip_if_no_openpyxl() for ext in ('xls', 'xlsx'): self.check_excel_sheet_by_name_raise(ext) @@ -320,6 +330,13 @@ def test_excel_roundtrip_xlsx(self): _skip_if_no_excelsuite() self._check_extension('xlsx') + def test_excel_roundtrip_xlsxwriter(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension(self, ext): path = '__tmp_to_excel_from_excel__.' + ext @@ -366,6 +383,14 @@ def test_excel_roundtrip_xlsx_mixed(self): self._check_extension_mixed('xlsx') + def test_excel_roundtrip_xlsxwriter_mixed(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_mixed('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_mixed(self, ext): path = '__tmp_to_excel_from_excel_mixed__.' + ext @@ -386,6 +411,13 @@ def test_excel_roundtrip_xlsx_tsframe(self): _skip_if_no_xlrd() self._check_extension_tsframe('xlsx') + def test_excel_roundtrip_xlsxwriter_tsframe(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_tsframe('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_tsframe(self, ext): path = '__tmp_to_excel_from_excel_tsframe__.' 
+ ext @@ -405,6 +437,13 @@ def test_excel_roundtrip_xlsx_int64(self): _skip_if_no_excelsuite() self._check_extension_int64('xlsx') + def test_excel_roundtrip_xlsxwriter_int64(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_int64('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_int64(self, ext): path = '__tmp_to_excel_from_excel_int64__.' + ext @@ -431,6 +470,13 @@ def test_excel_roundtrip_xlsx_bool(self): _skip_if_no_excelsuite() self._check_extension_bool('xlsx') + def test_excel_roundtrip_xlsxwriter_bool(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_bool('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_bool(self, ext): path = '__tmp_to_excel_from_excel_bool__.' + ext @@ -457,6 +503,13 @@ def test_excel_roundtrip_xlsx_sheets(self): _skip_if_no_excelsuite() self._check_extension_sheets('xlsx') + def test_excel_roundtrip_xlsxwriter_sheets(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_sheets('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_sheets(self, ext): path = '__tmp_to_excel_from_excel_sheets__.' + ext @@ -490,6 +543,13 @@ def test_excel_roundtrip_xlsx_colaliases(self): _skip_if_no_excelsuite() self._check_extension_colaliases('xlsx') + def test_excel_roundtrip_xlsxwriter_colaliases(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_colaliases('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_colaliases(self, ext): path = '__tmp_to_excel_from_excel_aliases__.' 
+ ext @@ -518,6 +578,13 @@ def test_excel_roundtrip_xlsx_indexlabels(self): _skip_if_no_excelsuite() self._check_extension_indexlabels('xlsx') + def test_excel_roundtrip_xlsxwriter_indexlabels(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_extension_indexlabels('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_extension_indexlabels(self, ext): path = '__tmp_to_excel_from_excel_indexlabels__.' + ext @@ -554,7 +621,7 @@ def _check_extension_indexlabels(self, ext): self.assertEqual(frame.index.names, recons.index.names) # test index_labels in same row as column names - path = '%s.xls' % tm.rands(10) + path = '%s.%s' % (tm.rands(10), ext) with ensure_clean(path) as path: @@ -567,7 +634,9 @@ def _check_extension_indexlabels(self, ext): reader = ExcelFile(path) recons = reader.parse('test1', index_col=[0, 1]) - tm.assert_frame_equal(df, recons) + # Test with less_precise or else xlsxwriter fails with no + # visible precision difference. 
+ tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self): _skip_if_no_xlrd() @@ -617,6 +686,21 @@ def test_to_excel_periodindex(self): rs = reader.parse('sht1', index_col=0, parse_dates=True) tm.assert_frame_equal(xp, rs.to_period('M')) + def test_to_excel_periodindex_xlsxwriter(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + + path = '__tmp_to_excel_periodindex__.xlsx' + frame = self.tsframe + xp = frame.resample('M', kind='period') + + with ensure_clean(path) as path: + xp.to_excel(path, 'sht1', engine='xlsxwriter') + + reader = ExcelFile(path) + rs = reader.parse('sht1', index_col=0, parse_dates=True) + tm.assert_frame_equal(xp, rs.to_period('M')) + def test_to_excel_multiindex(self): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -628,6 +712,13 @@ def test_to_excel_multiindex_xlsx(self): _skip_if_no_openpyxl() self._check_excel_multiindex('xlsx') + def test_to_excel_multiindex_xlsxwriter(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_excel_multiindex('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_excel_multiindex(self, ext): path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext @@ -660,6 +751,13 @@ def test_to_excel_multiindex_xlsx_dates(self): _skip_if_no_xlrd() self._check_excel_multiindex_dates('xlsx') + def test_to_excel_multiindex_xlsxwriter_dates(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + config.set_option('io.excel.writer_engine', 'xlsxwriter') + self._check_excel_multiindex_dates('xlsx') + config.set_option('io.excel.writer_engine', None) + def _check_excel_multiindex_dates(self, ext): path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' 
+ ext @@ -703,6 +801,26 @@ def test_to_excel_float_format(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp) + def test_to_excel_float_format_xlsxwriter(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + + filename = '__tmp_to_excel_float_format__.xlsx' + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with ensure_clean(filename) as filename: + df.to_excel(filename, 'test1', float_format='%.2f', + engine='xlsxwriter') + + reader = ExcelFile(filename) + rs = reader.parse('test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) + def test_to_excel_unicode_filename(self): _skip_if_no_excelsuite() @@ -730,6 +848,34 @@ def test_to_excel_unicode_filename(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp) + def test_to_excel_unicode_filename_xlsxwriter(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + + filename = u('\u0192u.xlsx') + + try: + f = open(filename, 'wb') + except UnicodeEncodeError: + raise nose.SkipTest('no unicode file names on this system') + else: + f.close() + + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with ensure_clean(filename) as filename: + df.to_excel(filename, 'test1', float_format='%.2f', + engine='xlsxwriter') + + reader = ExcelFile(filename) + rs = reader.parse('test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) + def test_to_excel_styleconverter(self): from pandas.io.excel import CellStyleConverter diff --git a/pandas/io/tests/test_xlsxwriter_frame01.py b/pandas/io/tests/test_xlsxwriter_frame01.py new file mode 100644 index 0000000000000..48a124d286998 --- /dev/null 
+++ b/pandas/io/tests/test_xlsxwriter_frame01.py @@ -0,0 +1,144 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. + + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame01.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=False, + index=False, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def 
test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=False, + index=False) + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame02.py b/pandas/io/tests/test_xlsxwriter_frame02.py new file mode 100644 index 0000000000000..130921a03b2ef --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame02.py @@ -0,0 +1,142 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. 
+# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. + + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame02.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=False, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import 
config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=False) + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame03.py b/pandas/io/tests/test_xlsxwriter_frame03.py new file mode 100644 index 0000000000000..dda70136633b8 --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame03.py @@ -0,0 +1,140 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. 
+ + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame03.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=True, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, + engine='xlsxwriter') + + df.to_excel(writer, sheet_name='Sheet1') + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=True) + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + 
self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1') + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame04.py b/pandas/io/tests/test_xlsxwriter_frame04.py new file mode 100644 index 0000000000000..cf6d567dcb758 --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame04.py @@ -0,0 +1,145 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from numpy import nan +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. 
+ + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame04.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [nan, 11, 12, 13], + 'B': [2, 4, nan, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=False, + index=False, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [nan, 11, 12, 13], + 'B': [2, 4, nan, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [nan, 11, 12, 13], + 'B': [2, 4, nan, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=False, + index=False) + + #################################################### + + got, exp = 
_compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [nan, 11, 12, 13], + 'B': [2, 4, nan, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame05.py b/pandas/io/tests/test_xlsxwriter_frame05.py new file mode 100644 index 0000000000000..342df9fc5f9e5 --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame05.py @@ -0,0 +1,148 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. 
+ + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame05.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + cols=['B', 'A'], + header=False, + index=False, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + cols=['B', 'A'], + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + cols=['B', 'A'], + header=False, + index=False) + + 
#################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + cols=['B', 'A'], + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame06.py b/pandas/io/tests/test_xlsxwriter_frame06.py new file mode 100644 index 0000000000000..3a47954a7f478 --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame06.py @@ -0,0 +1,144 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. 
+ + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame06.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=True, + index_label='Foo', + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + index_label='Foo',) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + header=True, + index=True, + index_label='Foo') + + #################################################### + + got, exp = 
_compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + index_label='Foo') + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/test_xlsxwriter_frame07.py b/pandas/io/tests/test_xlsxwriter_frame07.py new file mode 100644 index 0000000000000..989bfbcb5b879 --- /dev/null +++ b/pandas/io/tests/test_xlsxwriter_frame07.py @@ -0,0 +1,152 @@ +############################################################################### +# +# Tests for Pandas ExcelWriter xlsxwriter option. +# + +import unittest +import os +from pandas.core.api import DataFrame +import pandas.util.testing as testutil +from pandas.io.excel import ExcelWriter +from .xlsxwriter_test_helper import _compare_xlsx_files + + +class TestCompareXLSXFiles(unittest.TestCase): + """ + Test file created by XlsxWriter against a file created by Excel. 
+ + """ + + def setUp(self): + self.maxDiff = None + + filename = 'xw_frame07.xlsx' + test_dir = testutil.get_data_path() + self.got_filename = test_dir + '_test_' + filename + self.exp_filename = test_dir + filename + + self.ignore_files = [] + self.ignore_elements = {} + + def test_to_excel(self): + """Test the creation of a simple workbook using to_excel().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + startcol=2, + startrow=1, + header=False, + index=False, + engine='xlsxwriter') + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel(self): + """Test the creation of a simple workbook using ExcelWriter().""" + filename = self.got_filename + + #################################################### + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename, engine='xlsxwriter') + + df.to_excel(writer, + sheet_name='Sheet1', + startcol=2, + startrow=1, + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_to_excel_with_config(self): + """Test workbook creation using to_excel() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + df.to_excel(filename, + sheet_name='Sheet1', + startcol=2, + startrow=1, + header=False, + index=False) + + 
#################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def test_excelwriter_to_excel_with_config(self): + """Test workbook creation using ExcelWriter() and pandas.config.""" + filename = self.got_filename + + #################################################### + + from pandas.core import config + config.set_option('io.excel.writer_engine', 'xlsxwriter') + + df = DataFrame({'A': [10, 11, 12, 13], + 'B': [2, 4, 6, 8]}) + + writer = ExcelWriter(filename) + + df.to_excel(writer, + sheet_name='Sheet1', + startcol=2, + startrow=1, + header=False, + index=False) + + writer.save() + + #################################################### + + got, exp = _compare_xlsx_files(self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements) + + self.assertEqual(got, exp) + + def tearDown(self): + # Cleanup. + if os.path.exists(self.got_filename): + os.remove(self.got_filename) + +if __name__ == '__main__': + unittest.main() diff --git a/pandas/io/tests/xlsxwriter_test_helper.py b/pandas/io/tests/xlsxwriter_test_helper.py new file mode 100644 index 0000000000000..1780db2e48d71 --- /dev/null +++ b/pandas/io/tests/xlsxwriter_test_helper.py @@ -0,0 +1,220 @@ +############################################################################### +# +# Helper functions for testing XlsxWriter generated files against Excel +# files. Copy of helperfunctions.py in XlsxWriter. +# + +import re +import sys +import os.path +from zipfile import ZipFile +from zipfile import BadZipfile +from zipfile import LargeZipFile + + +def _xml_to_list(xml_str): + # Convert test generated XML strings into lists for comparison testing. + + # Split the XML string at tag boundaries. + parser = re.compile(r'>\s*<') + elements = parser.split(xml_str.strip()) + + # Add back the removed brackets. 
+ for index, element in enumerate(elements): + if not element[0] == '<': + elements[index] = '<' + elements[index] + if not element[-1] == '>': + elements[index] = elements[index] + '>' + + return elements + + +def _vml_to_list(vml_str): + # Convert an Excel generated VML string into a list for comparison testing. + # + # The VML data in the testcases is taken from Excel 2007 files. The data + # has to be massaged significantly to make it suitable for comparison. + # + # The VML produced by XlsxWriter can be parsed as ordinary XML. + vml_str = vml_str.replace("\r", "") + + vml = vml_str.split("\n") + vml_str = '' + + for line in vml: + # Skip blank lines. + if not line: + continue + + # Strip leading and trailing whitespace. + line = line.strip() + + # Convert VMLs attribute quotes. + line = line.replace("'", '"') + + # Add space between attributes. + if re.search('"$', line): + line += " " + + # Add newline after element end. + if re.search('>$', line): + line += "\n" + + # Split multiple elements. + line = line.replace('><', ">\n<") + + # Put all of Anchor on one line. + if line == "\n": + line = line.strip() + + vml_str += line + + # Remove the final newline. + vml_str = vml_str.rstrip() + + return vml_str.split("\n") + + +def _sort_rel_file_data(xml_elements): + # Re-order the relationship elements in an array of XLSX XML rel + # (relationship) data. This is necessary for comparison since + # Excel can produce the elements in a semi-random order. + + # We don't want to sort the first or last elements. + first = xml_elements.pop(0) + last = xml_elements.pop() + + # Sort the relationship elements. + xml_elements.sort() + + # Add back the first and last elements. + xml_elements.insert(0, first) + xml_elements.append(last) + + return xml_elements + + +def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): + # Compare two XLSX files by extracting the XML files from each + # zip archive and comparing them. 
+ # + # This is used to compare an "expected" file produced by Excel + # with a "got" file produced by XlsxWriter. + # + # In order to compare the XLSX files we convert the data in each + # XML file into an list of XML elements. + try: + # Open the XlsxWriter as a zip file for testing. + got_zip = ZipFile(got_file, 'r') + except IOError: + e = sys.exc_info()[1] + error = "XlsxWriter file error: " + str(e) + return error, '' + except (BadZipfile, LargeZipFile): + e = sys.exc_info()[1] + error = "XlsxWriter zipfile error, '" + exp_file + "': " + str(e) + return error, '' + + try: + # Open the Excel as a zip file for testing. + exp_zip = ZipFile(exp_file, 'r') + except IOError: + # For Python 2.5+ compatibility. + e = sys.exc_info()[1] + error = "Excel file error: " + str(e) + return error, '' + except (BadZipfile, LargeZipFile): + e = sys.exc_info()[1] + error = "Excel zipfile error, '" + exp_file + "': " + str(e) + return error, '' + + # Get the filenames from the zip files. + got_files = sorted(got_zip.namelist()) + exp_files = sorted(exp_zip.namelist()) + + # Ignore some test specific filenames. + got_files = [name for name in got_files if name not in ignore_files] + exp_files = [name for name in exp_files if name not in ignore_files] + + # Check that each XLSX container has the same files. + if got_files != exp_files: + return got_files, exp_files + + # Compare each file in the XLSX containers. + for filename in exp_files: + + # Skip comparison of binary files based on extension. + extension = os.path.splitext(filename)[1] + if extension in ('.png', '.jpeg', '.bmp'): + continue + + got_xml_str = got_zip.read(filename) + exp_xml_str = exp_zip.read(filename) + + if sys.hexversion >= 0x030000: + got_xml_str = got_xml_str.decode('utf-8') + exp_xml_str = exp_xml_str.decode('utf-8') + + # Remove dates and user specific data from the core.xml data. 
+ if filename == 'docProps/core.xml': + exp_xml_str = re.sub(r' ?John', '', exp_xml_str) + exp_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', + '', exp_xml_str) + got_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', + '', got_xml_str) + + # Remove workbookView dimensions which are almost always different + # and calcPr which can have different Excel version ids. + if filename == 'xl/workbook.xml': + exp_xml_str = re.sub(r']*>', + '', exp_xml_str) + got_xml_str = re.sub(r']*>', + '', got_xml_str) + exp_xml_str = re.sub(r']*>', + '', exp_xml_str) + got_xml_str = re.sub(r']*>', + '', got_xml_str) + + # Remove printer specific settings from Worksheet pageSetup elements. + if re.match(r'xl/worksheets/sheet\d.xml', filename): + exp_xml_str = re.sub(r'horizontalDpi="200" ', '', exp_xml_str) + exp_xml_str = re.sub(r'verticalDpi="200" ', '', exp_xml_str) + exp_xml_str = re.sub(r'(]*>', + '', exp_xml_str) + got_xml_str = re.sub(r']*>', + '', got_xml_str) + + # Convert the XML string to lists for comparison. + if re.search('.vml$', filename): + got_xml = _xml_to_list(got_xml_str) + exp_xml = _vml_to_list(exp_xml_str) + else: + got_xml = _xml_to_list(got_xml_str) + exp_xml = _xml_to_list(exp_xml_str) + + # Ignore test specific XML elements for defined filenames. + if filename in ignore_elements: + patterns = ignore_elements[filename] + + for pat in patterns: + exp_xml = [tag for tag in exp_xml if not re.match(pat, tag)] + got_xml = [tag for tag in got_xml if not re.match(pat, tag)] + + # Reorder the XML elements in the XLSX relationship files. + if filename == '[Content_Types].xml' or re.search('.rels$', filename): + got_xml = _sort_rel_file_data(got_xml) + exp_xml = _sort_rel_file_data(exp_xml) + + # Compared the XML elements in each file. + if got_xml != exp_xml: + got_xml.insert(0, filename) + exp_xml.insert(0, filename) + return got_xml, exp_xml + + # If we got here the files are the same. 
+ return 'Ok', 'Ok' diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 8ad88374f40f6..0c7e8b15bc7a8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1423,6 +1423,26 @@ def test_to_excel(self): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) + def test_to_excel_xlsxwriter(self): + try: + import xlrd + import xlsxwriter + from pandas.io.excel import ExcelFile + except ImportError: + raise nose.SkipTest + + path = '__tmp__.xlsx' + with ensure_clean(path) as path: + self.panel.to_excel(path, engine='xlsxwriter') + try: + reader = ExcelFile(path) + except ImportError: + raise nose.SkipTest + + for item, df in compat.iteritems(self.panel): + recdf = reader.parse(str(item), index_col=0) + assert_frame_equal(df, recdf) + def test_dropna(self): p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) p.ix[:, ['b', 'd'], 0] = np.nan