From 086e6f92628730c4dbde0c982b320125737332ad Mon Sep 17 00:00:00 2001 From: Tim Morris <35537687+TimoMorris@users.noreply.github.com> Date: Sat, 11 Jun 2022 18:05:05 +0100 Subject: [PATCH 1/7] TYP: add type annotation for datetime_format parameter in ODSWriter (GH44284) --- pandas/io/excel/_odswriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index f5367df6f228d..4295af23054cb 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -33,7 +33,7 @@ def __init__( path: FilePath | WriteExcelBuffer | ExcelWriter, engine: str | None = None, date_format: str | None = None, - datetime_format=None, + datetime_format: str | None = None, mode: str = "w", storage_options: StorageOptions = None, if_sheet_exists: str | None = None, From 9723e49990163113d7a905852927c4c569877e53 Mon Sep 17 00:00:00 2001 From: Tim Morris <35537687+TimoMorris@users.noreply.github.com> Date: Sat, 11 Jun 2022 18:43:42 +0100 Subject: [PATCH 2/7] TST: Fix date/datetime format test (GH44284) Swap rows and columns so that columns are homogeneous and dates aren't converted to datetimes (i.e. datetime64[ns]) on initialisation of df. Clarify scope of test. 
--- pandas/tests/io/excel/test_writers.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 42483645d9fc3..e2d9a956ad332 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -652,25 +652,27 @@ def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): tm.assert_frame_equal(tsframe, recons) - def test_excel_date_datetime_format(self, ext, path): + def test_excel_roundtrip_date_datetime_format(self, ext, path): # see gh-4133 # - # Excel output format strings + # Test that df written with custom date/datetime format strings + # is read back the same as if written using the default formats, + # and that the values are still recognised as dates/datetimes. df = DataFrame( [ - [date(2014, 1, 31), date(1999, 9, 24)], - [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], ], - index=["DATE", "DATETIME"], - columns=["X", "Y"], + index=["X", "Y"], + columns=["DATE", "DATETIME"], ) df_expected = DataFrame( [ - [datetime(2014, 1, 31), datetime(1999, 9, 24)], - [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + [datetime(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [datetime(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], ], - index=["DATE", "DATETIME"], - columns=["X", "Y"], + index=["X", "Y"], + columns=["DATE", "DATETIME"], ) with tm.ensure_clean(ext) as filename2: From e7e213864484951fb526dc9884946797d1963ad0 Mon Sep 17 00:00:00 2001 From: Tim Morris <35537687+TimoMorris@users.noreply.github.com> Date: Sat, 11 Jun 2022 23:34:09 +0100 Subject: [PATCH 3/7] BUG: Fix date/datetime formatting being ignored by Excel writers (GH44284) --- pandas/io/excel/_odswriter.py | 2 + pandas/io/excel/_openpyxl.py | 2 + pandas/io/excel/_xlwt.py | 2 + 
pandas/tests/io/excel/test_writers.py | 58 +++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 4295af23054cb..8b4a7425c6586 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -47,6 +47,8 @@ def __init__( super().__init__( path, + date_format=date_format, + datetime_format=datetime_format, mode=mode, storage_options=storage_options, if_sheet_exists=if_sheet_exists, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 6d70b3f319f37..0117a735197c5 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -58,6 +58,8 @@ def __init__( super().__init__( path, + date_format=date_format, + datetime_format=datetime_format, mode=mode, storage_options=storage_options, if_sheet_exists=if_sheet_exists, diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index 234d9e72de10d..777f3a3131ee2 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -51,6 +51,8 @@ def __init__( super().__init__( path, + date_format=date_format, + datetime_format=datetime_format, mode=mode, storage_options=storage_options, if_sheet_exists=if_sheet_exists, diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e2d9a956ad332..e4c16b33f2559 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -698,6 +698,64 @@ def test_excel_roundtrip_date_datetime_format(self, ext, path): # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) + def test_excel_date_datetime_format(self, engine, path): + # see gh-44284 + # + # Test that custom date/datetime formats are respected + # by inspecting formatting info in written file. 
+ df = DataFrame( + [ + [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["X", "Y"], + columns=["DATE", "DATETIME"], + ) + + with ExcelWriter( + path, + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer: + df.to_excel(writer, "test1") + + if engine == "odf": + pytest.skip("Feature not supported by odf.") + elif engine == "xlwt": + # formatting_info defaults to False + # so have to use xlrd.open_workbook() directly + # as there's no way to pass this parameter through + # if using ExcelFile to open the file + from xlrd import open_workbook + + with open_workbook(path, formatting_info=True) as book: + sh = book["test1"] + xf_list = book.xf_list + format_map = book.format_map + date_cells = (sh[1, 1], sh[2, 1]) + assert all( + format_map[xf_list[cell.xf_index].format_key].format_str + == "DD.MM.YYYY" + for cell in date_cells + ) + datetime_cells = (sh[1, 2], sh[2, 2]) + assert all( + format_map[xf_list[cell.xf_index].format_key].format_str + == "DD.MM.YYYY HH-MM-SS" + for cell in datetime_cells + ) + else: + with ExcelFile(path) as reader: + wb = reader.book + ws = wb["test1"] + date_cells = (ws["B2"], ws["B3"]) + assert all(cell.number_format == "DD.MM.YYYY" for cell in date_cells) + datetime_cells = (ws["C2"], ws["C3"]) + assert all( + cell.number_format == "DD.MM.YYYY HH-MM-SS" + for cell in datetime_cells + ) + def test_to_excel_interval_no_labels(self, path): # see gh-19242 # From 20fa8b0cacfd58aca3c3c18e5168e79092d613ec Mon Sep 17 00:00:00 2001 From: Tim Morris <35537687+TimoMorris@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:04:58 +0100 Subject: [PATCH 4/7] CLN: Move tests to test files for individual engines (GH44284) Avoid messy branching within test due to handling different engines in parametrization on TestExcelWriter class. 
Different engines have different APIs for accessing formatting info so probably better as tests in per-engine test files, even though this entails some duplication across files. --- pandas/tests/io/excel/test_openpyxl.py | 40 ++++++++++++++++ pandas/tests/io/excel/test_writers.py | 58 ------------------------ pandas/tests/io/excel/test_xlsxwriter.py | 44 +++++++++++++++++- pandas/tests/io/excel/test_xlwt.py | 50 ++++++++++++++++++++ 4 files changed, 133 insertions(+), 59 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index bab1a1eed97c2..dc77c1a7668b6 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,4 +1,8 @@ import contextlib +from datetime import ( + date, + datetime, +) from pathlib import Path import re @@ -10,6 +14,7 @@ import pandas._testing as tm from pandas.io.excel import ( + ExcelFile, ExcelWriter, _OpenpyxlWriter, ) @@ -388,3 +393,38 @@ def test_book_and_sheets_consistent(ext): assert writer.sheets == {} sheet = writer.book.create_sheet("test_name", 0) assert writer.sheets == {"test_name": sheet} + + +def test_write_date_datetime_format(ext): + # see gh-44284 + # + # Test that custom date/datetime formats are respected + # by inspecting formatting info in written file. 
+ df = DataFrame( + [ + [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["X", "Y"], + columns=["DATE", "DATETIME"], + ) + + with tm.ensure_clean(ext) as f: + with ExcelWriter( + f, + engine="xlsxwriter", + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer: + df.to_excel(writer, "test1") + + with ExcelFile(f) as reader: + ws = reader.book["test1"] + + date_cells = (ws["B2"], ws["B3"]) + assert all(cell.number_format == "DD.MM.YYYY" for cell in date_cells) + + datetime_cells = (ws["C2"], ws["C3"]) + assert all( + cell.number_format == "DD.MM.YYYY HH-MM-SS" for cell in datetime_cells + ) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e4c16b33f2559..e2d9a956ad332 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -698,64 +698,6 @@ def test_excel_roundtrip_date_datetime_format(self, ext, path): # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) - def test_excel_date_datetime_format(self, engine, path): - # see gh-44284 - # - # Test that custom date/datetime formats are respected - # by inspecting formatting info in written file. 
- df = DataFrame( - [ - [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], - [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], - ], - index=["X", "Y"], - columns=["DATE", "DATETIME"], - ) - - with ExcelWriter( - path, - date_format="DD.MM.YYYY", - datetime_format="DD.MM.YYYY HH-MM-SS", - ) as writer: - df.to_excel(writer, "test1") - - if engine == "odf": - pytest.skip("Feature not supported by odf.") - elif engine == "xlwt": - # formatting_info defaults to False - # so have to use xlrd.open_workbook() directly - # as there's no way to pass this parameter through - # if using ExcelFile to open the file - from xlrd import open_workbook - - with open_workbook(path, formatting_info=True) as book: - sh = book["test1"] - xf_list = book.xf_list - format_map = book.format_map - date_cells = (sh[1, 1], sh[2, 1]) - assert all( - format_map[xf_list[cell.xf_index].format_key].format_str - == "DD.MM.YYYY" - for cell in date_cells - ) - datetime_cells = (sh[1, 2], sh[2, 2]) - assert all( - format_map[xf_list[cell.xf_index].format_key].format_str - == "DD.MM.YYYY HH-MM-SS" - for cell in datetime_cells - ) - else: - with ExcelFile(path) as reader: - wb = reader.book - ws = wb["test1"] - date_cells = (ws["B2"], ws["B3"]) - assert all(cell.number_format == "DD.MM.YYYY" for cell in date_cells) - datetime_cells = (ws["C2"], ws["C3"]) - assert all( - cell.number_format == "DD.MM.YYYY HH-MM-SS" - for cell in datetime_cells - ) - def test_to_excel_interval_no_labels(self, path): # see gh-19242 # diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 82d47a13aefbc..ff3db018bb26a 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -1,4 +1,8 @@ import contextlib +from datetime import ( + date, + datetime, +) import re import warnings @@ -7,7 +11,10 @@ from pandas import DataFrame import pandas._testing as tm -from pandas.io.excel import ExcelWriter +from pandas.io.excel import ( + 
ExcelFile, + ExcelWriter, +) xlsxwriter = pytest.importorskip("xlsxwriter") @@ -92,3 +99,38 @@ def test_book_and_sheets_consistent(ext): assert writer.sheets == {} sheet = writer.book.add_worksheet("test_name") assert writer.sheets == {"test_name": sheet} + + +def test_write_date_datetime_format(ext): + # see gh-44284 + # + # Test that custom date/datetime formats are respected + # by inspecting formatting info in written file. + df = DataFrame( + [ + [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["X", "Y"], + columns=["DATE", "DATETIME"], + ) + + with tm.ensure_clean(ext) as f: + with ExcelWriter( + f, + engine="xlsxwriter", + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer: + df.to_excel(writer, "test1") + + with ExcelFile(f) as reader: + ws = reader.book["test1"] + + date_cells = (ws["B2"], ws["B3"]) + assert all(cell.number_format == "DD.MM.YYYY" for cell in date_cells) + + datetime_cells = (ws["C2"], ws["C3"]) + assert all( + cell.number_format == "DD.MM.YYYY HH-MM-SS" for cell in datetime_cells + ) diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py index 3aa405eb1e275..0016c4b339563 100644 --- a/pandas/tests/io/excel/test_xlwt.py +++ b/pandas/tests/io/excel/test_xlwt.py @@ -1,3 +1,7 @@ +from datetime import ( + date, + datetime, +) import re import numpy as np @@ -144,3 +148,49 @@ def test_deprecated_attr(ext, attr): msg = f"{attr} is not part of the public API" with tm.assert_produces_warning(FutureWarning, match=msg): getattr(writer, attr) + + +def test_write_date_datetime_format(ext): + # see gh-44284 + # + # Test that custom date/datetime formats are respected + # by inspecting formatting info in written file. 
+ xlrd = pytest.importorskip("xlrd") + + df = DataFrame( + [ + [date(2014, 1, 31), datetime(1998, 5, 26, 23, 33, 4)], + [date(1999, 9, 24), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["X", "Y"], + columns=["DATE", "DATETIME"], + ) + + with tm.ensure_clean(ext) as f: + with ExcelWriter( + f, + engine="xlwt", + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) as writer: + df.to_excel(writer, "test1") + + # formatting_info defaults to False + # so have to use xlrd.open_workbook() directly + with xlrd.open_workbook(f, formatting_info=True) as book: + sh = book["test1"] + xf_list = book.xf_list + format_map = book.format_map + + date_cells = (sh[1, 1], sh[2, 1]) + assert all( + format_map[xf_list[cell.xf_index].format_key].format_str == "DD.MM.YYYY" + for cell in date_cells + ) + + datetime_cells = (sh[1, 2], sh[2, 2]) + assert all( + format_map[xf_list[cell.xf_index].format_key].format_str + == "DD.MM.YYYY HH-MM-SS" + for cell in datetime_cells + ) From c9cb84034ce5d20412a1fddfe06a68701bede14c Mon Sep 17 00:00:00 2001 From: Tim Morris <35537687+TimoMorris@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:31:59 +0100 Subject: [PATCH 5/7] DOC: Add bugfix to docs (GH44284) --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 61848cb127029..39f8e55c2ffbb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -820,6 +820,7 @@ I/O - Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) - :meth:`to_html` now excludes the ``border`` attribute from ``