ENH: add ``if_sheet_exists`` to ExcelWriter (GH 40230)

Adds an ``if_sheet_exists`` keyword ('error', 'new' or 'replace') to
``ExcelWriter`` and forwards it from the ods, openpyxl, xlsxwriter and xlwt
writers' ``__init__``. The base class validates the value, rejects it outside
append mode and maps ``None`` to 'error'; the openpyxl ``write_cells`` is the
code path that acts on it when the target sheet already exists.

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 8af129cec2490..494a5d527c6b3 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -196,6 +196,7 @@ Other enhancements
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
 - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
+- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
 - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 7eefd26b194ab..5796d77a2d027 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -664,6 +664,15 @@ class ExcelWriter(metaclass=abc.ABCMeta):
         be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
 
         .. versionadded:: 1.2.0
+    if_sheet_exists : {'error', 'new', 'replace'}, default 'error'
+        How to behave when trying to write to a sheet that already
+        exists (append mode only).
+
+        * error: Raise a ValueError.
+        * new: Create a new sheet, with a name determined by the engine.
+        * replace: Delete the contents of the sheet before writing to it.
+
+        .. versionadded:: 1.3.0
     engine_kwargs : dict, optional
         Keyword arguments to be passed into the engine.
 
@@ -760,6 +769,7 @@ def __new__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict | None = None,
         **kwargs,
     ):
@@ -861,6 +871,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict | None = None,
         **kwargs,
     ):
@@ -896,6 +907,17 @@ def __init__(
 
         self.mode = mode
 
+        if if_sheet_exists not in [None, "error", "new", "replace"]:
+            raise ValueError(
+                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
+                "Valid options are 'error', 'new' and 'replace'."
+            )
+        if if_sheet_exists and "r+" not in mode:
+            raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
+        if if_sheet_exists is None:
+            if_sheet_exists = "error"
+        self.if_sheet_exists = if_sheet_exists
+
     def __fspath__(self):
         return getattr(self.handles.handle, "name", "")
 
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index bfd1bcf466a7a..7b6634fff1c16 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -30,6 +30,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         from odf.opendocument import OpenDocumentSpreadsheet
@@ -41,6 +42,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 860971a7967da..a99f8e2625602 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -37,6 +37,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: str | None = None,
         engine_kwargs: dict[str, Any] | None = None,
     ):
         # Use the openpyxl module as the Excel writer.
@@ -46,6 +47,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 
@@ -56,6 +58,8 @@ def __init__(
 
             self.book = load_workbook(self.handles.handle)
             self.handles.handle.seek(0)
+            self.sheets = {name: self.book[name] for name in self.book.sheetnames}
+
         else:
             # Create workbook object with default optimized_write=True.
             self.book = Workbook()
@@ -414,8 +418,26 @@ def write_cells(
 
         _style_cache: dict[str, dict[str, Serialisable]] = {}
 
-        if sheet_name in self.sheets:
-            wks = self.sheets[sheet_name]
+        if sheet_name in self.sheets and self.if_sheet_exists != "new":
+            if "r+" in self.mode:
+                if self.if_sheet_exists == "replace":
+                    old_wks = self.sheets[sheet_name]
+                    target_index = self.book.index(old_wks)
+                    del self.book[sheet_name]
+                    wks = self.book.create_sheet(sheet_name, target_index)
+                    self.sheets[sheet_name] = wks
+                elif self.if_sheet_exists == "error":
+                    raise ValueError(
+                        f"Sheet '{sheet_name}' already exists and "
+                        f"if_sheet_exists is set to 'error'."
+                    )
+                else:
+                    raise ValueError(
+                        f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
+                        "Valid options are 'error', 'new' and 'replace'."
+                    )
+            else:
+                wks = self.sheets[sheet_name]
         else:
             wks = self.book.create_sheet()
             wks.title = sheet_name
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
index 6e1b064534707..27b3ae3fab9bc 100644
--- a/pandas/io/excel/_xlsxwriter.py
+++ b/pandas/io/excel/_xlsxwriter.py
@@ -177,6 +177,7 @@ def __init__(
         datetime_format=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         # Use the xlsxwriter module as the Excel writer.
@@ -194,6 +195,7 @@ def __init__(
             datetime_format=datetime_format,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 
diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py
index 776baf66536b1..8d5bd4a9608d4 100644
--- a/pandas/io/excel/_xlwt.py
+++ b/pandas/io/excel/_xlwt.py
@@ -28,6 +28,7 @@ def __init__(
         encoding=None,
         mode: str = "w",
         storage_options: StorageOptions = None,
+        if_sheet_exists: Optional[str] = None,
         engine_kwargs: Optional[Dict[str, Any]] = None,
     ):
         # Use the xlwt module as the Excel writer.
@@ -40,6 +41,7 @@ def __init__(
             path,
             mode=mode,
             storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
             engine_kwargs=engine_kwargs,
         )
 
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 9010f978d268d..62f567457c3ab 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import re
 
 import numpy as np
 import pytest
@@ -109,6 +110,66 @@ def test_write_append_mode(ext, mode, expected):
             assert wb2.worksheets[index]["A1"].value == cell_value
 
 
+@pytest.mark.parametrize(
+    "if_sheet_exists,num_sheets,expected",
+    [
+        ("new", 2, ["apple", "banana"]),
+        ("replace", 1, ["pear"]),
+    ],
+)
+def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
+    # GH 40230
+    df1 = DataFrame({"fruit": ["apple", "banana"]})
+    df2 = DataFrame({"fruit": ["pear"]})
+
+    with tm.ensure_clean(ext) as f:
+        df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
+        with ExcelWriter(
+            f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
+        ) as writer:
+            df2.to_excel(writer, sheet_name="foo", index=False)
+
+        wb = openpyxl.load_workbook(f)
+        assert len(wb.sheetnames) == num_sheets
+        assert wb.sheetnames[0] == "foo"
+        result = pd.read_excel(wb, "foo", engine="openpyxl")
+        assert list(result["fruit"]) == expected
+        if len(wb.sheetnames) == 2:
+            result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
+            tm.assert_frame_equal(result, df2)
+        wb.close()
+
+
+@pytest.mark.parametrize(
+    "if_sheet_exists,msg",
+    [
+        (
+            "invalid",
+            "'invalid' is not valid for if_sheet_exists. Valid options "
+            "are 'error', 'new' and 'replace'.",
+        ),
+        (
+            "error",
+            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
+        ),
+        (
+            None,
+            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
+        ),
+    ],
+)
+def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
+    # GH 40230
+    df = DataFrame({"fruit": ["pear"]})
+    with tm.ensure_clean(ext) as f:
+        df.to_excel(f, sheet_name="foo", engine="openpyxl")
+        with pytest.raises(ValueError, match=re.escape(msg)):
+            with ExcelWriter(
+                f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
+            ) as writer:
+                df.to_excel(writer, sheet_name="foo")
+
+
 def test_to_excel_with_openpyxl_engine(ext):
     # GH 29854
     with tm.ensure_clean(ext) as filename:
@@ -175,7 +236,9 @@ def test_append_mode_file(ext):
     with tm.ensure_clean(ext) as f:
         df.to_excel(f, engine="openpyxl")
 
-        with ExcelWriter(f, mode="a", engine="openpyxl") as writer:
+        with ExcelWriter(
+            f, mode="a", engine="openpyxl", if_sheet_exists="new"
+        ) as writer:
             df.to_excel(writer)
 
         # make sure that zip files are not concatenated by making sure that
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index cce8c3d01025d..67a78f2b1de76 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -1325,6 +1325,14 @@ def test_excel_duplicate_columns_with_names(self, path):
         expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
         tm.assert_frame_equal(result, expected)
 
+    def test_if_sheet_exists_raises(self, ext):
+        # GH 40230
+        msg = "if_sheet_exists is only valid in append mode (mode='a')"
+
+        with tm.ensure_clean(ext) as f:
+            with pytest.raises(ValueError, match=re.escape(msg)):
+                ExcelWriter(f, if_sheet_exists="replace")
+
 
 class TestExcelWriterEngineTests:
     @pytest.mark.parametrize(