From 5c3449a84befa94998ddc4ef252a629580e38473 Mon Sep 17 00:00:00 2001 From: Aaron Rahman Date: Sun, 3 Dec 2023 16:50:25 -0500 Subject: [PATCH 1/5] Fixed GH # 55828: When specifying local to_csv file paths with the file scheme, Pandas will now create the file instead of raising an exception --- pandas/io/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index d08612f4f09f6..fb2ffe92abf0c 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -378,6 +378,18 @@ def _get_filepath_or_buffer( # server responded with gzipped data storage_options = storage_options or {} + # Fix for GH #55828 + parsed_url = parse_url(filepath_or_buffer) + if parse_url(filepath_or_buffer).scheme == "file": + file_path = os.path.normpath(parsed_url.path) + return IOArgs( + filepath_or_buffer=open(file_path, "rb"), + encoding=encoding, + compression=compression, + should_close=True, + mode=fsspec_mode, + ) + # waiting until now for importing to match intended lazy logic of # urlopen function defined elsewhere in this module import urllib.request From 543594c4a80ad8d4e74598bfcf7a7a2f99c575e0 Mon Sep 17 00:00:00 2001 From: Flytre Date: Sun, 3 Dec 2023 16:56:54 -0500 Subject: [PATCH 2/5] Fixed GH # 55828: When specifying local to_csv file paths with the file scheme, Pandas will now create the file instead of raising an exception --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ade87c4215a38..819d1246d2410 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -572,8 +572,8 @@ I/O - Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`) - Bug in :meth:`pandas.read_excel` with ``engine="odf"`` (``ods`` files) when string contains annotation (:issue:`55200`) - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) +- Bug where :meth:`DataFrame.to_csv` would raise a ``URLError`` when specifying local file scheme paths to not-yet-created files (:issue:`55828`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) - Period ^^^^^^ - Bug in :class:`PeriodIndex` construction when more than one of ``data``, ``ordinal`` and ``**fields`` are passed failing to raise ``ValueError`` (:issue:`55961`) From 4da1380fb3e729c538adbe3ec8ba8812f792dc98 Mon Sep 17 00:00:00 2001 From: Flytre Date: Sun, 3 Dec 2023 17:45:12 -0500 Subject: [PATCH 3/5] Fixed GH # 55828: When specifying local to_csv file paths with the file scheme, Pandas will now create the file instead of raising an exception --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 819d1246d2410..ce16c8ced28b9 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -574,6 +574,7 @@ I/O - Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`) - Bug where :meth:`DataFrame.to_csv` would raise a ``URLError`` when specifying local file scheme paths to not-yet-created files (:issue:`55828`) - Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`) + Period ^^^^^^ - Bug in :class:`PeriodIndex` construction when more than one of ``data``, ``ordinal`` and ``**fields`` are passed failing to raise ``ValueError`` (:issue:`55961`) From 329c17d2373f2f2ce82fb42e23390f07497f01d4 Mon Sep 17 00:00:00 2001 From: Flytre Date: Sun, 3 Dec 2023 19:50:07 -0500 Subject: [PATCH 4/5] Fixed GH # 55828: When specifying local to_csv file paths with the file scheme, Pandas will now create the file instead of raising an exception --- pandas/io/common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index fb2ffe92abf0c..838aee3c8ac38 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -378,9 +378,14 @@ def _get_filepath_or_buffer( # server responded with gzipped data storage_options = storage_options or {} + # waiting until now for importing to match intended lazy logic of + # urlopen function defined elsewhere in this module + import urllib.request + # Fix for GH #55828 parsed_url = parse_url(filepath_or_buffer) if parse_url(filepath_or_buffer).scheme == "file": + file_path = urllib.request.url2pathname(parsed_url.path) file_path = os.path.normpath(parsed_url.path) return IOArgs( filepath_or_buffer=open(file_path, "rb"), @@ -390,10 +395,6 @@ def _get_filepath_or_buffer( mode=fsspec_mode, ) - # waiting until now for importing to match intended lazy logic of - # urlopen function defined elsewhere in this module - import urllib.request - # assuming storage_options is to be interpreted as headers req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) with urlopen(req_info) as req: From 1d9c577b1a04f9e6c2c38615b77426bb17c65c53 Mon Sep 17 00:00:00 2001 From: Flytre Date: Sun, 3 Dec 2023 21:07:10 -0500 Subject: [PATCH 5/5] Fixed GH # 55828: When specifying local to_csv file paths with the file scheme, Pandas will now create the file instead of raising an exception --- pandas/io/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 838aee3c8ac38..d65b0c8bcb77b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -384,9 +384,9 @@ def _get_filepath_or_buffer( # Fix for GH #55828 parsed_url = parse_url(filepath_or_buffer) - if parse_url(filepath_or_buffer).scheme == "file": + if parsed_url.scheme == "file": file_path = urllib.request.url2pathname(parsed_url.path) - file_path = os.path.normpath(parsed_url.path) + file_path = os.path.normpath(file_path) return IOArgs( filepath_or_buffer=open(file_path, "rb"), encoding=encoding,