From 84e89765886dea5b95af8042e58cebcdcf618b71 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 19 Dec 2021 00:25:58 +0100 Subject: [PATCH 1/3] Bug: to_csv coercing datetime columns to same format --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/internals/blocks.py | 9 ++++++--- pandas/tests/io/formats/test_to_csv.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 413dbb9cd0850..8549cfbc1846c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -758,6 +758,7 @@ I/O - Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`) - Bug in :func:`read_csv` not replacing ``NaN`` values with ``np.nan`` before attempting date conversion (:issue:`26203`) - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`) +- Bug in :meth:`DataFrame.to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. 
(:issue:`39465`) - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`) - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index da2ff58ea3d0d..95696baaa29c7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2103,9 +2103,12 @@ def to_native_types( values = ensure_wrapped_if_datetimelike(values) if isinstance(values, (DatetimeArray, TimedeltaArray)): - result = values._format_native_types(na_rep=na_rep, **kwargs) - result = result.astype(object, copy=False) - return result + # GH#21734 Process every column separate, they migh have different formats + results_converted = [] + for i in range(len(values)): + result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs) + results_converted.append(result.astype(object, copy=False)) + return np.vstack(results_converted) elif isinstance(values, ExtensionArray): mask = isna(values) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 8815423d95d65..d88a2ba657bbe 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -282,6 +282,22 @@ def test_to_csv_date_format(self): df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + def test_to_csv_different_datetime_formats(self): + # GH#21734 + df = DataFrame( + { + "date": pd.to_datetime("1970-01-01"), + "datetime": pd.date_range("1970-01-01", periods=2, freq="H"), + } + ) + expected_rows = [ + "date,datetime", + "1970-01-01,1970-01-01 00:00:00", + "1970-01-01,1970-01-01 01:00:00", + ] + expected = 
tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(index=False) == expected + def test_to_csv_multi_index(self): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) From 796c36034dcfd87e32f0e5ad598349fc7a602e6e Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 19 Dec 2021 01:33:10 +0100 Subject: [PATCH 2/3] Try fix ci --- pandas/core/internals/blocks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 95696baaa29c7..cc76317fe5307 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2103,7 +2103,11 @@ def to_native_types( values = ensure_wrapped_if_datetimelike(values) if isinstance(values, (DatetimeArray, TimedeltaArray)): - # GH#21734 Process every column separate, they migh have different formats + if values.ndim == 1: + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + # GH#21734 Process every column separately, they might have different formats results_converted = [] for i in range(len(values)): result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs) From 7faa7640ebdebdd25a8e5f2d852d3d4b3d866b17 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 19 Dec 2021 02:07:20 +0100 Subject: [PATCH 3/3] Format code --- pandas/core/internals/blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cc76317fe5307..0a27b9d523e52 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2107,6 +2107,7 @@ def to_native_types( result = values._format_native_types(na_rep=na_rep, **kwargs) result = result.astype(object, copy=False) return result + # GH#21734 Process every column separately, they might have different formats results_converted = [] for i in range(len(values)):