From 506c222571099de4b054a6f0f10cefa7b8da56ac Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 03:23:39 +0800 Subject: [PATCH 01/15] added encoding to to_html --- pandas/core/frame.py | 5 ++++- pandas/io/formats/format.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f34fee5e1ff..a50fee6a53f33 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2180,6 +2180,7 @@ def to_html( header=True, index=True, na_rep="NaN", + encoding="utf-8", formatters=None, float_format=None, sparsify=None, @@ -2211,6 +2212,8 @@ def to_html( border : int A ``border=border`` attribute is included in the opening `` tag. Default ``pd.options.display.html.border``. + encoding : str + Default is utf-8. table_id : str, optional A css id is included in the opening `
` tag if specified. @@ -2252,7 +2255,7 @@ def to_html( ) # TODO: a generic formatter wld b in DataFrameFormatter return formatter.to_html( - buf=buf, classes=classes, notebook=notebook, border=border + buf=buf, classes=classes, notebook=notebook, border=border, encoding=encoding ) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3a50f63409582..3f89500ae466d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -930,6 +930,7 @@ def _format_col(self, i: int) -> List[str]: def to_html( self, buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, classes: Optional[Union[str, List, Tuple]] = None, notebook: bool = False, border: Optional[int] = None, @@ -951,7 +952,7 @@ def to_html( from pandas.io.formats.html import HTMLFormatter, NotebookFormatter Klass = NotebookFormatter if notebook else HTMLFormatter - return Klass(self, classes=classes, border=border).get_result(buf=buf) + return Klass(self, classes=classes, border=border).get_result(buf=buf, encoding=encoding) def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify From 12fb0c5c5191d2141cbc529ca2d5a0382de048b8 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 03:28:13 +0800 Subject: [PATCH 02/15] added encoding to to_html --- pandas/io/formats/format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3f89500ae466d..af6e1e963adc5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -952,7 +952,8 @@ def to_html( from pandas.io.formats.html import HTMLFormatter, NotebookFormatter Klass = NotebookFormatter if notebook else HTMLFormatter - return Klass(self, classes=classes, border=border).get_result(buf=buf, encoding=encoding) + return Klass(self, classes=classes, border=border).get_result(buf=buf, + encoding=encoding) def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify From 842bbc716eabbf1c815047d47a17a5984bf9cec5 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 03:31:14 +0800 Subject: [PATCH 03/15] added encoding to to_html --- pandas/core/frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a50fee6a53f33..1431708fc1bce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2255,7 +2255,11 @@ def to_html( ) # TODO: a generic formatter wld b in DataFrameFormatter return formatter.to_html( - buf=buf, classes=classes, notebook=notebook, border=border, encoding=encoding + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding ) # ---------------------------------------------------------------------- From 0a3df684330804e190656eaf2301a4c46af110b4 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 04:10:08 +0800 Subject: [PATCH 04/15] fixed docstring with default v --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1431708fc1bce..df10d96286128 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2212,8 +2212,8 @@ def to_html( border : int A ``border=border`` attribute is included in the opening `
` tag. Default ``pd.options.display.html.border``. - encoding : str - Default is utf-8. + encoding : str, default "utf-8" + Set character encoding table_id : str, optional A css id is included in the opening `
` tag if specified. From 70450a2198a5b7f0cac86df8f2d77dd549c6da49 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 05:00:16 +0800 Subject: [PATCH 05/15] fixed black format issue --- pandas/core/frame.py | 2 +- pandas/io/formats/format.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df10d96286128..aa8d59dfa2796 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2259,7 +2259,7 @@ def to_html( classes=classes, notebook=notebook, border=border, - encoding=encoding + encoding=encoding, ) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index af6e1e963adc5..f081075f6ba3b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -952,8 +952,9 @@ def to_html( from pandas.io.formats.html import HTMLFormatter, NotebookFormatter Klass = NotebookFormatter if notebook else HTMLFormatter - return Klass(self, classes=classes, border=border).get_result(buf=buf, - encoding=encoding) + return Klass(self, classes=classes, border=border).get_result( + buf=buf, encoding=encoding + ) def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify From 0e2bcaf128de0bd2a203f97796cbd28a7fcafa40 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 17:06:02 +0800 Subject: [PATCH 06/15] added test for pd.to_html encoding arg --- pandas/tests/io/formats/test_to_html.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 004dffd128dd6..11cedf20aad43 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -4,7 +4,7 @@ import numpy as np import pytest - +import codecs import pandas as pd from pandas import DataFrame, Index, MultiIndex, option_context from pandas.util import testing as tm @@ -99,6 +99,14 @@ def test_to_html_unicode(df, expected, datapath): assert result == expected +def test_to_html_encoding(): + df = DataFrame({"A": ["a", "b"]}) + with tm.ensure_clean("test.csv") as path: + df.to_html(path, encoding="gbk") + with codecs.open(path, "r", encoding="gbk") as f: + assert df.to_html() == f.read() + + def test_to_html_decimal(datapath): # GH 12031 df = DataFrame({"A": [6.0, 3.1, 2.2]}) From 0cd9c2145dcea3288ed1b1450a8c52dcadc9652c Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 1 Oct 2019 17:32:25 +0800 Subject: [PATCH 07/15] fixed imports sort issue with isort --- pandas/tests/io/formats/test_to_html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 11cedf20aad43..58c1c279f8855 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1,10 +1,11 @@ +import codecs from datetime import datetime from io import StringIO import re import numpy as np import pytest -import codecs + import pandas as pd from pandas import DataFrame, Index, MultiIndex, option_context from pandas.util import testing as tm From 0689ad439716af5fe5627e5c6c9118b921009e60 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Wed, 2 Oct 2019 09:32:50 +0800 Subject: [PATCH 08/15] added issue number as comment and use built-in --- pandas/tests/io/formats/test_to_html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 58c1c279f8855..ec6754e17a6e3 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1,4 +1,3 @@ -import codecs from datetime import datetime from io import StringIO import re @@ -101,10 +100,11 @@ def test_to_html_unicode(df, expected, datapath): def test_to_html_encoding(): + # GH 28663 df = DataFrame({"A": ["a", "b"]}) with tm.ensure_clean("test.csv") as path: df.to_html(path, encoding="gbk") - with codecs.open(path, "r", encoding="gbk") as f: + with open(path, "r", encoding="gbk") as f: assert df.to_html() == f.read() From cb1bd05dfce85dc31594c3342af945460ff039db Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Wed, 2 Oct 2019 13:55:03 +0800 Subject: [PATCH 09/15] keep None in highlevel frame api --- pandas/core/frame.py | 2 +- pandas/io/formats/format.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa8d59dfa2796..f97358148e2f9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2180,7 +2180,7 @@ def to_html( header=True, index=True, na_rep="NaN", - encoding="utf-8", + encoding=None, formatters=None, float_format=None, sparsify=None, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f081075f6ba3b..13754da97d95b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -943,6 +943,8 @@ def to_html( classes : str or list-like classes to include in the `class` attribute of the opening ``
`` tag, in addition to the default "dataframe". + encoding : str, optional, default "utf-8" + Set character encoding. notebook : {True, False}, optional, default False Whether the generated HTML is for IPython Notebook. border : int From 3c792e7a845987a34a2567fe2a6561e446289dc5 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 8 Oct 2019 13:36:54 +0800 Subject: [PATCH 10/15] versionadded info added, duplicated docstring removed, arg list reranged, and replace the ensure_clean and DF with pytest built-in tmp_path and float_frame fixture --- pandas/core/frame.py | 4 +++- pandas/io/formats/format.py | 2 -- pandas/tests/io/formats/test_to_html.py | 11 +++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f97358148e2f9..945237eb62dc5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2180,7 +2180,6 @@ def to_html( header=True, index=True, na_rep="NaN", - encoding=None, formatters=None, float_format=None, sparsify=None, @@ -2197,6 +2196,7 @@ def to_html( border=None, table_id=None, render_links=False, + encoding=None, ): """ Render a DataFrame as an HTML table. @@ -2214,6 +2214,8 @@ def to_html( `
` tag. Default ``pd.options.display.html.border``. encoding : str, default "utf-8" Set character encoding + + .. versionadded:: 1.0 table_id : str, optional A css id is included in the opening `
` tag if specified. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 13754da97d95b..f081075f6ba3b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -943,8 +943,6 @@ def to_html( classes : str or list-like classes to include in the `class` attribute of the opening ``
`` tag, in addition to the default "dataframe". - encoding : str, optional, default "utf-8" - Set character encoding. notebook : {True, False}, optional, default False Whether the generated HTML is for IPython Notebook. border : int diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index ec6754e17a6e3..f61d9fb00a193 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -99,13 +99,12 @@ def test_to_html_unicode(df, expected, datapath): assert result == expected -def test_to_html_encoding(): +def test_to_html_encoding(float_frame, tmp_path): # GH 28663 - df = DataFrame({"A": ["a", "b"]}) - with tm.ensure_clean("test.csv") as path: - df.to_html(path, encoding="gbk") - with open(path, "r", encoding="gbk") as f: - assert df.to_html() == f.read() + path = tmp_path / "test.html" + float_frame.to_html(path, encoding="gbk") + with open(path, "r", encoding="gbk") as f: + assert float_frame.to_html(encoding="gbk") == f.read() def test_to_html_decimal(datapath): From 9f5763d34b682c1bb732b9556b5906396a6b12a2 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 8 Oct 2019 14:20:20 +0800 Subject: [PATCH 11/15] fixed tmp_path str type for py35 --- pandas/tests/io/formats/test_to_html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index f61d9fb00a193..f00f55223c741 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -68,6 +68,7 @@ def justify(request): @pytest.mark.parametrize("col_space", [30, 50]) def test_to_html_with_col_space(col_space): df = DataFrame(np.random.random(size=(1, 3))) + print(float_frame) # check that col_space affects HTML generation # and be very brittle about it. result = df.to_html(col_space=col_space) @@ -103,7 +104,7 @@ def test_to_html_encoding(float_frame, tmp_path): # GH 28663 path = tmp_path / "test.html" float_frame.to_html(path, encoding="gbk") - with open(path, "r", encoding="gbk") as f: + with open(str(path), "r", encoding="gbk") as f: assert float_frame.to_html(encoding="gbk") == f.read() From d4de34827364960543db62b446100240899cc5a9 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Tue, 8 Oct 2019 15:43:15 +0800 Subject: [PATCH 12/15] fixed error typing --- pandas/tests/io/formats/test_to_html.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index f00f55223c741..e1dbc8dceec33 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -68,7 +68,6 @@ def justify(request): @pytest.mark.parametrize("col_space", [30, 50]) def test_to_html_with_col_space(col_space): df = DataFrame(np.random.random(size=(1, 3))) - print(float_frame) # check that col_space affects HTML generation # and be very brittle about it. result = df.to_html(col_space=col_space) From c8f41c0d804a0cddcb0887a1e167497d4e0ec04d Mon Sep 17 00:00:00 2001 From: Jinyang Zhou Date: Wed, 9 Oct 2019 09:52:35 +0800 Subject: [PATCH 13/15] Update pandas/tests/io/formats/test_to_html.py Co-Authored-By: Simon Hawkins --- pandas/tests/io/formats/test_to_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index e1dbc8dceec33..9b6bdc99bdc67 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -104,7 +104,7 @@ def test_to_html_encoding(float_frame, tmp_path): path = tmp_path / "test.html" float_frame.to_html(path, encoding="gbk") with open(str(path), "r", encoding="gbk") as f: - assert float_frame.to_html(encoding="gbk") == f.read() + assert float_frame.to_html() == f.read() def test_to_html_decimal(datapath): From 6c890187a475fd9f5439accc7432d8062adbf736 Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Wed, 9 Oct 2019 10:39:26 +0800 Subject: [PATCH 14/15] added whatsnew in v1.0.0 rst --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index eb4b72d01d59a..f404d7d07f151 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -253,6 +253,7 @@ I/O - Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) - Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) - Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) Plotting ^^^^^^^^ From 0cbf56a404ba4bd5756d62e98a2500762822db4c Mon Sep 17 00:00:00 2001 From: ailurus1991 Date: Sun, 13 Oct 2019 19:50:52 +0800 Subject: [PATCH 15/15] move note to other enhancements section --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b0297eaa6ddfe..69b582508d843 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -109,6 +109,7 @@ Other enhancements (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) Build Changes ^^^^^^^^^^^^^ @@ -312,7 +313,6 @@ I/O - Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) - Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) -- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) Plotting ^^^^^^^^