From ae76e461f82445d717f9bd95def1627f763fcbfc Mon Sep 17 00:00:00 2001 From: Mohit Anand Date: Sun, 13 Oct 2019 09:34:23 +0530 Subject: [PATCH 01/28] Added code in core/frame.py to include encoding in to_string --- pandas/core/frame.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5200ad0ba0d23..7c1a86a61201f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -744,6 +744,7 @@ def to_string( decimal: str = ".", line_width: Optional[int] = None, max_colwidth: Optional[int] = None, + encoding = None, ) -> Optional[str]: """ Render a DataFrame to a console-friendly tabular output. @@ -754,6 +755,10 @@ def to_string( Max width to truncate each column in characters. By default, no limit. .. versionadded:: 1.0.0 + encoding : str, default "utf-8" + Set character encoding + + .. versionadded:: 1.0 %(returns)s See Also -------- From 39850404daa4e429c547644aa3de73e50bcc006c Mon Sep 17 00:00:00 2001 From: Mohit Anand Date: Sun, 13 Oct 2019 09:40:27 +0530 Subject: [PATCH 02/28] Modified io/formats/format.py to include encoding parameter in to_strin --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ad62c56a337b6..b9a7ff7772e21 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -895,8 +895,8 @@ def _join_multiline(self, *args) -> str: st = ed return "\n\n".join(str_lst) - def to_string(self, buf: Optional[FilePathOrBuffer[str]] = None) -> Optional[str]: - return self.get_result(buf=buf) + def to_string(self, buf: Optional[FilePathOrBuffer[str]] = None, encoding: Optional[str] = None) -> Optional[str]: + return self.get_result(buf=buf, encoding = encoding) def to_latex( self, From 8eea18e1c8853cd94ae185a0163cc20a35f9280e Mon Sep 17 00:00:00 2001 From: Mohit Anand Date: Sun, 13 Oct 2019 11:17:37 +0530 Subject: [PATCH 03/28] Added encoding to pandas/core/format.py for to_string --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7c1a86a61201f..49db094b2dfbf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -797,7 +797,7 @@ def to_string( decimal=decimal, line_width=line_width, ) - return formatter.to_string(buf=buf) + return formatter.to_string(buf=buf, encoding=encoding) # ---------------------------------------------------------------------- From d2c70ee1da9981b1ae0d491b89ce8dbf509e9294 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 11:27:25 +0530 Subject: [PATCH 04/28] Changed formatting with black --- pandas/core/frame.py | 2 +- pandas/io/formats/format.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49db094b2dfbf..a02f0aa7d4437 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -744,7 +744,7 @@ def to_string( decimal: str = ".", line_width: Optional[int] = None, max_colwidth: Optional[int] = None, - encoding = None, + encoding=None, ) -> Optional[str]: """ Render a DataFrame to a console-friendly tabular output. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b9a7ff7772e21..06a4c1f1425c8 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -895,8 +895,12 @@ def _join_multiline(self, *args) -> str: st = ed return "\n\n".join(str_lst) - def to_string(self, buf: Optional[FilePathOrBuffer[str]] = None, encoding: Optional[str] = None) -> Optional[str]: - return self.get_result(buf=buf, encoding = encoding) + def to_string( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + return self.get_result(buf=buf, encoding=encoding) def to_latex( self, From 835b22d53336dbf84c76fcffe093b747df331320 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 11:38:15 +0530 Subject: [PATCH 05/28] Added test for to_string encoding --- pandas/tests/io/formats/test_to_string.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 pandas/tests/io/formats/test_to_string.py diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py new file mode 100644 index 0000000000000..586be7e19259d --- /dev/null +++ b/pandas/tests/io/formats/test_to_string.py @@ -0,0 +1,9 @@ +import pandas as pd + + +def test_to_string_encoding(float_frame,): + # GH 28766 + path = "test_to_string_file" + float_frame.to_string(path, encoding="gbk") + with open(str(path), "r", encoding="gbk") as f: + assert float_frame.to_string() == f.read() From 4e30d7ca053eb45ec617cdbd04fdc533856595c6 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 11:40:09 +0530 Subject: [PATCH 06/28] Removed spaces from test_to_string.py file --- pandas/tests/io/formats/test_to_string.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 586be7e19259d..19bf53e3c8cf8 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -1,6 +1,3 @@ -import pandas as pd - - def test_to_string_encoding(float_frame,): # GH 28766 path = "test_to_string_file" From 78ba34ba86ced4f4ddee1096066168868133ab4c Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 12:26:11 +0530 Subject: [PATCH 07/28] Added whatsnew for to_string with encoding param. --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1112e42489342..4bab8582f26ee 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -317,6 +317,7 @@ I/O - Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) - Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) +- Added ``encoding`` argument to :func:`DataFrame.to_string` for non-ascii text (:issue:`28766`) Plotting ^^^^^^^^ From 56fdad9d864dd26fdfb5956811a83a26856b6f2b Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 20:57:44 +0530 Subject: [PATCH 08/28] Modified whatsnew with to_string encoding note in Others section. --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4bab8582f26ee..f5c9414798629 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -317,7 +317,6 @@ I/O - Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) - Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) -- Added ``encoding`` argument to :func:`DataFrame.to_string` for non-ascii text (:issue:`28766`) Plotting ^^^^^^^^ @@ -372,6 +371,7 @@ Other - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) +- Added ``encoding`` argument to :func:`DataFrame.to_string` for non-ascii text (:issue:`28766`) .. _whatsnew_1000.contributors: From 83ccffd20f3407cc9e2cdfbd43a672aea68658cb Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 20:59:54 +0530 Subject: [PATCH 09/28] Modified func to meth in for to_string note added in whatsnew. --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f5c9414798629..7f37c6b79d958 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -371,7 +371,7 @@ Other - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) -- Added ``encoding`` argument to :func:`DataFrame.to_string` for non-ascii text (:issue:`28766`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) .. _whatsnew_1000.contributors: From 19801bbedeeffb540c45c344200b6cb1a8a272fb Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 13 Oct 2019 21:01:46 +0530 Subject: [PATCH 10/28] Added full stop at end of line in to_string docstring. --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a02f0aa7d4437..9bd2fa4733f3f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -756,7 +756,7 @@ def to_string( .. versionadded:: 1.0.0 encoding : str, default "utf-8" - Set character encoding + Set character encoding. .. versionadded:: 1.0 %(returns)s From d44afa76e18cc1d03cbe38f99ce58fa45b6685f5 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Mon, 14 Oct 2019 09:17:46 +0530 Subject: [PATCH 11/28] Moved whatsnew note from Other to Other enhancements section. --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7f37c6b79d958..17d2481ef4e08 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -109,6 +109,7 @@ Other enhancements (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) Build Changes ^^^^^^^^^^^^^ @@ -371,7 +372,6 @@ Other - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) -- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) .. _whatsnew_1000.contributors: From b4b983bccf8e72316f2e78717a2f54f0489c59f7 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Mon, 14 Oct 2019 23:32:57 +0530 Subject: [PATCH 12/28] Added annotation to encoding parameter in to_string in pandas/core/frame.py --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9bd2fa4733f3f..e78b4300f95c1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -744,7 +744,7 @@ def to_string( decimal: str = ".", line_width: Optional[int] = None, max_colwidth: Optional[int] = None, - encoding=None, + encoding: Optional[str] = None, ) -> Optional[str]: """ Render a DataFrame to a console-friendly tabular output. From 236db38eb9311a771a02555c3456f86d86875c5b Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 09:15:48 +0530 Subject: [PATCH 13/28] Added encoding in test_format.py and removed test_to_string.py --- pandas/tests/io/formats/test_format.py | 4 ++-- pandas/tests/io/formats/test_to_string.py | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) delete mode 100644 pandas/tests/io/formats/test_to_string.py diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 454e2afb8abe0..e50809cb8561d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3241,12 +3241,12 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) def test_filepath_or_buffer_arg( - float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals + float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals, encoding ): df = float_frame expected = getattr(df, method)() - getattr(df, method)(buf=filepath_or_buffer) + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) assert_filepath_or_buffer_equals(expected) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py deleted file mode 100644 index 19bf53e3c8cf8..0000000000000 --- a/pandas/tests/io/formats/test_to_string.py +++ /dev/null @@ -1,6 +0,0 @@ -def test_to_string_encoding(float_frame,): - # GH 28766 - path = "test_to_string_file" - float_frame.to_string(path, encoding="gbk") - with open(str(path), "r", encoding="gbk") as f: - assert float_frame.to_string() == f.read() From 1f3e55dad658726bc4a0d070bbec29d28d0edd19 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 14:46:46 +0530 Subject: [PATCH 14/28] Added encoding in pytest parameter in test_format.py --- pandas/tests/io/formats/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index e50809cb8561d..e55b66d4b4635 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3249,8 +3249,8 @@ def test_filepath_or_buffer_arg( getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) assert_filepath_or_buffer_equals(expected) - @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +@pytest.mark.parametrize("encoding", ["utf-8", "gbk"]) def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): From b0364d213033d5014eaf15997b91f5eeae210d03 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 15:02:05 +0530 Subject: [PATCH 15/28] Fixed encoding paramter placement in test_format.py --- pandas/tests/io/formats/test_format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index e55b66d4b4635..f84c0d73e7365 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3240,6 +3240,7 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +@pytest.mark.parametrize("encoding", ["utf-8", "gbk"]) def test_filepath_or_buffer_arg( float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals, encoding ): @@ -3249,8 +3250,8 @@ def test_filepath_or_buffer_arg( getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) assert_filepath_or_buffer_equals(expected) + @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) -@pytest.mark.parametrize("encoding", ["utf-8", "gbk"]) def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): From 12ddc8f774a0ca119954c7f8c10b72bcf72d534e Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 16:46:12 +0530 Subject: [PATCH 16/28] Added test to check if encoding is present and filepath is not string or pathlike. --- pandas/tests/io/formats/test_format.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f84c0d73e7365..c3504c79a0a17 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3240,10 +3240,18 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) -@pytest.mark.parametrize("encoding", ["utf-8", "gbk"]) +@pytest.mark.parametrize("encoding", [None, "utf-8", "gbk", "foo"]) def test_filepath_or_buffer_arg( - float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals, encoding + float_frame, + method, + filepath_or_buffer, + assert_filepath_or_buffer_equals, + encoding, + filepath_or_buffer_id, ): + if encoding is not None: + assert filepath_or_buffer_id is not None + assert filepath_or_buffer_id in ["string", "pathlike"] df = float_frame expected = getattr(df, method)() From f82fe78a13bc689665ded3f6af4e6e12d172f4b0 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 18:12:30 +0530 Subject: [PATCH 17/28] Added pytest.raises instead of assert. --- pandas/tests/io/formats/test_format.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c3504c79a0a17..5cb1588fbb83e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3249,14 +3249,24 @@ def test_filepath_or_buffer_arg( encoding, filepath_or_buffer_id, ): - if encoding is not None: - assert filepath_or_buffer_id is not None - assert filepath_or_buffer_id in ["string", "pathlike"] df = float_frame expected = getattr(df, method)() + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: + with pytest.raises( + ValueError, + match="filepath is not a string or path but encoding is specified.", + ): + raise ValueError( + "filepath is not a string or path but encoding is specified." + ) + elif encoding == "foo": + with pytest.raises(LookupError, match="LookupError: unknown encoding: foo"): + raise LookupError("unknown encoding: foo") + else: + assert_filepath_or_buffer_equals(expected) + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) - assert_filepath_or_buffer_equals(expected) @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) From 5ef9cec5150f863d217837824dcd73a2994d4dd0 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 22:09:28 +0530 Subject: [PATCH 18/28] Fix pytest.raise for test_format.py --- pandas/tests/io/formats/test_format.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 5cb1588fbb83e..6c0f560368af7 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3257,16 +3257,13 @@ def test_filepath_or_buffer_arg( ValueError, match="filepath is not a string or path but encoding is specified.", ): - raise ValueError( - "filepath is not a string or path but encoding is specified." - ) + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) elif encoding == "foo": - with pytest.raises(LookupError, match="LookupError: unknown encoding: foo"): - raise LookupError("unknown encoding: foo") + with pytest.raises(LookupError, match="unknown encoding"): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) else: assert_filepath_or_buffer_equals(expected) - - getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) From df58c1f1a612bddef49af1154986ce04f2f12582 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 22:44:48 +0530 Subject: [PATCH 19/28] If encoding is specified and buf is not file, raise ValueError. --- pandas/io/formats/format.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c9efe2ee2d693..d1682364d6953 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -482,6 +482,8 @@ def get_buffer( buf = _stringify_path(buf) else: buf = StringIO() + if encoding is not None: + raise ValueError("buf is not a file name and encoding is specified.") if encoding is None: encoding = "utf-8" From 496f68fcdbd8f20c70f2842193e89d59bb078a57 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 22:48:51 +0530 Subject: [PATCH 20/28] Fix pytest.raise in test_filepath_or_buffer_arg. --- pandas/tests/io/formats/test_format.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6c0f560368af7..a696d8ddfc730 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3250,20 +3250,20 @@ def test_filepath_or_buffer_arg( filepath_or_buffer_id, ): df = float_frame - expected = getattr(df, method)() if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: with pytest.raises( ValueError, - match="filepath is not a string or path but encoding is specified.", + match="buf is not a file name and encoding is specified.", ): getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) elif encoding == "foo": with pytest.raises(LookupError, match="unknown encoding"): getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) else: - assert_filepath_or_buffer_equals(expected) + expected = getattr(df, method)() getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + assert_filepath_or_buffer_equals(expected) @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) From 725b2ecb844b7c67997f850b3eee0b9ce1b0ac24 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sat, 19 Oct 2019 22:49:50 +0530 Subject: [PATCH 21/28] Reformatted with black. --- pandas/tests/io/formats/test_format.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index a696d8ddfc730..096fc6cb45073 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3253,8 +3253,7 @@ def test_filepath_or_buffer_arg( if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: with pytest.raises( - ValueError, - match="buf is not a file name and encoding is specified.", + ValueError, match="buf is not a file name and encoding is specified." ): getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) elif encoding == "foo": From 698d3b7cf44ac02b38dcc2629a39a916ffecc86d Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 20 Oct 2019 01:23:01 +0530 Subject: [PATCH 22/28] Raise ValueError in to_html, to_latex, and to_string if buf is a buffer and encoding is specified. --- pandas/io/formats/format.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d1682364d6953..79333ee59f9b9 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -480,6 +480,8 @@ def get_buffer( """ if buf is not None: buf = _stringify_path(buf) + if (hasattr(buf, "write")) & (encoding is not None): + raise ValueError("buf is not a file name and encoding is specified.") else: buf = StringIO() if encoding is not None: From 96cc8101779ff578426463d088b0f1a1fb554591 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 20 Oct 2019 11:02:12 +0530 Subject: [PATCH 23/28] ValueError placement fixed in formats.py --- pandas/io/formats/format.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 79333ee59f9b9..85666d8fdd4fd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -480,15 +480,17 @@ def get_buffer( """ if buf is not None: buf = _stringify_path(buf) - if (hasattr(buf, "write")) & (encoding is not None): - raise ValueError("buf is not a file name and encoding is specified.") + # if (hasattr(buf, "write")) & (encoding is not None): + # raise ValueError("buf is not a file name and encoding is specified.") else: buf = StringIO() - if encoding is not None: - raise ValueError("buf is not a file name and encoding is specified.") + # if encoding is not None: + # raise ValueError("buf is not a file name and encoding is specified.") if encoding is None: encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") if hasattr(buf, "write"): yield buf From 1a35eeb1408b464bcc5f163dd3c33d61ccaeb9dd Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 20 Oct 2019 11:06:06 +0530 Subject: [PATCH 24/28] Removed commented code from format.py --- pandas/io/formats/format.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 85666d8fdd4fd..7c58eafd2ec39 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -480,12 +480,8 @@ def get_buffer( """ if buf is not None: buf = _stringify_path(buf) - # if (hasattr(buf, "write")) & (encoding is not None): - # raise ValueError("buf is not a file name and encoding is specified.") else: buf = StringIO() - # if encoding is not None: - # raise ValueError("buf is not a file name and encoding is specified.") if encoding is None: encoding = "utf-8" From f865ac319bada4313ae83b9099cd334d3528e60e Mon Sep 17 00:00:00 2001 From: farziengineer Date: Sun, 20 Oct 2019 11:06:06 +0530 Subject: [PATCH 25/28] Removed commented code from format.py --- pandas/io/formats/format.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 85666d8fdd4fd..7c58eafd2ec39 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -480,12 +480,8 @@ def get_buffer( """ if buf is not None: buf = _stringify_path(buf) - # if (hasattr(buf, "write")) & (encoding is not None): - # raise ValueError("buf is not a file name and encoding is specified.") else: buf = StringIO() - # if encoding is not None: - # raise ValueError("buf is not a file name and encoding is specified.") if encoding is None: encoding = "utf-8" From 835cdb8f81dea31d2c41eeac371ec1d519f2fb91 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Tue, 22 Oct 2019 01:29:47 +0530 Subject: [PATCH 26/28] Added encoding in assert_filepath_or_buffer_equals fixture. --- pandas/tests/io/formats/test_format.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 096fc6cb45073..f48d55a05d6dd 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -73,17 +73,19 @@ def filepath_or_buffer(filepath_or_buffer_id, tmp_path): @pytest.fixture -def assert_filepath_or_buffer_equals(filepath_or_buffer, filepath_or_buffer_id): +def assert_filepath_or_buffer_equals( + filepath_or_buffer, filepath_or_buffer_id, encoding +): """ Assertion helper for checking filepath_or_buffer. """ def _assert_filepath_or_buffer_equals(expected): if filepath_or_buffer_id == "string": - with open(filepath_or_buffer) as f: + with open(filepath_or_buffer, encoding=encoding) as f: result = f.read() elif filepath_or_buffer_id == "pathlike": - result = filepath_or_buffer.read_text() + result = filepath_or_buffer.read_text(encoding=encoding) elif filepath_or_buffer_id == "buffer": result = filepath_or_buffer.getvalue() assert result == expected @@ -3250,7 +3252,8 @@ def test_filepath_or_buffer_arg( filepath_or_buffer_id, ): df = float_frame - + if encoding == "gbk": + float_frame.iloc[0, 0] = "造成输出中文显示乱码" if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: with pytest.raises( ValueError, match="buf is not a file name and encoding is specified." From 648fa556e73ed361444f03d43219032673ae7f54 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Tue, 22 Oct 2019 22:41:25 +0530 Subject: [PATCH 27/28] Modified test_filepath_or_buffer_arg to have custom data(removed float_frame). --- pandas/tests/io/formats/test_format.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f48d55a05d6dd..9aba4c8aa5019 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3242,18 +3242,20 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) -@pytest.mark.parametrize("encoding", [None, "utf-8", "gbk", "foo"]) +@pytest.mark.parametrize( + "encoding, data", + [(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], +) def test_filepath_or_buffer_arg( - float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals, encoding, + data, filepath_or_buffer_id, ): - df = float_frame - if encoding == "gbk": - float_frame.iloc[0, 0] = "造成输出中文显示乱码" + df = DataFrame([data]) + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: with pytest.raises( ValueError, match="buf is not a file name and encoding is specified." From 3698a2a69ce23e886f6df64b796f00722ab69165 Mon Sep 17 00:00:00 2001 From: farziengineer Date: Tue, 22 Oct 2019 22:41:25 +0530 Subject: [PATCH 28/28] Modified test_filepath_or_buffer_arg to have custom data(removed float_frame). --- pandas/tests/io/formats/test_format.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f48d55a05d6dd..9aba4c8aa5019 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3242,18 +3242,20 @@ def test_repr_html_ipython_config(ip): @pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) -@pytest.mark.parametrize("encoding", [None, "utf-8", "gbk", "foo"]) +@pytest.mark.parametrize( + "encoding, data", + [(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], +) def test_filepath_or_buffer_arg( - float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals, encoding, + data, filepath_or_buffer_id, ): - df = float_frame - if encoding == "gbk": - float_frame.iloc[0, 0] = "造成输出中文显示乱码" + df = DataFrame([data]) + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: with pytest.raises( ValueError, match="buf is not a file name and encoding is specified."