From 18a888e257da3637904c175e30d61fc71f2038bf Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Thu, 3 Nov 2022 12:59:19 -0400 Subject: [PATCH 1/6] Add collate option --- MySQLdb/connections.py | 29 ++++++++++++++++++++++++++--- doc/user_guide.rst | 31 ++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/MySQLdb/connections.py b/MySQLdb/connections.py index 38324665..a1ce0a14 100644 --- a/MySQLdb/connections.py +++ b/MySQLdb/connections.py @@ -97,6 +97,17 @@ class object, used to create cursors (keyword only) If supplied, the connection character set will be changed to this character set. + If omitted, empty string, or None, the default character set + from the server will be used. + + :param str collate: + If ``charset`` and ``collation`` are both supplied, the + character set and collation for the current conneciton + will be set. + + If omitted, empty string, or None, the default collation + for the ``charset`` is implied. + :param str auth_plugin: If supplied, the connection default authentication plugin will be changed to this value. Example values: @@ -168,6 +179,7 @@ class object, used to create cursors (keyword only) cursorclass = kwargs2.pop("cursorclass", self.default_cursor) charset = kwargs2.get("charset", "") + collate = kwargs2.pop("collate", "") use_unicode = kwargs2.pop("use_unicode", True) sql_mode = kwargs2.pop("sql_mode", "") self._binary_prefix = kwargs2.pop("binary_prefix", False) @@ -192,9 +204,12 @@ class object, used to create cursors (keyword only) self.encoding = "ascii" # overridden in set_character_set() - if not charset: - charset = self.character_set_name() - self.set_character_set(charset) + if charset and collate: + self.set_character_set_collation(charset, collate) + else: + if not charset: + charset = self.character_set_name() + self.set_character_set(charset) if sql_mode: self.set_sql_mode(sql_mode) @@ -298,6 +313,14 @@ def set_character_set(self, charset): super().set_character_set(charset) self.encoding = _charset_to_encoding.get(charset, charset) + def set_character_set_collation(self, charset, collate): + """Set the connection character set and collation. Use this as + an alternative to ``set_character_set``. + """ + self.query("SET NAMES %s COLLATE %s" % (charset, collate)) + self.store_result() + self.encoding = _charset_to_encoding.get(charset, charset) + def set_sql_mode(self, sql_mode): """Set the connection sql_mode. See MySQL documentation for legal values.""" diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 555adf15..b1d7b5b8 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -336,15 +336,28 @@ connect(parameters...) *This must be a keyword parameter.* charset - If present, the connection character set will be changed - to this character set, if they are not equal. Support for - changing the character set requires MySQL-4.1 and later - server; if the server is too old, UnsupportedError will be - raised. This option implies use_unicode=True, but you can - override this with use_unicode=False, though you probably - shouldn't. - - If not present, the default character set is used. + If supplied, the connection character set will be changed + to this character set. + + If omitted, empty string, or None, the default character + set from the server will be used. + + *This must be a keyword parameter.* + + collate + + If ``charset`` and ``collation`` are both supplied, the + character set and collation for the current conneciton + will be set. + + If omitted, empty string, or None, the default collation + for the ``charset`` is implied by the database server. + + To learn more about the quiddities of character sets and + collations, consult the `MySQL docs + `_ + and `MariaDB docs + `_ *This must be a keyword parameter.* From e610777fad6fa50013fa00c0ef7167475207eaf5 Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Sat, 12 Nov 2022 14:34:23 -0500 Subject: [PATCH 2/6] Add unit test for collate option --- tests/test_MySQLdb_nonstandard.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_MySQLdb_nonstandard.py b/tests/test_MySQLdb_nonstandard.py index c517dad3..4cb19681 100644 --- a/tests/test_MySQLdb_nonstandard.py +++ b/tests/test_MySQLdb_nonstandard.py @@ -114,3 +114,33 @@ def test_context_manager(self): with connection_factory() as conn: self.assertFalse(conn.closed) self.assertTrue(conn.closed) + + +class TestCollation(unittest.TestCase): + """Test charset and collation connection options.""" + + def setUp(self): + # Initialize a connection with a non-default character set and + # collation. + self.conn = connection_factory( + charset="utf8mb4", + collate="utf8mb4_esperanto_ci", + ) + + def tearDown(self): + self.conn.close() + + def test_charset_collate(self): + c = self.conn.cursor() + c.execute( + """ + SHOW VARIABLES WHERE + Variable_Name="character_set_connection" OR + Variable_Name="collation_connection"; + """ + ) + row = c.fetchall() + charset = row[0][1] + collate = row[1][1] + self.assertEqual(charset, "utf8mb4") + self.assertEqual(collate, "utf8mb4_esperanto_ci") From c272111a262a88e51d31b36150e14ae06b227f17 Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Fri, 18 Nov 2022 12:05:25 -0500 Subject: [PATCH 3/6] Fix typo in docs --- doc/user_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user_guide.rst b/doc/user_guide.rst index b1d7b5b8..24842c62 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -346,7 +346,7 @@ connect(parameters...) collate - If ``charset`` and ``collation`` are both supplied, the + If ``charset`` and ``collate`` are both supplied, the character set and collation for the current conneciton will be set. From 6028b9d8edcd3985b4060c6561c0b60a02353f6e Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Tue, 21 Mar 2023 14:22:16 -0400 Subject: [PATCH 4/6] Change 'collate' to 'collation' per request --- MySQLdb/connections.py | 12 ++++++------ doc/user_guide.rst | 4 ++-- tests/test_MySQLdb_nonstandard.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MySQLdb/connections.py b/MySQLdb/connections.py index a1ce0a14..beb2a9e8 100644 --- a/MySQLdb/connections.py +++ b/MySQLdb/connections.py @@ -100,7 +100,7 @@ class object, used to create cursors (keyword only) If omitted, empty string, or None, the default character set from the server will be used. - :param str collate: + :param str collation: If ``charset`` and ``collation`` are both supplied, the character set and collation for the current conneciton will be set. @@ -179,7 +179,7 @@ class object, used to create cursors (keyword only) cursorclass = kwargs2.pop("cursorclass", self.default_cursor) charset = kwargs2.get("charset", "") - collate = kwargs2.pop("collate", "") + collation = kwargs2.pop("collation", "") use_unicode = kwargs2.pop("use_unicode", True) sql_mode = kwargs2.pop("sql_mode", "") self._binary_prefix = kwargs2.pop("binary_prefix", False) @@ -204,8 +204,8 @@ class object, used to create cursors (keyword only) self.encoding = "ascii" # overridden in set_character_set() - if charset and collate: - self.set_character_set_collation(charset, collate) + if charset and collation: + self.set_character_set_collation(charset, collation) else: if not charset: charset = self.character_set_name() @@ -313,11 +313,11 @@ def set_character_set(self, charset): super().set_character_set(charset) self.encoding = _charset_to_encoding.get(charset, charset) - def set_character_set_collation(self, charset, collate): + def set_character_set_collation(self, charset, collation): """Set the connection character set and collation. Use this as an alternative to ``set_character_set``. """ - self.query("SET NAMES %s COLLATE %s" % (charset, collate)) + self.query("SET NAMES %s COLLATE %s" % (charset, collation)) self.store_result() self.encoding = _charset_to_encoding.get(charset, charset) diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 24842c62..25916e84 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -344,9 +344,9 @@ connect(parameters...) *This must be a keyword parameter.* - collate + collation - If ``charset`` and ``collate`` are both supplied, the + If ``charset`` and ``collation`` are both supplied, the character set and collation for the current conneciton will be set. diff --git a/tests/test_MySQLdb_nonstandard.py b/tests/test_MySQLdb_nonstandard.py index 4cb19681..5e841791 100644 --- a/tests/test_MySQLdb_nonstandard.py +++ b/tests/test_MySQLdb_nonstandard.py @@ -124,13 +124,13 @@ def setUp(self): # collation. self.conn = connection_factory( charset="utf8mb4", - collate="utf8mb4_esperanto_ci", + collation="utf8mb4_esperanto_ci", ) def tearDown(self): self.conn.close() - def test_charset_collate(self): + def test_charset_collation(self): c = self.conn.cursor() c.execute( """ @@ -141,6 +141,6 @@ def test_charset_collate(self): ) row = c.fetchall() charset = row[0][1] - collate = row[1][1] + collation = row[1][1] self.assertEqual(charset, "utf8mb4") - self.assertEqual(collate, "utf8mb4_esperanto_ci") + self.assertEqual(collation, "utf8mb4_esperanto_ci") From fa2b059b97ecdc47faef4952e1143bee82908e54 Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Wed, 22 Mar 2023 16:47:36 -0400 Subject: [PATCH 5/6] Pull request feedback, always call set_character_set() --- MySQLdb/connections.py | 27 ++++++++------------------- doc/user_guide.rst | 15 +++++++++------ 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/MySQLdb/connections.py b/MySQLdb/connections.py index beb2a9e8..40296b85 100644 --- a/MySQLdb/connections.py +++ b/MySQLdb/connections.py @@ -97,12 +97,9 @@ class object, used to create cursors (keyword only) If supplied, the connection character set will be changed to this character set. - If omitted, empty string, or None, the default character set - from the server will be used. - :param str collation: If ``charset`` and ``collation`` are both supplied, the - character set and collation for the current conneciton + character set and collation for the current connection will be set. If omitted, empty string, or None, the default collation @@ -204,12 +201,9 @@ class object, used to create cursors (keyword only) self.encoding = "ascii" # overridden in set_character_set() - if charset and collation: - self.set_character_set_collation(charset, collation) - else: - if not charset: - charset = self.character_set_name() - self.set_character_set(charset) + if not charset: + charset = self.character_set_name() + self.set_character_set(charset, collation) if sql_mode: self.set_sql_mode(sql_mode) @@ -308,18 +302,13 @@ def begin(self): """ self.query(b"BEGIN") - def set_character_set(self, charset): + def set_character_set(self, charset, collation = None): """Set the connection character set to charset.""" super().set_character_set(charset) self.encoding = _charset_to_encoding.get(charset, charset) - - def set_character_set_collation(self, charset, collation): - """Set the connection character set and collation. Use this as - an alternative to ``set_character_set``. - """ - self.query("SET NAMES %s COLLATE %s" % (charset, collation)) - self.store_result() - self.encoding = _charset_to_encoding.get(charset, charset) + if collation: + self.query("SET NAMES %s COLLATE %s" % (charset, collation)) + self.store_result() def set_sql_mode(self, sql_mode): """Set the connection sql_mode. See MySQL documentation for diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 25916e84..5c9577bc 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -336,18 +336,21 @@ connect(parameters...) *This must be a keyword parameter.* charset - If supplied, the connection character set will be changed - to this character set. + If present, the connection character set will be changed + to this character set, if they are not equal. Support for + changing the character set requires MySQL-4.1 and later + server; if the server is too old, UnsupportedError will be + raised. This option implies use_unicode=True, but you can + override this with use_unicode=False, though you probably + shouldn't. - If omitted, empty string, or None, the default character - set from the server will be used. + If not present, the default character set is used. *This must be a keyword parameter.* collation - If ``charset`` and ``collation`` are both supplied, the - character set and collation for the current conneciton + character set and collation for the current connection will be set. If omitted, empty string, or None, the default collation From fe0bb09440a597548dbc387a46be6d068d629341 Mon Sep 17 00:00:00 2001 From: Vince Salvino Date: Wed, 22 Mar 2023 16:51:27 -0400 Subject: [PATCH 6/6] Blacken --- MySQLdb/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MySQLdb/connections.py b/MySQLdb/connections.py index 40296b85..1ea214ad 100644 --- a/MySQLdb/connections.py +++ b/MySQLdb/connections.py @@ -302,7 +302,7 @@ def begin(self): """ self.query(b"BEGIN") - def set_character_set(self, charset, collation = None): + def set_character_set(self, charset, collation=None): """Set the connection character set to charset.""" super().set_character_set(charset) self.encoding = _charset_to_encoding.get(charset, charset)