From dd01017a13ef80ded96e8a48024c98174d2110b8 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Thu, 31 Mar 2022 18:49:15 +0300 Subject: [PATCH 1/2] changelog: describe changes since 0.7.0 This patch adds a CHANGELOG.md file. The current CHANGELOG contents cover the changes in versions 0.7.0 and 0.7.1 and the currently unreleased changes. For changes in versions before 0.7.0, see the GitHub releases page [1]. 1. https://github.com/tarantool/tarantool-python/releases --- CHANGELOG.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..cc8cb96b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,48 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added +- Reusable testing workflow for integration with tarantool artifacts + (PR #192). + +### Changed +- Clarify license of the project (BSD-2-Clause) (PR #210, #197). +- Migrate CI to GitHub Actions (PR #213, PR #216, #182). +- Various improvements and fixes in README (PR #210, PR #215). + +### Fixed +- json.dumps compatibility with Python 2 (PR #186). +- Unix socket support in mesh_connection (PR #189, #111). +- Various fixes in tests (PR #189, #111, PR #195, #194). + + +## 0.7.1 - 2020-12-28 + +### Fixed +- msgpack library dependency (PR #185). + + +## 0.7.0 - 2020-12-28 + +Caution: Use tarantool-python 0.7.1 instead of 0.7.0. It fixes +the dependency on the msgpack library. + +### Added +- Support msgpack 1.0.0 (#155, PR #173). +- SQL support (.execute() method) (#159, PR #161). +- Allow to receive a Tarantool tuple as a Python tuple, not a list, with + use_list=False connection option (#166, PR #161). +- Support the Database API (PEP-0249) (PR #161).
+ +### Changed +- Various improvements in README (PR #147, PR #151, PR #180). + +### Fixed +- Support encoding=None connections (PR #172). +- Various improvements and fixes in tests (8ff9a3f, bd37703, PR #165, + #178, PR #179, PR #181). From dd6ef5b51f6f64c65bfe2d9a0bc3640ad28be50b Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Tue, 15 Mar 2022 17:00:48 +0300 Subject: [PATCH 2/2] types: support working with binary for Python 3 This is a breaking change. Before this patch, both bytes and str were encoded as mp_str. It was possible to work with utf and non-utf strings, but not with varbinary (mp_bin) [1]. This patch adds varbinary support for Python 3 by default. With Python 2 the behavior of the connector remains the same. Before this patch: * encoding="utf-8" (default) Python 3 -> Tarantool -> Python 3 str -> mp_str (string) -> str bytes -> mp_str (string) -> str mp_bin (varbinary) -> bytes * encoding=None Python 3 -> Tarantool -> Python 3 bytes -> mp_str (string) -> bytes str -> mp_str (string) -> bytes mp_bin (varbinary) -> bytes Using bytes as a key was not supported by several methods (delete, update, select). After this patch: * encoding="utf-8" (default) Python 3 -> Tarantool -> Python 3 str -> mp_str (string) -> str bytes -> mp_bin (varbinary) -> bytes * encoding=None Python 3 -> Tarantool -> Python 3 bytes -> mp_str (string) -> bytes str -> mp_str (string) -> bytes mp_bin (varbinary) -> bytes Using bytes as a key is now supported by all methods. Thus, an encoding="utf-8" connection may be used to work with utf-8 strings and varbinary, and an encoding=None connection may be used to work with non-utf-8 strings. This patch does not add new restrictions (like "do not permit to use str in encoding=None mode because result may be confusing") to preserve current behavior (for example, using space name as str in schema get_space). 1.
https://github.com/tarantool/tarantool/issues/4201 Closes #105 --- CHANGELOG.md | 48 +++++++++ tarantool/request.py | 28 +++++- tarantool/utils.py | 14 ++- test/suites/__init__.py | 4 +- test/suites/lib/skip.py | 82 +++++++++++---- test/suites/test_encoding.py | 187 +++++++++++++++++++++++++++++++++++ 6 files changed, 338 insertions(+), 25 deletions(-) create mode 100644 test/suites/test_encoding.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cc8cb96b..b1f6e082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,54 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 (PR #192). ### Changed +- **Breaking**: change binary types encode/decode for Python 3 + to support working with varbinary (PR #211, #105). + With Python 2 the behavior of the connector remains the same. + + Before this patch: + + * encoding="utf-8" (default) + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | str | -> | mp_str (string) | -> | str | + | bytes | -> | mp_str (string) | -> | str | + | | | mp_bin (varbinary) | -> | bytes | + + * encoding=None + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | bytes | -> | mp_str (string) | -> | bytes | + | str | -> | mp_str (string) | -> | bytes | + | | | mp_bin (varbinary) | -> | bytes | + + Using bytes as key was not supported by several methods (delete, + update, select). 
+ + After this patch: + + * encoding="utf-8" (default) + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | str | -> | mp_str (string) | -> | str | + | bytes | -> | mp_bin (varbinary) | -> | bytes | + + * encoding=None + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | bytes | -> | mp_str (string) | -> | bytes | + | str | -> | mp_str (string) | -> | bytes | + | | | mp_bin (varbinary) | -> | bytes | + + Using bytes as key is now supported by all methods. + + Thus, encoding="utf-8" connection may be used to work with + utf-8 strings and varbinary and encoding=None connection + may be used to work with non-utf-8 strings. + - Clarify license of the project (BSD-2-Clause) (PR #210, #197). - Migrate CI to GitHub Actions (PR #213, PR #216, #182). - Various improvements and fixes in README (PR #210, PR #215). diff --git a/tarantool/request.py b/tarantool/request.py index d1a5a829..d58960cc 100644 --- a/tarantool/request.py +++ b/tarantool/request.py @@ -4,6 +4,7 @@ Request types definitions ''' +import sys import collections import msgpack import hashlib @@ -84,8 +85,26 @@ def __init__(self, conn): # The option controls whether to pack binary (non-unicode) # string values as mp_bin or as mp_str. # - # The default behaviour of the connector is to pack both - # bytes and Unicode strings as mp_str. + # The default behaviour of the Python 2 connector is to pack + # both bytes and Unicode strings as mp_str. + # + # The default behaviour of the Python 3 connector (since + # default encoding is "utf-8") is to pack bytes as mp_bin + # and Unicode strings as mp_str. encoding=None mode must + # be used to work with non-utf strings.
+ # + # encoding = 'utf-8' + # + # Python 3 -> Tarantool -> Python 3 + # str -> mp_str (string) -> str + # bytes -> mp_bin (varbinary) -> bytes + # + # encoding = None + # + # Python 3 -> Tarantool -> Python 3 + # bytes -> mp_str (string) -> bytes + # str -> mp_str (string) -> bytes + # mp_bin (varbinary) -> bytes # # msgpack-0.5.0 (and only this version) warns when the # option is unset: @@ -98,7 +117,10 @@ def __init__(self, conn): # just always set it for all msgpack versions to get rid # of the warning on msgpack-0.5.0 and to keep our # behaviour on msgpack-1.0.0. - packer_kwargs['use_bin_type'] = False + if conn.encoding is None or sys.version_info.major == 2: + packer_kwargs['use_bin_type'] = False + else: + packer_kwargs['use_bin_type'] = True self.packer = msgpack.Packer(**packer_kwargs) diff --git a/tarantool/utils.py b/tarantool/utils.py index c365e7cb..7909e12a 100644 --- a/tarantool/utils.py +++ b/tarantool/utils.py @@ -6,7 +6,10 @@ if sys.version_info.major == 2: string_types = (basestring, ) integer_types = (int, long) + supported_types = integer_types + string_types + (float,) + ENCODING_DEFAULT = None + if sys.version_info.minor < 6: binary_types = (str, ) else: @@ -17,10 +20,13 @@ def strxor(rhs, lhs): return "".join(chr(ord(x) ^ ord(y)) for x, y in zip(rhs, lhs)) elif sys.version_info.major == 3: - binary_types = (bytes, ) - string_types = (str, ) - integer_types = (int, ) + binary_types = (bytes, ) + string_types = (str, ) + integer_types = (int, ) + supported_types = integer_types + string_types + binary_types + (float,) + ENCODING_DEFAULT = "utf-8" + from base64 import decodebytes as base64_decode def strxor(rhs, lhs): @@ -43,7 +49,7 @@ def check_key(*args, **kwargs): elif args[0] is None and kwargs['select']: return [] for key in args: - assert isinstance(key, integer_types + string_types + (float,)) + assert isinstance(key, supported_types) return list(args) diff --git a/test/suites/__init__.py b/test/suites/__init__.py index 
ecf3a201..8e2eafc1 100644 --- a/test/suites/__init__.py +++ b/test/suites/__init__.py @@ -12,11 +12,13 @@ from .test_mesh import TestSuite_Mesh from .test_execute import TestSuite_Execute from .test_dbapi import TestSuite_DBAPI +from .test_encoding import TestSuite_Encoding test_cases = (TestSuite_Schema_UnicodeConnection, TestSuite_Schema_BinaryConnection, TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect, - TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI) + TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI, + TestSuite_Encoding) def load_tests(loader, tests, pattern): suite = unittest.TestSuite() diff --git a/test/suites/lib/skip.py b/test/suites/lib/skip.py index 495a716c..f8f5a475 100644 --- a/test/suites/lib/skip.py +++ b/test/suites/lib/skip.py @@ -1,20 +1,15 @@ import functools import pkg_resources import re +import sys -SQL_SUPPORT_TNT_VERSION = '2.0.0' - -def skip_or_run_sql_test(func): - """Decorator to skip or run SQL-related tests depending on the tarantool +def skip_or_run_test_tarantool(func, REQUIRED_TNT_VERSION, msg): + """Decorator to skip or run tests depending on the tarantool version. - Tarantool supports SQL-related stuff only since 2.0.0 version. So this - decorator should wrap every SQL-related test to skip it if the tarantool - version < 2.0.0 is used for testing. - - Also, it can be used with the 'setUp' method for skipping the whole test - suite. + Also, it can be used with the 'setUp' method for skipping + the whole test suite. 
""" @functools.wraps(func) @@ -28,16 +23,69 @@ def wrapper(self, *args, **kwargs): ).group() tnt_version = pkg_resources.parse_version(self.tnt_version) - sql_support_tnt_version = pkg_resources.parse_version( - SQL_SUPPORT_TNT_VERSION - ) + support_version = pkg_resources.parse_version(REQUIRED_TNT_VERSION) - if tnt_version < sql_support_tnt_version: - self.skipTest( - 'Tarantool %s does not support SQL' % self.tnt_version - ) + if tnt_version < support_version: + self.skipTest('Tarantool %s %s' % (self.tnt_version, msg)) if func.__name__ != 'setUp': func(self, *args, **kwargs) return wrapper + + +def skip_or_run_test_python_major(func, REQUIRED_PYTHON_MAJOR, msg): + """Decorator to skip or run tests depending on the Python major + version. + + Also, it can be used with the 'setUp' method for skipping + the whole test suite. + """ + + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if func.__name__ == 'setUp': + func(self, *args, **kwargs) + + major = sys.version_info.major + if major != REQUIRED_PYTHON_MAJOR: + self.skipTest('Python %s connector %s' % (major, msg)) + + if func.__name__ != 'setUp': + func(self, *args, **kwargs) + + return wrapper + + +def skip_or_run_sql_test(func): + """Decorator to skip or run SQL-related tests depending on the + tarantool version. + + Tarantool supports SQL-related stuff only since 2.0.0 version. + So this decorator should wrap every SQL-related test to skip it if + the tarantool version < 2.0.0 is used for testing. + """ + + return skip_or_run_test_tarantool(func, '2.0.0', 'does not support SQL') + + +def skip_or_run_varbinary_test(func): + """Decorator to skip or run VARBINARY-related tests depending on + the tarantool version. + + Tarantool supports VARBINARY type only since 2.2.1 version. 
+ See https://github.com/tarantool/tarantool/issues/4201 + """ + + return skip_or_run_test_tarantool(func, '2.2.1', + 'does not support VARBINARY type') + + +def skip_or_run_mp_bin_test(func): + """Decorator to skip or run mp_bin-related tests depending on + the Python version. + + Python 2 connector do not support mp_bin. + """ + + return skip_or_run_test_python_major(func, 3, 'does not support mp_bin') \ No newline at end of file diff --git a/test/suites/test_encoding.py b/test/suites/test_encoding.py new file mode 100644 index 00000000..1ee0f4aa --- /dev/null +++ b/test/suites/test_encoding.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import unittest +import tarantool + +from .lib.skip import skip_or_run_mp_bin_test, skip_or_run_varbinary_test +from .lib.tarantool_server import TarantoolServer + +class TestSuite_Encoding(unittest.TestCase): + @classmethod + def setUpClass(self): + print(' ENCODING '.center(70, '='), file=sys.stderr) + print('-' * 70, file=sys.stderr) + self.srv = TarantoolServer() + self.srv.script = 'test/suites/box.lua' + self.srv.start() + + self.srv.admin(""" + box.schema.user.create('test', { password = 'test' }) + box.schema.user.grant('test', 'execute,read,write', 'universe') + """) + + args = [self.srv.host, self.srv.args['primary']] + kwargs = { 'user': 'test', 'password': 'test' } + self.con_encoding_utf8 = tarantool.Connection(*args, encoding='utf-8', **kwargs) + self.con_encoding_none = tarantool.Connection(*args, encoding=None, **kwargs) + self.conns = [self.con_encoding_utf8, self.con_encoding_none] + + self.srv.admin("box.schema.create_space('space_str')") + self.srv.admin(""" + box.space['space_str']:create_index('primary', { + type = 'tree', + parts = {1, 'str'}, + unique = true}) + """.replace('\n', ' ')) + + self.srv.admin("box.schema.create_space('space_varbin')") + self.srv.admin(r""" + box.space['space_varbin']:format({ + { + 'id', + type = 'number', + is_nullable = 
false + }, + { + 'varbin', + type = 'varbinary', + is_nullable = false, + } + }) + """.replace('\n', ' ')) + self.srv.admin(""" + box.space['space_varbin']:create_index('id', { + type = 'tree', + parts = {1, 'number'}, + unique = true}) + """.replace('\n', ' ')) + self.srv.admin(""" + box.space['space_varbin']:create_index('varbin', { + type = 'tree', + parts = {2, 'varbinary'}, + unique = true}) + """.replace('\n', ' ')) + + def assertNotRaises(self, func, *args, **kwargs): + try: + func(*args, **kwargs) + except Exception as e: + self.fail('Function raised Exception: %s' % repr(e)) + + def setUp(self): + # prevent a remote tarantool from clean our session + if self.srv.is_started(): + self.srv.touch_lock() + + # encoding = 'utf-8' + # + # Python 3 -> Tarantool -> Python 3 + # str -> mp_str (string) -> str + # bytes -> mp_bin (varbinary) -> bytes + def test_01_01_str_encode_for_encoding_utf8_behavior(self): + data = 'test_01_01' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_utf8.insert, space, [data]) + + resp = self.con_encoding_utf8.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + def test_01_02_string_decode_for_encoding_utf8_behavior(self): + data = 'test_01_02' + space = 'space_str' + + self.srv.admin("box.space['%s']:insert{'%s'}" % (space, data)) + + resp = self.con_encoding_utf8.eval("return box.space['%s']:get('%s')" % (space, data)) + self.assertSequenceEqual(resp, [[data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_01_03_bytes_encode_for_encoding_utf8_behavior(self): + data_id = 103 + data = bytes(bytearray.fromhex('DEADBEAF0103')) + space = 'space_varbin' + + self.assertNotRaises(self.con_encoding_utf8.insert, space, [data_id, data]) + + resp = self.con_encoding_utf8.select(space, [ data ], index='varbin') + self.assertSequenceEqual(resp, [[data_id, data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_01_04_varbinary_decode_for_encoding_utf8_behavior(self): + 
data_id = 104 + data_hex = 'DEADBEAF0104' + data = bytes(bytearray.fromhex(data_hex)) + space = 'space_varbin' + + self.con_encoding_utf8.execute(""" + INSERT INTO "%s" VALUES (%d, x'%s'); + """ % (space, data_id, data_hex)) + + resp = self.con_encoding_utf8.execute(""" + SELECT * FROM "%s" WHERE "varbin" == x'%s'; + """ % (space, data_hex)) + self.assertSequenceEqual(resp, [[data_id, data]]) + + # encoding = None + # + # Python 3 -> Tarantool -> Python 3 + # bytes -> mp_str (string) -> bytes + # str -> mp_str (string) -> bytes + # mp_bin (varbinary) -> bytes + def test_02_01_str_encode_for_encoding_none_behavior(self): + data = 'test_02_01' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_none.insert, space, [data]) + + resp = self.con_encoding_utf8.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + def test_02_02_string_decode_for_encoding_none_behavior(self): + data = 'test_02_02' + data_decoded = b'test_02_02' + space = 'space_str' + + self.srv.admin("box.space['%s']:insert{'%s'}" % (space, data)) + + resp = self.con_encoding_none.eval("return box.space['%s']:get('%s')" % (space, data)) + self.assertSequenceEqual(resp, [[data_decoded]]) + + def test_02_03_bytes_encode_for_encoding_none_behavior(self): + data = b'test_02_03' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_none.insert, space, [data]) + + resp = self.con_encoding_none.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_02_04_varbinary_decode_for_encoding_none_behavior(self): + data_id = 204 + data_hex = 'DEADBEAF0204' + data = bytes(bytearray.fromhex(data_hex)) + space = 'space_varbin' + + self.con_encoding_none.execute(""" + INSERT INTO "%s" VALUES (%d, x'%s'); + """ % (space, data_id, data_hex)) + + resp = self.con_encoding_none.execute(""" + SELECT * FROM "%s" WHERE "varbin" == x'%s'; + """ % (space, data_hex)) + self.assertSequenceEqual(resp, [[data_id, data]]) 
+ + @classmethod + def tearDownClass(self): + for con in self.conns: + con.close() + self.srv.stop() + self.srv.clean()