diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..b1f6e082 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,96 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added +- Reusable testing workflow for integration with tarantool artifacts + (PR #192). + +### Changed +- **Breaking**: change binary types encode/decode for Python 3 + to support working with varbinary (PR #211, #105). + With Python 2 the behavior of the connector remains the same. + + Before this patch: + + * encoding="utf-8" (default) + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | str | -> | mp_str (string) | -> | str | + | bytes | -> | mp_str (string) | -> | str | + | | | mp_bin (varbinary) | -> | bytes | + + * encoding=None + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | bytes | -> | mp_str (string) | -> | bytes | + | str | -> | mp_str (string) | -> | bytes | + | | | mp_bin (varbinary) | -> | bytes | + + Using bytes as key was not supported by several methods (delete, + update, select). + + After this patch: + + * encoding="utf-8" (default) + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | str | -> | mp_str (string) | -> | str | + | bytes | -> | mp_bin (varbinary) | -> | bytes | + + * encoding=None + + | Python 3 | -> | Tarantool | -> | Python 3 | + |----------|----|--------------------|----|----------| + | bytes | -> | mp_str (string) | -> | bytes | + | str | -> | mp_str (string) | -> | bytes | + | | | mp_bin (varbinary) | -> | bytes | + + Using bytes as key is now supported by all methods. 
+ + Thus, encoding="utf-8" connection may be used to work with + utf-8 strings and varbinary and encoding=None connection + may be used to work with non-utf-8 strings. + +- Clarify license of the project (BSD-2-Clause) (PR #210, #197). +- Migrate CI to GitHub Actions (PR #213, PR #216, #182). +- Various improvements and fixes in README (PR #210, PR #215). + +### Fixed +- json.dumps compatibility with Python 2 (PR #186). +- Unix socket support in mesh_connection (PR #189, #111). +- Various fixes in tests (PR #189, #111, PR #195, #194). + + +## 0.7.1 - 2020-12-28 + +### Fixed +- msgpack library dependency (PR #185). + + +## 0.7.0 - 2020-12-28 + +Caution: Use tarantool-python 0.7.1 instead of 0.7.0. It fixes +the dependency on the msgpack library. + +### Added +- Support msgpack 1.0.0 (#155, PR #173). +- SQL support (.execute() method) (#159, PR #161). +- Allow to receive a Tarantool tuple as a Python tuple, not a list, with + use_list=False connection option (#166, PR #161). +- Support the Database API (PEP-0249) (PR #161). + +### Changed +- Various improvements in README (PR #147, PR #151, PR #180). + +### Fixed +- Support encoding=None connections (PR #172). +- Various improvements and fixes in tests (8ff9a3f, bd37703, PR #165, + #178, PR #179, PR #181). diff --git a/tarantool/request.py b/tarantool/request.py index d1a5a829..d58960cc 100644 --- a/tarantool/request.py +++ b/tarantool/request.py @@ -4,6 +4,7 @@ Request types definitions ''' +import sys import collections import msgpack import hashlib @@ -84,8 +85,26 @@ def __init__(self, conn): # The option controls whether to pack binary (non-unicode) # string values as mp_bin or as mp_str. # - # The default behaviour of the connector is to pack both - # bytes and Unicode strings as mp_str. + # The default behaviour of the Python 2 connector is to pack + # both bytes and Unicode strings as mp_str. 
+ # + # The default behaviour of the Python 3 connector (since + # default encoding is "utf-8") is to pack bytes as mp_bin + # and Unicode strings as mp_str. encoding=None mode must + # be used to work with non-utf strings. + # + # encoding = 'utf-8' + # + # Python 3 -> Tarantool -> Python 3 + # str -> mp_str (string) -> str + # bytes -> mp_bin (varbinary) -> bytes + # + # encoding = None + # + # Python 3 -> Tarantool -> Python 3 + # bytes -> mp_str (string) -> bytes + # str -> mp_str (string) -> bytes + # mp_bin (varbinary) -> bytes # # msgpack-0.5.0 (and only this version) warns when the # option is unset: @@ -98,7 +117,10 @@ def __init__(self, conn): # just always set it for all msgpack versions to get rid # of the warning on msgpack-0.5.0 and to keep our # behaviour on msgpack-1.0.0. - packer_kwargs['use_bin_type'] = False + if conn.encoding is None or sys.version_info.major == 2: + packer_kwargs['use_bin_type'] = False + else: + packer_kwargs['use_bin_type'] = True self.packer = msgpack.Packer(**packer_kwargs) diff --git a/tarantool/utils.py b/tarantool/utils.py index c365e7cb..7909e12a 100644 --- a/tarantool/utils.py +++ b/tarantool/utils.py @@ -6,7 +6,10 @@ if sys.version_info.major == 2: string_types = (basestring, ) integer_types = (int, long) + supported_types = integer_types + string_types + (float,) + ENCODING_DEFAULT = None + if sys.version_info.minor < 6: binary_types = (str, ) else: @@ -17,10 +20,13 @@ def strxor(rhs, lhs): return "".join(chr(ord(x) ^ ord(y)) for x, y in zip(rhs, lhs)) elif sys.version_info.major == 3: - binary_types = (bytes, ) - string_types = (str, ) - integer_types = (int, ) + binary_types = (bytes, ) + string_types = (str, ) + integer_types = (int, ) + supported_types = integer_types + string_types + binary_types + (float,) + ENCODING_DEFAULT = "utf-8" + from base64 import decodebytes as base64_decode def strxor(rhs, lhs): @@ -43,7 +49,7 @@ def check_key(*args, **kwargs): elif args[0] is None and kwargs['select']: return [] for 
key in args: - assert isinstance(key, integer_types + string_types + (float,)) + assert isinstance(key, supported_types) return list(args) diff --git a/test/suites/__init__.py b/test/suites/__init__.py index ecf3a201..8e2eafc1 100644 --- a/test/suites/__init__.py +++ b/test/suites/__init__.py @@ -12,11 +12,13 @@ from .test_mesh import TestSuite_Mesh from .test_execute import TestSuite_Execute from .test_dbapi import TestSuite_DBAPI +from .test_encoding import TestSuite_Encoding test_cases = (TestSuite_Schema_UnicodeConnection, TestSuite_Schema_BinaryConnection, TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect, - TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI) + TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI, + TestSuite_Encoding) def load_tests(loader, tests, pattern): suite = unittest.TestSuite() diff --git a/test/suites/lib/skip.py b/test/suites/lib/skip.py index 495a716c..f8f5a475 100644 --- a/test/suites/lib/skip.py +++ b/test/suites/lib/skip.py @@ -1,20 +1,15 @@ import functools import pkg_resources import re +import sys -SQL_SUPPORT_TNT_VERSION = '2.0.0' - -def skip_or_run_sql_test(func): - """Decorator to skip or run SQL-related tests depending on the tarantool +def skip_or_run_test_tarantool(func, REQUIRED_TNT_VERSION, msg): + """Decorator to skip or run tests depending on the tarantool version. - Tarantool supports SQL-related stuff only since 2.0.0 version. So this - decorator should wrap every SQL-related test to skip it if the tarantool - version < 2.0.0 is used for testing. - - Also, it can be used with the 'setUp' method for skipping the whole test - suite. + Also, it can be used with the 'setUp' method for skipping + the whole test suite. 
""" @functools.wraps(func) @@ -28,16 +23,69 @@ def wrapper(self, *args, **kwargs): ).group() tnt_version = pkg_resources.parse_version(self.tnt_version) - sql_support_tnt_version = pkg_resources.parse_version( - SQL_SUPPORT_TNT_VERSION - ) + support_version = pkg_resources.parse_version(REQUIRED_TNT_VERSION) - if tnt_version < sql_support_tnt_version: - self.skipTest( - 'Tarantool %s does not support SQL' % self.tnt_version - ) + if tnt_version < support_version: + self.skipTest('Tarantool %s %s' % (self.tnt_version, msg)) if func.__name__ != 'setUp': func(self, *args, **kwargs) return wrapper + + +def skip_or_run_test_python_major(func, REQUIRED_PYTHON_MAJOR, msg): + """Decorator to skip or run tests depending on the Python major + version. + + Also, it can be used with the 'setUp' method for skipping + the whole test suite. + """ + + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if func.__name__ == 'setUp': + func(self, *args, **kwargs) + + major = sys.version_info.major + if major != REQUIRED_PYTHON_MAJOR: + self.skipTest('Python %s connector %s' % (major, msg)) + + if func.__name__ != 'setUp': + func(self, *args, **kwargs) + + return wrapper + + +def skip_or_run_sql_test(func): + """Decorator to skip or run SQL-related tests depending on the + tarantool version. + + Tarantool supports SQL-related stuff only since 2.0.0 version. + So this decorator should wrap every SQL-related test to skip it if + the tarantool version < 2.0.0 is used for testing. + """ + + return skip_or_run_test_tarantool(func, '2.0.0', 'does not support SQL') + + +def skip_or_run_varbinary_test(func): + """Decorator to skip or run VARBINARY-related tests depending on + the tarantool version. + + Tarantool supports VARBINARY type only since 2.2.1 version. 
+ See https://github.com/tarantool/tarantool/issues/4201 + """ + + return skip_or_run_test_tarantool(func, '2.2.1', + 'does not support VARBINARY type') + + +def skip_or_run_mp_bin_test(func): + """Decorator to skip or run mp_bin-related tests depending on + the Python version. + + Python 2 connector do not support mp_bin. + """ + + return skip_or_run_test_python_major(func, 3, 'does not support mp_bin') \ No newline at end of file diff --git a/test/suites/test_encoding.py b/test/suites/test_encoding.py new file mode 100644 index 00000000..1ee0f4aa --- /dev/null +++ b/test/suites/test_encoding.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import unittest +import tarantool + +from .lib.skip import skip_or_run_mp_bin_test, skip_or_run_varbinary_test +from .lib.tarantool_server import TarantoolServer + +class TestSuite_Encoding(unittest.TestCase): + @classmethod + def setUpClass(self): + print(' ENCODING '.center(70, '='), file=sys.stderr) + print('-' * 70, file=sys.stderr) + self.srv = TarantoolServer() + self.srv.script = 'test/suites/box.lua' + self.srv.start() + + self.srv.admin(""" + box.schema.user.create('test', { password = 'test' }) + box.schema.user.grant('test', 'execute,read,write', 'universe') + """) + + args = [self.srv.host, self.srv.args['primary']] + kwargs = { 'user': 'test', 'password': 'test' } + self.con_encoding_utf8 = tarantool.Connection(*args, encoding='utf-8', **kwargs) + self.con_encoding_none = tarantool.Connection(*args, encoding=None, **kwargs) + self.conns = [self.con_encoding_utf8, self.con_encoding_none] + + self.srv.admin("box.schema.create_space('space_str')") + self.srv.admin(""" + box.space['space_str']:create_index('primary', { + type = 'tree', + parts = {1, 'str'}, + unique = true}) + """.replace('\n', ' ')) + + self.srv.admin("box.schema.create_space('space_varbin')") + self.srv.admin(r""" + box.space['space_varbin']:format({ + { + 'id', + type = 'number', + is_nullable = 
false + }, + { + 'varbin', + type = 'varbinary', + is_nullable = false, + } + }) + """.replace('\n', ' ')) + self.srv.admin(""" + box.space['space_varbin']:create_index('id', { + type = 'tree', + parts = {1, 'number'}, + unique = true}) + """.replace('\n', ' ')) + self.srv.admin(""" + box.space['space_varbin']:create_index('varbin', { + type = 'tree', + parts = {2, 'varbinary'}, + unique = true}) + """.replace('\n', ' ')) + + def assertNotRaises(self, func, *args, **kwargs): + try: + func(*args, **kwargs) + except Exception as e: + self.fail('Function raised Exception: %s' % repr(e)) + + def setUp(self): + # prevent a remote tarantool from clean our session + if self.srv.is_started(): + self.srv.touch_lock() + + # encoding = 'utf-8' + # + # Python 3 -> Tarantool -> Python 3 + # str -> mp_str (string) -> str + # bytes -> mp_bin (varbinary) -> bytes + def test_01_01_str_encode_for_encoding_utf8_behavior(self): + data = 'test_01_01' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_utf8.insert, space, [data]) + + resp = self.con_encoding_utf8.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + def test_01_02_string_decode_for_encoding_utf8_behavior(self): + data = 'test_01_02' + space = 'space_str' + + self.srv.admin("box.space['%s']:insert{'%s'}" % (space, data)) + + resp = self.con_encoding_utf8.eval("return box.space['%s']:get('%s')" % (space, data)) + self.assertSequenceEqual(resp, [[data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_01_03_bytes_encode_for_encoding_utf8_behavior(self): + data_id = 103 + data = bytes(bytearray.fromhex('DEADBEAF0103')) + space = 'space_varbin' + + self.assertNotRaises(self.con_encoding_utf8.insert, space, [data_id, data]) + + resp = self.con_encoding_utf8.select(space, [ data ], index='varbin') + self.assertSequenceEqual(resp, [[data_id, data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_01_04_varbinary_decode_for_encoding_utf8_behavior(self): + 
data_id = 104 + data_hex = 'DEADBEAF0104' + data = bytes(bytearray.fromhex(data_hex)) + space = 'space_varbin' + + self.con_encoding_utf8.execute(""" + INSERT INTO "%s" VALUES (%d, x'%s'); + """ % (space, data_id, data_hex)) + + resp = self.con_encoding_utf8.execute(""" + SELECT * FROM "%s" WHERE "varbin" == x'%s'; + """ % (space, data_hex)) + self.assertSequenceEqual(resp, [[data_id, data]]) + + # encoding = None + # + # Python 3 -> Tarantool -> Python 3 + # bytes -> mp_str (string) -> bytes + # str -> mp_str (string) -> bytes + # mp_bin (varbinary) -> bytes + def test_02_01_str_encode_for_encoding_none_behavior(self): + data = 'test_02_01' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_none.insert, space, [data]) + + resp = self.con_encoding_utf8.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + def test_02_02_string_decode_for_encoding_none_behavior(self): + data = 'test_02_02' + data_decoded = b'test_02_02' + space = 'space_str' + + self.srv.admin("box.space['%s']:insert{'%s'}" % (space, data)) + + resp = self.con_encoding_none.eval("return box.space['%s']:get('%s')" % (space, data)) + self.assertSequenceEqual(resp, [[data_decoded]]) + + def test_02_03_bytes_encode_for_encoding_none_behavior(self): + data = b'test_02_03' + space = 'space_str' + + self.assertNotRaises(self.con_encoding_none.insert, space, [data]) + + resp = self.con_encoding_none.select(space, [data]) + self.assertSequenceEqual(resp, [[data]]) + + @skip_or_run_mp_bin_test + @skip_or_run_varbinary_test + def test_02_04_varbinary_decode_for_encoding_none_behavior(self): + data_id = 204 + data_hex = 'DEADBEAF0204' + data = bytes(bytearray.fromhex(data_hex)) + space = 'space_varbin' + + self.con_encoding_none.execute(""" + INSERT INTO "%s" VALUES (%d, x'%s'); + """ % (space, data_id, data_hex)) + + resp = self.con_encoding_none.execute(""" + SELECT * FROM "%s" WHERE "varbin" == x'%s'; + """ % (space, data_hex)) + self.assertSequenceEqual(resp, [[data_id, data]]) 
+ + @classmethod + def tearDownClass(self): + for con in self.conns: + con.close() + self.srv.stop() + self.srv.clean()