class VectorDeserializationFailure(DriverException):
    """
    Raised when the driver is unable to deserialize a given vector.
    """
@classmethod
def deserialize(cls, byts, protocol_version):
    """
    Decode a serialized vector into a list of ``cls.vector_size`` elements.

    The payload is read as ``cls.vector_size`` consecutive chunks of
    ``cls.subtype.serial_size`` bytes each, so only subtypes that declare a
    non-zero ``serial_size`` attribute can be decoded here.

    :param byts: the serialized vector payload
    :param protocol_version: passed through unchanged to the subtype's
        ``deserialize``
    :return: list of deserialized elements, in payload order
    :raises VectorDeserializationFailure: if the subtype declares no
        ``serial_size``, or if the payload length is not exactly
        ``serial_size * vector_size`` bytes
    """
    serialized_size = getattr(cls.subtype, "serial_size", None)
    if not serialized_size:
        raise VectorDeserializationFailure("Cannot determine serialized size for vector with subtype %s" % cls.subtype.__name__)

    # Reject truncated or oversized payloads up front: slicing past the end
    # would hand short/empty chunks to the subtype decoder, and trailing bytes
    # would otherwise be dropped without notice.
    expected_size = serialized_size * cls.vector_size
    if len(byts) != expected_size:
        raise VectorDeserializationFailure(
            "Expected vector with subtype %s and dimension %d to have serialized size %d; "
            "observed serialized size of %d instead"
            % (cls.subtype.__name__, cls.vector_size, expected_size, len(byts)))

    return [cls.subtype.deserialize(byts[offset:offset + serialized_size], protocol_version)
            for offset in range(0, expected_size, serialized_size)]
def test_vector_round_trip_types_with_serialized_size(self):
    """
    Vectors whose subtype specifies a serialized size must survive a
    serialize/deserialize round trip... see PYTHON-1371 for details.
    """
    vector_of = "org.apache.cassandra.db.marshal.VectorType(org.apache.cassandra.db.marshal.%s, %d)"
    cases = [
        ("BooleanType", [True, False, False, True]),
        ("FloatType", [3.4, 2.9, 41.6, 12.0]),
        ("DoubleType", [3.4, 2.9, 41.6, 12.0]),
        ("LongType", [3, 2, 41, 12]),
        ("Int32Type", [3, 2, 41, 12]),
        ("TimeUUIDType", [uuid.uuid1() for _ in range(4)]),
        ("ShortType", [3, 2, 41, 12]),
        ("TimeType", [datetime.time(1, 1, 1), datetime.time(2, 2, 2), datetime.time(3, 3, 3)]),
    ]
    for subtype, data in cases:
        # subTest keeps one failing subtype from hiding the remaining ones
        with self.subTest(subtype=subtype):
            self._round_trip_test(data, vector_of % (subtype, len(data)))

def test_vector_round_trip_types_without_serialized_size(self):
    """
    Vectors whose subtype does not specify a serialized size must raise
    VectorDeserializationFailure... see PYTHON-1371 for details.
    """
    marshal = "org.apache.cassandra.db.marshal."
    vector_of = marshal + "VectorType(" + marshal + "%s, %d)"
    cases = [
        # Varints
        ("IntegerType", [3, 2, 41, 12]),
        # ASCII text
        ("AsciiType", ["abc", "def", "ghi", "jkl"]),
        # UTF8 text
        ("UTF8Type", ["abc", "def", "ghi", "jkl"]),
        # Duration (contains varints)
        ("DurationType", [util.Duration(1, 1, 1), util.Duration(2, 2, 2), util.Duration(3, 3, 3)]),
        # List (of otherwise serializable type)
        ("ListType(" + marshal + "FloatType)", [[3.4], [2.9], [41.6], [12.0]]),
        # Set (of otherwise serializable type)
        ("SetType(" + marshal + "FloatType)", [{3.4}, {2.9}, {41.6}, {12.0}]),
        # Map (of otherwise serializable types)
        ("MapType(" + marshal + "Int32Type," + marshal + "FloatType)",
         [{1: 3.4}, {2: 2.9}, {3: 41.6}, {4: 12.0}]),
    ]
    for subtype, data in cases:
        with self.subTest(subtype=subtype):
            with self.assertRaises(VectorDeserializationFailure):
                self._round_trip_test(data, vector_of % (subtype, len(data)))

def _round_trip_test(self, data, ctype_str):
    """
    Serialize ``data`` with the type parsed from ``ctype_str``, deserialize
    the resulting bytes, and check the result matches ``data`` element-wise.

    When the vector's subtype has a ``serial_size``, the serialized length
    is also checked against ``serial_size * len(data)``.
    """
    ctype = parse_casstype_args(ctype_str)
    data_bytes = ctype.serialize(data, 0)
    serialized_size = getattr(ctype.subtype, "serial_size", None)
    if serialized_size:
        self.assertEqual(serialized_size * len(data), len(data_bytes))
    result = ctype.deserialize(data_bytes, 0)
    self.assertEqual(len(data), len(result))
    # assertAlmostEqual passes immediately for values that compare equal, so
    # it also covers the non-numeric element types; the places tolerance only
    # comes into play for floats that differ slightly after the round trip.
    for expected, actual in zip(data, result):
        self.assertAlmostEqual(expected, actual, places=5)