diff --git a/bson/binary.py b/bson/binary.py index aab59cccbc..ee481fa1a5 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -450,6 +450,10 @@ def from_vector( raise ValueError(f"padding does not apply to {dtype=}") elif dtype == BinaryVectorDtype.PACKED_BIT: # pack ints in [0, 255] as unsigned uint8 format_str = "B" + if not 0 <= padding <= 7: + raise ValueError(f"{padding=}. It must be in [0,1, ..7].") + if padding and not vector: + raise ValueError("Empty vector with non-zero padding.") elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32 format_str = "f" if padding: diff --git a/test/bson_binary_vector/float32.json b/test/bson_binary_vector/float32.json index bbbe00b758..845f504ff3 100644 --- a/test/bson_binary_vector/float32.json +++ b/test/bson_binary_vector/float32.json @@ -11,6 +11,15 @@ "padding": 0, "canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000" }, + { + "description": "Vector with decimals and negative value FLOAT32", + "valid": true, + "vector": [127.7, -7.7], + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "padding": 0, + "canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000" + }, { "description": "Empty Vector FLOAT32", "valid": true, @@ -35,8 +44,22 @@ "vector": [127.0, 7.0], "dtype_hex": "0x27", "dtype_alias": "FLOAT32", - "padding": 3 + "padding": 3, + "canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000" + }, + { + "description": "Insufficient vector data with 3 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1700000005766563746F7200050000000927002A2A2A00" + }, + { + "description": "Insufficient vector data with 5 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00" } ] } - diff --git a/test/bson_binary_vector/int8.json b/test/bson_binary_vector/int8.json index 7529721e5e..29524fb617 100644 --- 
a/test/bson_binary_vector/int8.json +++ b/test/bson_binary_vector/int8.json @@ -42,7 +42,8 @@ "vector": [127, 7], "dtype_hex": "0x03", "dtype_alias": "INT8", - "padding": 3 + "padding": 3, + "canonical_bson": "1600000005766563746F7200040000000903037F0700" }, { "description": "INT8 with float inputs", @@ -54,4 +55,3 @@ } ] } - diff --git a/test/bson_binary_vector/packed_bit.json b/test/bson_binary_vector/packed_bit.json index a41cd593f5..a220e7e318 100644 --- a/test/bson_binary_vector/packed_bit.json +++ b/test/bson_binary_vector/packed_bit.json @@ -2,6 +2,15 @@ "description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT", "test_key": "vector", "tests": [ + { + "description": "Padding specified with no vector data PACKED_BIT", + "valid": false, + "vector": [], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": 1, + "canonical_bson": "1400000005766563746F72000200000009100100" + }, { "description": "Simple Vector PACKED_BIT", "valid": true, @@ -44,7 +53,31 @@ "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 0 + }, + { + "description": "Vector with float values PACKED_BIT", + "valid": false, + "vector": [127.5], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": 0 + }, + { + "description": "Exceeding maximum padding PACKED_BIT", + "valid": false, + "vector": [1], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": 8, + "canonical_bson": "1500000005766563746F7200030000000910080100" + }, + { + "description": "Negative padding PACKED_BIT", + "valid": false, + "vector": [1], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": -1 } ] } - diff --git a/test/test_bson_binary_vector.py b/test/test_bson_binary_vector.py index 00c82bbb65..a49f515fea 100644 --- a/test/test_bson_binary_vector.py +++ b/test/test_bson_binary_vector.py @@ -49,7 +49,7 @@ def create_test(case_spec): def run_test(self): for test_case in case_spec.get("tests", []): description = test_case["description"] - vector_exp = 
test_case["vector"] + vector_exp = test_case.get("vector", []) dtype_hex_exp = test_case["dtype_hex"] dtype_alias_exp = test_case.get("dtype_alias") padding_exp = test_case.get("padding", 0) @@ -76,9 +76,12 @@ def run_test(self): self.assertEqual( vector_obs.dtype, BinaryVectorDtype[dtype_alias_exp], description ) - self.assertEqual(vector_obs.data, vector_exp, description) - self.assertEqual(vector_obs.padding, padding_exp, description) - + if dtype_exp in [BinaryVectorDtype.FLOAT32]: + self.assertEqual(len(vector_obs.data), len(vector_exp), description) + for obs, exp in zip(vector_obs.data, vector_exp): + self.assertAlmostEqual(obs, exp, delta=1e-5, msg=description) + else: + self.assertEqual(vector_obs.data, vector_exp, description) # Test Binary Vector to BSON vector_exp = Binary.from_vector(vector_exp, dtype_exp, padding_exp) cB_obs = binascii.hexlify(encode({test_key: vector_exp})).decode().upper() @@ -86,7 +89,13 @@ def run_test(self): else: with self.assertRaises((struct.error, ValueError), msg=description): + # Tests Binary.from_vector Binary.from_vector(vector_exp, dtype_exp, padding_exp) + # Tests Binary.as_vector + cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) + decoded_doc = decode(cB_exp) + binary_obs = decoded_doc[test_key] + binary_obs.as_vector() return run_test