diff --git a/bson/binary.py b/bson/binary.py index 48f1f58512..6698e55ccc 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -462,10 +462,6 @@ def from_vector( raise ValueError(f"{padding=}. It must be in [0,1, ..7].") if padding and not vector: raise ValueError("Empty vector with non-zero padding.") - if padding and not (vector[-1] & ((1 << padding) - 1)) == 0: # type: ignore - raise ValueError( - "If padding p is provided, all bits in the final byte lower than p must be 0." - ) elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32 format_str = "f" if padding: @@ -494,11 +490,6 @@ def as_vector(self) -> BinaryVector: dtype = BinaryVectorDtype(dtype) n_values = len(self) - position - if padding and dtype != BinaryVectorDtype.PACKED_BIT: - raise ValueError( - f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})" - ) - if dtype == BinaryVectorDtype.INT8: dtype_format = "b" format_string = f"<{n_values}{dtype_format}" @@ -522,12 +513,6 @@ def as_vector(self) -> BinaryVector: dtype_format = "B" format_string = f"<{n_values}{dtype_format}" unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) - if padding and not n_values: - raise ValueError("Corrupt data. Vector has a padding P, but no data.") - if padding and n_values and not (unpacked_uint8s[-1] & ((1 << padding) - 1)) == 0: - raise ValueError( - "Corrupt data. Vector has a padding P, but bits in the final byte lower than P are non-zero." - ) return BinaryVector(unpacked_uint8s, dtype, padding) else: diff --git a/test/bson_binary_vector/packed_bit.json b/test/bson_binary_vector/packed_bit.json index 3015acba66..a220e7e318 100644 --- a/test/bson_binary_vector/packed_bit.json +++ b/test/bson_binary_vector/packed_bit.json @@ -21,32 +21,23 @@ "canonical_bson": "1600000005766563746F7200040000000910007F0700" }, { - "description": "PACKED_BIT with padding", + "description": "Empty Vector PACKED_BIT", "valid": true, - "vector": [127, 8], + "vector": [], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 3, - "canonical_bson": "1600000005766563746F7200040000000910037F0800" + "padding": 0, + "canonical_bson": "1400000005766563746F72000200000009100000" }, { - "description": "PACKED_BIT with inconsistent padding", - "valid": false, + "description": "PACKED_BIT with padding", + "valid": true, "vector": [127, 7], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 3, "canonical_bson": "1600000005766563746F7200040000000910037F0700" }, - { - "description": "Empty Vector PACKED_BIT", - "valid": true, - "vector": [], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 0, - "canonical_bson": "1400000005766563746F72000200000009100000" - }, { "description": "Overflow Vector PACKED_BIT", "valid": false, diff --git a/test/test_bson.py b/test/test_bson.py index 522945d5f4..1616c513c2 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -739,7 +739,7 @@ def test_vector(self): """Tests of subtype 9""" # We start with valid cases, across the 3 dtypes implemented. # Work with a simple vector that can be interpreted as int8, float32, or ubyte - list_vector = [127, 8] + list_vector = [127, 7] # As INT8, vector has length 2 binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8) vector = binary_vector.as_vector() @@ -764,18 +764,18 @@ def test_vector(self): uncompressed = "" for val in list_vector: uncompressed += format(val, "08b") - assert uncompressed[:-padding] == "0111111100001" + assert uncompressed[:-padding] == "0111111100000" # It is worthwhile explicitly showing the values encoded to BSON padded_doc = {"padded_vec": padded_vec} assert ( encode(padded_doc) - == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00" + == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00" ) # and dumped to json assert ( json_util.dumps(padded_doc) - == '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}' + == '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}' ) # FLOAT32 is also implemented diff --git a/test/test_bson_binary_vector.py b/test/test_bson_binary_vector.py index afe01f42bf..9bfdcbfb9a 100644 --- a/test/test_bson_binary_vector.py +++ b/test/test_bson_binary_vector.py @@ -48,11 +48,11 @@ def create_test(case_spec): def run_test(self): for test_case in case_spec.get("tests", []): description = test_case["description"] - vector_exp = test_case.get("vector", None) + vector_exp = test_case.get("vector", []) dtype_hex_exp = test_case["dtype_hex"] dtype_alias_exp = test_case.get("dtype_alias") padding_exp = test_case.get("padding", 0) - canonical_bson_exp = test_case.get("canonical_bson", None) + canonical_bson_exp = test_case.get("canonical_bson") # Convert dtype hex string into bytes dtype_exp = BinaryVectorDtype(int(dtype_hex_exp, 16).to_bytes(1, byteorder="little")) @@ -85,25 +85,14 @@ def run_test(self): self.assertEqual(cB_obs, canonical_bson_exp, description) else: - """ - #### To prove correct in an invalid case (`valid:false`), one MUST - - if the vector field is present, raise an exception when attempting to encode a document from the numeric values, - dtype, and padding. - - if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding - numeric values, as the field contains corrupted data. - """ - # Tests Binary.from_vector() - if vector_exp is not None: - with self.assertRaises((struct.error, ValueError), msg=description): - Binary.from_vector(vector_exp, dtype_exp, padding_exp) - - # Tests Binary.as_vector() - if canonical_bson_exp is not None: - with self.assertRaises((struct.error, ValueError), msg=description): - cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) - decoded_doc = decode(cB_exp) - binary_obs = decoded_doc[test_key] - binary_obs.as_vector() + with self.assertRaises((struct.error, ValueError), msg=description): + # Tests Binary.from_vector + Binary.from_vector(vector_exp, dtype_exp, padding_exp) + # Tests Binary.as_vector + cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) + decoded_doc = decode(cB_exp) + binary_obs = decoded_doc[test_key] + binary_obs.as_vector() return run_test