diff --git a/bson-scala/src/main/scala/org/mongodb/scala/bson/codecs/macrocodecs/MacroCodec.scala b/bson-scala/src/main/scala/org/mongodb/scala/bson/codecs/macrocodecs/MacroCodec.scala index 090d066223c..e284647af87 100644 --- a/bson-scala/src/main/scala/org/mongodb/scala/bson/codecs/macrocodecs/MacroCodec.scala +++ b/bson-scala/src/main/scala/org/mongodb/scala/bson/codecs/macrocodecs/MacroCodec.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import org.bson._ import org.bson.codecs.configuration.{ CodecRegistries, CodecRegistry } import org.bson.codecs.{ Codec, DecoderContext, Encoder, EncoderContext } +import scala.collection.immutable.Vector import org.mongodb.scala.bson.BsonNull diff --git a/bson-scala/src/test/scala/org/mongodb/scala/bson/codecs/MacrosSpec.scala b/bson-scala/src/test/scala/org/mongodb/scala/bson/codecs/MacrosSpec.scala index 95d7533cc87..c16215a16e8 100644 --- a/bson-scala/src/test/scala/org/mongodb/scala/bson/codecs/MacrosSpec.scala +++ b/bson-scala/src/test/scala/org/mongodb/scala/bson/codecs/MacrosSpec.scala @@ -30,6 +30,7 @@ import org.mongodb.scala.bson.annotations.{ BsonIgnore, BsonProperty } import org.mongodb.scala.bson.codecs.Macros.{ createCodecProvider, createCodecProviderIgnoreNone } import org.mongodb.scala.bson.codecs.Registry.DEFAULT_CODEC_REGISTRY import org.mongodb.scala.bson.collection.immutable.Document +import scala.collection.immutable.Vector import scala.collection.JavaConverters._ import scala.reflect.ClassTag diff --git a/bson/src/main/org/bson/BsonBinary.java b/bson/src/main/org/bson/BsonBinary.java index d5d07273cea..8590c2920be 100644 --- a/bson/src/main/org/bson/BsonBinary.java +++ b/bson/src/main/org/bson/BsonBinary.java @@ -18,10 +18,13 @@ import org.bson.assertions.Assertions; import org.bson.internal.UuidHelper; +import org.bson.internal.vector.VectorHelper; import java.util.Arrays; import java.util.UUID; +import static org.bson.internal.vector.VectorHelper.encodeVectorToBinary; + /** * A representation 
of the BSON Binary type. Note that for performance reasons instances of this class are not immutable, * so care should be taken to only modify the underlying byte array if you know what you're doing, or else make a defensive copy. @@ -89,6 +92,20 @@ public BsonBinary(final UUID uuid) { this(uuid, UuidRepresentation.STANDARD); } + /** + * Constructs a {@linkplain BsonBinarySubType#VECTOR subtype 9} {@link BsonBinary} from the given {@link Vector}. + * + * @param vector the {@link Vector} + * @since 5.3 + */ + public BsonBinary(final Vector vector) { + if (vector == null) { + throw new IllegalArgumentException("Vector must not be null"); + } + this.data = encodeVectorToBinary(vector); + type = BsonBinarySubType.VECTOR.getValue(); + } + /** * Construct a new instance from the given UUID and UuidRepresentation * @@ -127,6 +144,21 @@ public UUID asUuid() { return UuidHelper.decodeBinaryToUuid(this.data.clone(), this.type, UuidRepresentation.STANDARD); } + /** + * Returns the binary as a {@link Vector}. The {@linkplain #getType() subtype} must be {@linkplain BsonBinarySubType#VECTOR 9}. + * + * @return the vector + * @throws BsonInvalidOperationException if the binary subtype is not {@link BsonBinarySubType#VECTOR}. + * @since 5.3 + */ + public Vector asVector() { + if (type != BsonBinarySubType.VECTOR.getValue()) { + throw new BsonInvalidOperationException("type must be a Vector subtype."); + } + + return VectorHelper.decodeBinaryToVector(this.data); + } + /** * Returns the binary as a UUID. * diff --git a/bson/src/main/org/bson/BsonBinarySubType.java b/bson/src/main/org/bson/BsonBinarySubType.java index 3c5f72813b6..7b5948b4efc 100644 --- a/bson/src/main/org/bson/BsonBinarySubType.java +++ b/bson/src/main/org/bson/BsonBinarySubType.java @@ -17,7 +17,7 @@ package org.bson; /** - * The Binary subtype + * The Binary subtype. * * @since 3.0 */ @@ -60,7 +60,7 @@ public enum BsonBinarySubType { ENCRYPTED((byte) 0x06), /** - * Columnar data + * Columnar data. 
* * @since 4.4 */ @@ -73,6 +73,15 @@ public enum BsonBinarySubType { */ SENSITIVE((byte) 0x08), + /** + * Vector data. + * + * @mongodb.server.release 6.0 + * @since 5.3 + * @see Vector + */ + VECTOR((byte) 0x09), + /** * User defined binary data. */ @@ -81,10 +90,10 @@ public enum BsonBinarySubType { private final byte value; /** - * Returns true if the given value is a UUID subtype + * Returns true if the given value is a UUID subtype. * - * @param value the subtype value as a byte - * @return true if value is a UUID subtype + * @param value the subtype value as a byte. + * @return true if value is a UUID subtype. * @since 3.4 */ public static boolean isUuid(final byte value) { diff --git a/bson/src/main/org/bson/Float32Vector.java b/bson/src/main/org/bson/Float32Vector.java new file mode 100644 index 00000000000..9678003b72f --- /dev/null +++ b/bson/src/main/org/bson/Float32Vector.java @@ -0,0 +1,79 @@ +/* + * Copyright 2008-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bson; + +import java.util.Arrays; + +import static org.bson.assertions.Assertions.assertNotNull; + +/** + * Represents a vector of 32-bit floating-point numbers, where each element in the vector is a float. + *
+ * The {@link Float32Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format. + * + * @mongodb.server.release 6.0 + * @see Vector#floatVector(float[]) + * @see BsonBinary#BsonBinary(Vector) + * @see BsonBinary#asVector() + * @since 5.3 + */ +public final class Float32Vector extends Vector { + + private final float[] data; + + Float32Vector(final float[] vectorData) { + super(DataType.FLOAT32); + this.data = assertNotNull(vectorData); + } + + /** + * Retrieve the underlying float array representing this {@link Float32Vector}, where each float + * represents an element of a vector. + *
+ * NOTE: The underlying float array is not copied; changes to the returned array will be reflected in this instance. + * + * @return the underlying float array representing this {@link Float32Vector} vector. + */ + public float[] getData() { + return assertNotNull(data); + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Float32Vector that = (Float32Vector) o; + return Arrays.equals(data, that.data); + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } + + @Override + public String toString() { + return "Float32Vector{" + + "data=" + Arrays.toString(data) + + ", dataType=" + getDataType() + + '}'; + } +} diff --git a/bson/src/main/org/bson/Int8Vector.java b/bson/src/main/org/bson/Int8Vector.java new file mode 100644 index 00000000000..b61e6bfee55 --- /dev/null +++ b/bson/src/main/org/bson/Int8Vector.java @@ -0,0 +1,80 @@ +/* + * Copyright 2008-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bson; + +import java.util.Arrays; +import java.util.Objects; + +import static org.bson.assertions.Assertions.assertNotNull; + +/** + * Represents a vector of 8-bit signed integers, where each element in the vector is a byte. + *
+ * The {@link Int8Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format. + * + * @mongodb.server.release 6.0 + * @see Vector#int8Vector(byte[]) + * @see BsonBinary#BsonBinary(Vector) + * @see BsonBinary#asVector() + * @since 5.3 + */ +public final class Int8Vector extends Vector { + + private byte[] data; + + Int8Vector(final byte[] data) { + super(DataType.INT8); + this.data = assertNotNull(data); + } + + /** + * Retrieve the underlying byte array representing this {@link Int8Vector} vector, where each byte represents + * an element of a vector. + *
+ * NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance. + * + * @return the underlying byte array representing this {@link Int8Vector} vector. + */ + public byte[] getData() { + return assertNotNull(data); + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Int8Vector that = (Int8Vector) o; + return Objects.deepEquals(data, that.data); + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } + + @Override + public String toString() { + return "Int8Vector{" + + "data=" + Arrays.toString(data) + + ", dataType=" + getDataType() + + '}'; + } +} diff --git a/bson/src/main/org/bson/PackedBitVector.java b/bson/src/main/org/bson/PackedBitVector.java new file mode 100644 index 00000000000..a5dd8f4dcdf --- /dev/null +++ b/bson/src/main/org/bson/PackedBitVector.java @@ -0,0 +1,101 @@ +/* + * Copyright 2008-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bson; + +import java.util.Arrays; +import java.util.Objects; + +import static org.bson.assertions.Assertions.assertNotNull; + +/** + * Represents a packed bit vector, where each element of the vector is represented by a single bit (0 or 1). + *
+ * The {@link PackedBitVector} is used to store data efficiently using the BSON Binary Subtype 9 format. + * + * @mongodb.server.release 6.0 + * @see Vector#packedBitVector(byte[], byte) + * @see BsonBinary#BsonBinary(Vector) + * @see BsonBinary#asVector() + * @since 5.3 + */ +public final class PackedBitVector extends Vector { + + private final byte padding; + private final byte[] data; + + PackedBitVector(final byte[] data, final byte padding) { + super(DataType.PACKED_BIT); + this.data = assertNotNull(data); + this.padding = padding; + } + + /** + * Retrieve the underlying byte array representing this {@link PackedBitVector} vector, where + * each bit represents an element of the vector (either 0 or 1). + *
+ * Note that the {@linkplain #getPadding() padding value} should be considered when interpreting the final byte of the array, + * as it indicates how many least-significant bits are to be ignored. + * + * @return the underlying byte array representing this {@link PackedBitVector} vector. + * @see #getPadding() + */ + public byte[] getData() { + return assertNotNull(data); + } + + /** + * Returns the padding value for this vector. + * + *
Padding refers to the number of least-significant bits in the final byte that are ignored when retrieving + * {@linkplain #getData() the vector array}. For instance, if the padding value is 3, this means that the last byte contains + * 3 least-significant unused bits, which should be disregarded during operations.
+ *+ * + * NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance. + * + * @return the padding value (between 0 and 7). + */ + public byte getPadding() { + return this.padding; + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PackedBitVector that = (PackedBitVector) o; + return padding == that.padding && Arrays.equals(data, that.data); + } + + @Override + public int hashCode() { + return Objects.hash(padding, Arrays.hashCode(data)); + } + + @Override + public String toString() { + return "PackedBitVector{" + + "padding=" + padding + + ", data=" + Arrays.toString(data) + + ", dataType=" + getDataType() + + '}'; + } +} diff --git a/bson/src/main/org/bson/Vector.java b/bson/src/main/org/bson/Vector.java new file mode 100644 index 00000000000..d267387d727 --- /dev/null +++ b/bson/src/main/org/bson/Vector.java @@ -0,0 +1,201 @@ +/* + * Copyright 2008-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bson; + +import static org.bson.assertions.Assertions.isTrueArgument; +import static org.bson.assertions.Assertions.notNull; + +/** + * Represents a vector that is stored and retrieved using the BSON Binary Subtype 9 format. + * This class supports multiple vector {@link DataType}'s and provides static methods to create + * vectors. + *
+ * Vectors are densely packed arrays of numbers, all the same type, which are stored efficiently + * in BSON using a binary format. + *
+ * NOTE: This class should be treated as sealed: it must not be extended or implemented by consumers of the library. + * + * @mongodb.server.release 6.0 + * @see BsonBinary + * @since 5.3 + */ +public abstract class Vector { + private final DataType dataType; + + Vector(final DataType dataType) { + this.dataType = dataType; + } + + /** + * Creates a vector with the {@link DataType#PACKED_BIT} data type. + *
+ * A {@link DataType#PACKED_BIT} vector is a binary quantized vector where each element of a vector is represented by a single bit (0 or 1). Each byte + * can hold up to 8 bits (vector elements). The padding parameter is used to specify how many least-significant bits in the final byte + * should be ignored.
+ * + *For example, a vector with two bytes and a padding of 4 would have the following structure:
+ *+ * Byte 1: 238 (binary: 11101110) + * Byte 2: 224 (binary: 11100000) + * Padding: 4 (ignore the last 4 bits in Byte 2) + * Resulting vector: 12 bits: 111011101110 + *+ *
+ * NOTE: The byte array `data` is not copied; changes to the provided array will be reflected + * in the created {@link PackedBitVector} instance. + * + * @param data The byte array representing the packed bit vector data. Each byte can store 8 bits. + * @param padding The number of least-significant bits (0 to 7) to ignore in the final byte of the vector data. + * @return A {@link PackedBitVector} instance with the {@link DataType#PACKED_BIT} data type. + * @throws IllegalArgumentException If the padding value is greater than 7. + */ + public static PackedBitVector packedBitVector(final byte[] data, final byte padding) { + notNull("data", data); + isTrueArgument("Padding must be between 0 and 7 bits. Provided padding: " + padding, padding >= 0 && padding <= 7); + isTrueArgument("Padding must be 0 if vector is empty. Provided padding: " + padding, padding == 0 || data.length > 0); + return new PackedBitVector(data, padding); + } + + /** + * Creates a vector with the {@link DataType#INT8} data type. + * + *
A {@link DataType#INT8} vector is a vector of 8-bit signed integers where each byte in the vector represents an element of a vector, + * with values in the range [-128, 127].
+ *+ * NOTE: The byte array `data` is not copied; changes to the provided array will be reflected + * in the created {@link Int8Vector} instance. + * + * @param data The byte array representing the {@link DataType#INT8} vector data. + * @return A {@link Int8Vector} instance with the {@link DataType#INT8} data type. + */ + public static Int8Vector int8Vector(final byte[] data) { + notNull("data", data); + return new Int8Vector(data); + } + + /** + * Creates a vector with the {@link DataType#FLOAT32} data type. + *
+ * A {@link DataType#FLOAT32} vector is a vector of floating-point numbers, where each element in the vector is a float.
+ *+ * NOTE: The float array `data` is not copied; changes to the provided array will be reflected + * in the created {@link Float32Vector} instance. + * + * @param data The float array representing the {@link DataType#FLOAT32} vector data. + * @return A {@link Float32Vector} instance with the {@link DataType#FLOAT32} data type. + */ + public static Float32Vector floatVector(final float[] data) { + notNull("data", data); + return new Float32Vector(data); + } + + /** + * Returns the {@link PackedBitVector}. + * + * @return {@link PackedBitVector}. + * @throws IllegalStateException if this vector is not of type {@link DataType#PACKED_BIT}. Use {@link #getDataType()} to check the vector + * type before calling this method. + */ + public PackedBitVector asPackedBitVector() { + ensureType(DataType.PACKED_BIT); + return (PackedBitVector) this; + } + + /** + * Returns the {@link Int8Vector}. + * + * @return {@link Int8Vector}. + * @throws IllegalStateException if this vector is not of type {@link DataType#INT8}. Use {@link #getDataType()} to check the vector + * type before calling this method. + */ + public Int8Vector asInt8Vector() { + ensureType(DataType.INT8); + return (Int8Vector) this; + } + + /** + * Returns the {@link Float32Vector}. + * + * @return {@link Float32Vector}. + * @throws IllegalStateException if this vector is not of type {@link DataType#FLOAT32}. Use {@link #getDataType()} to check the vector + * type before calling this method. + */ + public Float32Vector asFloat32Vector() { + ensureType(DataType.FLOAT32); + return (Float32Vector) this; + } + + /** + * Returns {@link DataType} of the vector. + * + * @return the data type of the vector. 
+ */ + public DataType getDataType() { + return this.dataType; + } + + + private void ensureType(final DataType expected) { + if (this.dataType != expected) { + throw new IllegalStateException("Expected vector data type " + expected + ", but found " + this.dataType); + } + } + + /** + * Represents the data type (dtype) of a vector. + *
+ * Each dtype determines how the data in the vector is stored, including how many bits are used to represent each element + * in the vector. + * + * @mongodb.server.release 6.0 + * @since 5.3 + */ + public enum DataType { + /** + * An INT8 vector is a vector of 8-bit signed integers. The vector is stored as an array of bytes, where each byte + * represents a signed integer in the range [-128, 127]. + */ + INT8((byte) 0x03), + /** + * A FLOAT32 vector is a vector of 32-bit floating-point numbers, where each element in the vector is a float. + */ + FLOAT32((byte) 0x27), + /** + * A PACKED_BIT vector is a binary quantized vector where each element of a vector is represented by a single bit (0 or 1). + * Each byte can hold up to 8 bits (vector elements). + */ + PACKED_BIT((byte) 0x10); + + private final byte value; + + DataType(final byte value) { + this.value = value; + } + + /** + * Returns the byte value associated with this {@link DataType}. + * + *
This value is used in the BSON binary format to indicate the data type of the vector.
+ * + * @return the byte value representing the {@link DataType}. + */ + public byte getValue() { + return value; + } + } +} + diff --git a/bson/src/main/org/bson/codecs/ContainerCodecHelper.java b/bson/src/main/org/bson/codecs/ContainerCodecHelper.java index 5969763546b..b454206d5e8 100644 --- a/bson/src/main/org/bson/codecs/ContainerCodecHelper.java +++ b/bson/src/main/org/bson/codecs/ContainerCodecHelper.java @@ -16,10 +16,12 @@ package org.bson.codecs; +import org.bson.BsonBinarySubType; import org.bson.BsonReader; import org.bson.BsonType; import org.bson.Transformer; import org.bson.UuidRepresentation; +import org.bson.Vector; import org.bson.codecs.configuration.CodecConfigurationException; import org.bson.codecs.configuration.CodecRegistry; @@ -28,6 +30,8 @@ import java.util.Arrays; import java.util.UUID; +import static org.bson.internal.UuidHelper.isLegacyUUID; + /** * Helper methods for Codec implementations for containers, e.g. {@code Map} and {@code Iterable}. */ @@ -42,28 +46,50 @@ static Object readValue(final BsonReader reader, final DecoderContext decoderCon reader.readNull(); return null; } else { - Codec> codec = bsonTypeCodecMap.get(bsonType); + Codec> currentCodec = bsonTypeCodecMap.get(bsonType); + + if (bsonType == BsonType.BINARY) { + byte binarySubType = reader.peekBinarySubType(); + currentCodec = getBinarySubTypeCodec( + reader, + uuidRepresentation, + registry, binarySubType, + currentCodec); + } + + return valueTransformer.transform(currentCodec.decode(reader, decoderContext)); + } + } + + private static Codec> getBinarySubTypeCodec(final BsonReader reader, + final UuidRepresentation uuidRepresentation, + final CodecRegistry registry, + final byte binarySubType, + final Codec> binaryTypeCodec) { - if (bsonType == BsonType.BINARY && reader.peekBinarySize() == 16) { - switch (reader.peekBinarySubType()) { - case 3: - if (uuidRepresentation == UuidRepresentation.JAVA_LEGACY - || uuidRepresentation == UuidRepresentation.C_SHARP_LEGACY - || 
uuidRepresentation == UuidRepresentation.PYTHON_LEGACY) { - codec = registry.get(UUID.class); - } - break; - case 4: - if (uuidRepresentation == UuidRepresentation.STANDARD) { - codec = registry.get(UUID.class); - } - break; - default: - break; - } + if (binarySubType == BsonBinarySubType.VECTOR.getValue()) { + Codec+ * This class is not part of the public API and may be removed or changed at any time. + * + * @see Vector + * @see BsonBinary#asVector() + * @see BsonBinary#BsonBinary(Vector) + */ +public final class VectorHelper { + + private static final ByteOrder STORED_BYTE_ORDER = ByteOrder.LITTLE_ENDIAN; + private static final String ERROR_MESSAGE_UNKNOWN_VECTOR_DATA_TYPE = "Unknown vector data type: "; + private static final byte ZERO_PADDING = 0; + + private VectorHelper() { + //NOP + } + + private static final int METADATA_SIZE = 2; + + public static byte[] encodeVectorToBinary(final Vector vector) { + Vector.DataType dataType = vector.getDataType(); + switch (dataType) { + case INT8: + return encodeVector(dataType.getValue(), ZERO_PADDING, vector.asInt8Vector().getData()); + case PACKED_BIT: + PackedBitVector packedBitVector = vector.asPackedBitVector(); + return encodeVector(dataType.getValue(), packedBitVector.getPadding(), packedBitVector.getData()); + case FLOAT32: + return encodeVector(dataType.getValue(), vector.asFloat32Vector().getData()); + default: + throw Assertions.fail(ERROR_MESSAGE_UNKNOWN_VECTOR_DATA_TYPE + dataType); + } + } + + /** + * Decodes a vector from a binary representation. + *
+ * encodedVector is not mutated nor stored in the returned {@link Vector}.
+ */
+    public static Vector decodeBinaryToVector(final byte[] encodedVector) {
+        // Layout: byte 0 carries the dtype, byte 1 the padding; the remaining bytes are the vector payload.
+        final int encodedLength = encodedVector.length;
+        isTrue("Vector encoded array length must be at least 2, but found: " + encodedLength, encodedLength >= METADATA_SIZE);
+
+        final Vector.DataType vectorType = determineVectorDType(encodedVector[0]);
+        final byte paddingByte = encodedVector[1];
+
+        if (vectorType == Vector.DataType.INT8) {
+            return decodeInt8Vector(encodedVector, paddingByte);
+        }
+        if (vectorType == Vector.DataType.PACKED_BIT) {
+            return decodePackedBitVector(encodedVector, paddingByte);
+        }
+        if (vectorType == Vector.DataType.FLOAT32) {
+            return decodeFloat32Vector(encodedVector, paddingByte);
+        }
+        // Guard for a data type that has no decoder above.
+        throw Assertions.fail(ERROR_MESSAGE_UNKNOWN_VECTOR_DATA_TYPE + vectorType);
+    }
+
+    /**
+     * Decodes the payload of an encoded {@link Vector.DataType#FLOAT32} vector.
+     *
+     * @param encodedVector the complete encoded vector, including the metadata bytes.
+     * @param padding       the padding byte read from the metadata; must be 0 for this data type.
+     * @return the decoded {@link Float32Vector}.
+     * @throws BsonInvalidOperationException if the padding is not 0, or the payload length is not a multiple of 4.
+     */
+    private static Float32Vector decodeFloat32Vector(final byte[] encodedVector, final byte padding) {
+        final boolean unpadded = padding == 0;
+        isTrue("Padding must be 0 for FLOAT32 data type, but found: " + padding, unpadded);
+        final float[] floats = decodeLittleEndianFloats(encodedVector);
+        return Vector.floatVector(floats);
+    }
+
+    /**
+     * Decodes the payload of an encoded {@link Vector.DataType#PACKED_BIT} vector.
+     *
+     * @param encodedVector the complete encoded vector, including the metadata bytes.
+     * @param padding       the padding byte read from the metadata; must be within [0, 7], and 0 when there is no payload.
+     * @return the decoded {@link PackedBitVector}, backed by a fresh copy of the payload bytes.
+     * @throws BsonInvalidOperationException if the padding is invalid for the payload.
+     */
+    private static PackedBitVector decodePackedBitVector(final byte[] encodedVector, final byte padding) {
+        final byte[] bits = extractVectorData(encodedVector);
+        final boolean paddingWithoutData = padding != 0 && bits.length == 0;
+        final boolean paddingInRange = 0 <= padding && padding <= 7;
+        // The empty-vector check runs before the range check, so it determines the reported message
+        // when both conditions are violated.
+        isTrue("Padding must be 0 if vector is empty, but found: " + padding, !paddingWithoutData);
+        isTrue("Padding must be between 0 and 7 bits, but found: " + padding, paddingInRange);
+        return Vector.packedBitVector(bits, padding);
+    }
+
+    /**
+     * Decodes the payload of an encoded {@link Vector.DataType#INT8} vector.
+     *
+     * @param encodedVector the complete encoded vector, including the metadata bytes.
+     * @param padding       the padding byte read from the metadata; must be 0 for this data type.
+     * @return the decoded {@link Int8Vector}, backed by a fresh copy of the payload bytes.
+     * @throws BsonInvalidOperationException if the padding is not 0.
+     */
+    private static Int8Vector decodeInt8Vector(final byte[] encodedVector, final byte padding) {
+        final boolean unpadded = padding == 0;
+        isTrue("Padding must be 0 for INT8 data type, but found: " + padding, unpadded);
+        return Vector.int8Vector(extractVectorData(encodedVector));
+    }
+
+    /**
+     * Copies everything after the metadata bytes into a new array.
+     *
+     * @param encodedVector the complete encoded vector, including the metadata bytes.
+     * @return a new array holding the bytes of {@code encodedVector} from index {@code METADATA_SIZE} onwards.
+     */
+    private static byte[] extractVectorData(final byte[] encodedVector) {
+        final byte[] payload = new byte[encodedVector.length - METADATA_SIZE];
+        System.arraycopy(encodedVector, METADATA_SIZE, payload, 0, payload.length);
+        return payload;
+    }
+
+    /**
+     * Builds the encoded form of a byte-backed vector: the dtype byte, the padding byte, then the vector bytes.
+     *
+     * @param dType      the byte identifying the vector data type.
+     * @param padding    the padding byte to store in the metadata.
+     * @param vectorData the vector bytes; copied into the result, not retained.
+     * @return a new array of length {@code vectorData.length + METADATA_SIZE}.
+     */
+    private static byte[] encodeVector(final byte dType, final byte padding, final byte[] vectorData) {
+        final byte[] encoded = new byte[vectorData.length + METADATA_SIZE];
+        // Sequential relative puts into a buffer wrapping the result: dtype at index 0, padding at index 1, payload after.
+        ByteBuffer.wrap(encoded)
+                .put(dType)
+                .put(padding)
+                .put(vectorData);
+        return encoded;
+    }
+
+    /**
+     * Builds the encoded form of a float-backed vector: the dtype byte, a zero padding byte, then each float
+     * written as 4 bytes in {@code STORED_BYTE_ORDER}.
+     *
+     * @param dType      the byte identifying the vector data type.
+     * @param vectorData the float elements to encode.
+     * @return a new array of length {@code vectorData.length * Float.BYTES + METADATA_SIZE}.
+     */
+    private static byte[] encodeVector(final byte dType, final float[] vectorData) {
+        final int payloadLength = vectorData.length * Float.BYTES;
+        final byte[] encoded = new byte[payloadLength + METADATA_SIZE];
+        encoded[0] = dType;
+        encoded[1] = ZERO_PADDING;
+
+        // Bulk put through a FloatBuffer view that starts right after the metadata bytes; a single bulk
+        // transfer leaves the JVM free to optimize the copy, which an element-by-element loop might not.
+        ByteBuffer.wrap(encoded, METADATA_SIZE, payloadLength)
+                .order(STORED_BYTE_ORDER)
+                .asFloatBuffer()
+                .put(vectorData);
+
+        return encoded;
+    }
+
+    /**
+     * Reads the bytes after the metadata as a sequence of 4-byte floats stored in {@code STORED_BYTE_ORDER}.
+     *
+     * @param encodedVector the complete encoded vector, including the metadata bytes.
+     * @return a new float array with one element per 4 payload bytes.
+     * @throws BsonInvalidOperationException if the number of bytes after the metadata is not a multiple of 4.
+     */
+    private static float[] decodeLittleEndianFloats(final byte[] encodedVector) {
+        final int payloadLength = encodedVector.length - METADATA_SIZE;
+        isTrue("Byte array length must be a multiple of 4 for FLOAT32 data type, but found: " + encodedVector.length,
+                payloadLength % Float.BYTES == 0);
+
+        final float[] decoded = new float[payloadLength / Float.BYTES];
+
+        // Bulk get through a FloatBuffer view that starts right after the metadata bytes; a single bulk
+        // transfer leaves the JVM free to optimize the copy, which an element-by-element loop might not.
+        ByteBuffer.wrap(encodedVector, METADATA_SIZE, payloadLength)
+                .order(STORED_BYTE_ORDER)
+                .asFloatBuffer()
+                .get(decoded);
+        return decoded;
+    }
+
+    /**
+     * Looks up the {@link Vector.DataType} whose {@linkplain Vector.DataType#getValue() byte value} equals the given byte.
+     *
+     * @param dType the data type byte to resolve.
+     * @return the matching {@link Vector.DataType}.
+     * @throws BsonInvalidOperationException if no {@link Vector.DataType} has the given byte value.
+     */
+    public static Vector.DataType determineVectorDType(final byte dType) {
+        final Vector.DataType[] candidates = Vector.DataType.values();
+        for (int i = 0; i < candidates.length; i++) {
+            final Vector.DataType candidate = candidates[i];
+            if (dType == candidate.getValue()) {
+                return candidate;
+            }
+        }
+        throw new BsonInvalidOperationException(ERROR_MESSAGE_UNKNOWN_VECTOR_DATA_TYPE + dType);
+    }
+
+    /**
+     * Validates a condition on decoded data.
+     *
+     * @param message   the exception message to use when the condition does not hold.
+     * @param condition the condition that must be {@code true}.
+     * @throws BsonInvalidOperationException if {@code condition} is {@code false}.
+     */
+    private static void isTrue(final String message, final boolean condition) {
+        if (condition) {
+            return;
+        }
+        throw new BsonInvalidOperationException(message);
+    }
+}
diff --git a/bson/src/test/resources/bson-binary-vector/float32.json b/bson/src/test/resources/bson-binary-vector/float32.json
new file mode 100644
index 00000000000..e1d142c184b
--- /dev/null
+++ b/bson/src/test/resources/bson-binary-vector/float32.json
@@ -0,0 +1,50 @@
+{
+ "description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
+ "test_key": "vector",
+ "tests": [
+ {
+ "description": "Simple Vector FLOAT32",
+ "valid": true,
+ "vector": [127.0, 7.0],
+ "dtype_hex": "0x27",
+ "dtype_alias": "FLOAT32",
+ "padding": 0,
+ "canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
+ },
+ {
+ "description": "Vector with decimals and negative value FLOAT32",
+ "valid": true,
+ "vector": [127.7, -7.7],
+ "dtype_hex": "0x27",
+ "dtype_alias": "FLOAT32",
+ "padding": 0,
+ "canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
+ },
+ {
+ "description": "Empty Vector FLOAT32",
+ "valid": true,
+ "vector": [],
+ "dtype_hex": "0x27",
+ "dtype_alias": "FLOAT32",
+ "padding": 0,
+ "canonical_bson": "1400000005766563746F72000200000009270000"
+ },
+ {
+ "description": "Infinity Vector FLOAT32",
+ "valid": true,
+ "vector": ["-inf", 0.0, "inf"],
+ "dtype_hex": "0x27",
+ "dtype_alias": "FLOAT32",
+ "padding": 0,
+ "canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
+ },
+ {
+ "description": "FLOAT32 with padding",
+ "valid": false,
+ "vector": [127.0, 7.0],
+ "dtype_hex": "0x27",
+ "dtype_alias": "FLOAT32",
+ "padding": 3
+ }
+ ]
+}
\ No newline at end of file
diff --git a/bson/src/test/resources/bson-binary-vector/int8.json b/bson/src/test/resources/bson-binary-vector/int8.json
new file mode 100644
index 00000000000..c10c1b7d4e2
--- /dev/null
+++ b/bson/src/test/resources/bson-binary-vector/int8.json
@@ -0,0 +1,56 @@
+{
+ "description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
+ "test_key": "vector",
+ "tests": [
+ {
+ "description": "Simple Vector INT8",
+ "valid": true,
+ "vector": [127, 7],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 0,
+ "canonical_bson": "1600000005766563746F7200040000000903007F0700"
+ },
+ {
+ "description": "Empty Vector INT8",
+ "valid": true,
+ "vector": [],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 0,
+ "canonical_bson": "1400000005766563746F72000200000009030000"
+ },
+ {
+ "description": "Overflow Vector INT8",
+ "valid": false,
+ "vector": [128],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 0
+ },
+ {
+ "description": "Underflow Vector INT8",
+ "valid": false,
+ "vector": [-129],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 0
+ },
+ {
+ "description": "INT8 with padding",
+ "valid": false,
+ "vector": [127, 7],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 3
+ },
+ {
+ "description": "INT8 with float inputs",
+ "valid": false,
+ "vector": [127.77, 7.77],
+ "dtype_hex": "0x03",
+ "dtype_alias": "INT8",
+ "padding": 0
+ }
+ ]
+}
\ No newline at end of file
diff --git a/bson/src/test/resources/bson-binary-vector/packed_bit.json b/bson/src/test/resources/bson-binary-vector/packed_bit.json
new file mode 100644
index 00000000000..69fb3948335
--- /dev/null
+++ b/bson/src/test/resources/bson-binary-vector/packed_bit.json
@@ -0,0 +1,97 @@
+{
+ "description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT",
+ "test_key": "vector",
+ "tests": [
+ {
+ "description": "Padding specified with no vector data PACKED_BIT",
+ "valid": false,
+ "vector": [],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 1
+ },
+ {
+ "description": "Simple Vector PACKED_BIT",
+ "valid": true,
+ "vector": [127, 7],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0,
+ "canonical_bson": "1600000005766563746F7200040000000910007F0700"
+ },
+ {
+ "description": "Empty Vector PACKED_BIT",
+ "valid": true,
+ "vector": [],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0,
+ "canonical_bson": "1400000005766563746F72000200000009100000"
+ },
+ {
+ "description": "PACKED_BIT with padding",
+ "valid": true,
+ "vector": [127, 7],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 3,
+ "canonical_bson": "1600000005766563746F7200040000000910037F0700"
+ },
+ {
+ "description": "Overflow Vector PACKED_BIT",
+ "valid": false,
+ "vector": [256],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0
+ },
+ {
+ "description": "Underflow Vector PACKED_BIT",
+ "valid": false,
+ "vector": [-1],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0
+ },
+ {
+ "description": "Vector with float values PACKED_BIT",
+ "valid": false,
+ "vector": [127.5],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0
+ },
+ {
+ "description": "Padding specified with no vector data PACKED_BIT",
+ "valid": false,
+ "vector": [],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 1
+ },
+ {
+ "description": "Exceeding maximum padding PACKED_BIT",
+ "valid": false,
+ "vector": [1],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 8
+ },
+ {
+ "description": "Negative padding PACKED_BIT",
+ "valid": false,
+ "vector": [1],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": -1
+ },
+ {
+ "description": "Vector with float values PACKED_BIT",
+ "valid": false,
+ "vector": [127.5],
+ "dtype_hex": "0x10",
+ "dtype_alias": "PACKED_BIT",
+ "padding": 0
+ }
+ ]
+}
\ No newline at end of file
diff --git a/bson/src/test/resources/bson/binary.json b/bson/src/test/resources/bson/binary.json
index 38a70d1fe0c..29d88471afe 100644
--- a/bson/src/test/resources/bson/binary.json
+++ b/bson/src/test/resources/bson/binary.json
@@ -74,6 +74,36 @@
"description": "$type query operator (conflicts with legacy $binary form with $type field)",
"canonical_bson": "180000000378001000000010247479706500020000000000",
"canonical_extjson": "{\"x\" : { \"$type\" : {\"$numberInt\": \"2\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector FLOAT32",
+ "canonical_bson": "170000000578000A0000000927000000FE420000E04000",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwAAAP5CAADgQA==\", \"subType\": \"09\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector INT8",
+ "canonical_bson": "11000000057800040000000903007F0700",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwB/Bw==\", \"subType\": \"09\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector PACKED_BIT",
+ "canonical_bson": "11000000057800040000000910007F0700",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAB/Bw==\", \"subType\": \"09\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector (Zero-length) FLOAT32",
+ "canonical_bson": "0F0000000578000200000009270000",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwA=\", \"subType\": \"09\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector (Zero-length) INT8",
+ "canonical_bson": "0F0000000578000200000009030000",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwA=\", \"subType\": \"09\"}}}"
+ },
+ {
+ "description": "subtype 0x09 Vector (Zero-length) PACKED_BIT",
+ "canonical_bson": "0F0000000578000200000009100000",
+ "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAA=\", \"subType\": \"09\"}}}"
}
],
"decodeErrors": [
@@ -120,4 +150,4 @@
"string": "{\"x\" : { \"$uuid\" : \"----d264-44b3-4--9-90e8-e7d1dfc0----\"}}"
}
]
-}
+}
\ No newline at end of file
diff --git a/bson/src/test/unit/org/bson/BsonBinarySpecification.groovy b/bson/src/test/unit/org/bson/BsonBinarySpecification.groovy
index e51094e964f..503440daa04 100644
--- a/bson/src/test/unit/org/bson/BsonBinarySpecification.groovy
+++ b/bson/src/test/unit/org/bson/BsonBinarySpecification.groovy
@@ -48,9 +48,14 @@ class BsonBinarySpecification extends Specification {
data == bsonBinary.getData()
where:
- subType << [BsonBinarySubType.BINARY, BsonBinarySubType.FUNCTION, BsonBinarySubType.MD5,
- BsonBinarySubType.OLD_BINARY, BsonBinarySubType.USER_DEFINED, BsonBinarySubType.UUID_LEGACY,
- BsonBinarySubType.UUID_STANDARD]
+ subType << [BsonBinarySubType.BINARY,
+ BsonBinarySubType.FUNCTION,
+ BsonBinarySubType.MD5,
+ BsonBinarySubType.OLD_BINARY,
+ BsonBinarySubType.USER_DEFINED,
+ BsonBinarySubType.UUID_LEGACY,
+ BsonBinarySubType.UUID_STANDARD,
+ BsonBinarySubType.VECTOR]
}
@Unroll
diff --git a/bson/src/test/unit/org/bson/BsonBinarySubTypeSpecification.groovy b/bson/src/test/unit/org/bson/BsonBinarySubTypeSpecification.groovy
index 8e502891095..448d63f23fd 100644
--- a/bson/src/test/unit/org/bson/BsonBinarySubTypeSpecification.groovy
+++ b/bson/src/test/unit/org/bson/BsonBinarySubTypeSpecification.groovy
@@ -34,5 +34,6 @@ class BsonBinarySubTypeSpecification extends Specification {
6 | false
7 | false
8 | false
+ 9 | false
}
}
diff --git a/bson/src/test/unit/org/bson/BsonBinaryTest.java b/bson/src/test/unit/org/bson/BsonBinaryTest.java
new file mode 100644
index 00000000000..029c611c594
--- /dev/null
+++ b/bson/src/test/unit/org/bson/BsonBinaryTest.java
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2008-present MongoDB, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.bson;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.params.provider.Arguments.arguments;
+
+class BsonBinaryTest {
+
+ private static final byte FLOAT32_DTYPE = Vector.DataType.FLOAT32.getValue(); // byte value reported by the FLOAT32 data type
+ private static final byte INT8_DTYPE = Vector.DataType.INT8.getValue(); // byte value reported by the INT8 data type
+ private static final byte PACKED_BIT_DTYPE = Vector.DataType.PACKED_BIT.getValue(); // byte value reported by the PACKED_BIT data type
+ public static final int ZERO_PADDING = 0; // padding of zero; NOTE(review): presumably used by fixtures further down this class - confirm
+
+ // Passing a null Vector to the BsonBinary constructor is expected to fail with an IllegalArgumentException.
+ @Test
+ void shouldThrowExceptionWhenCreatingBsonBinaryWithNullVector() {
+     // The cast selects the constructor that takes a Vector.
+     IllegalArgumentException thrown =
+             assertThrows(IllegalArgumentException.class, () -> new BsonBinary((Vector) null));
+
+     assertEquals("Vector must not be null", thrown.getMessage());
+ }
+
+ // For every subtype other than VECTOR, asVector() is expected to throw BsonInvalidOperationException.
+ @ParameterizedTest
+ @EnumSource(value = BsonBinarySubType.class, mode = EnumSource.Mode.EXCLUDE, names = {"VECTOR"})
+ void shouldThrowExceptionWhenBsonBinarySubTypeIsNotVector(final BsonBinarySubType bsonBinarySubType) {
+     // The payload bytes are arbitrary; only the subtype under test varies between invocations.
+     BsonBinary nonVectorBinary = new BsonBinary(bsonBinarySubType.getValue(), new byte[]{1, 2, 3, 4});
+
+     BsonInvalidOperationException thrown =
+             assertThrows(BsonInvalidOperationException.class, () -> nonVectorBinary.asVector());
+     assertEquals("type must be a Vector subtype.", thrown.getMessage());
+ }
+
+ // Each vector from provideFloatVectors, wrapped in a BsonBinary, must report the VECTOR subtype and the expected bytes.
+ @ParameterizedTest(name = "{index}: {0}")
+ @MethodSource("provideFloatVectors")
+ void shouldEncodeFloatVector(final Vector actualFloat32Vector, final byte[] expectedBsonEncodedVector) {
+     BsonBinary encoded = new BsonBinary(actualFloat32Vector);
+     byte[] encodedBytes = encoded.getData();
+
+     // Subtype is verified first, then the binary payload is compared byte for byte.
+     assertEquals(BsonBinarySubType.VECTOR.getValue(), encoded.getType(), "The subtype must be VECTOR");
+     assertArrayEquals(expectedBsonEncodedVector, encodedBytes);
+ }
+
+ // Reading the supplied bytes back through a VECTOR-subtype BsonBinary must yield a vector equal to the expected one.
+ @ParameterizedTest(name = "{index}: {0}")
+ @MethodSource("provideFloatVectors")
+ void shouldDecodeFloatVector(final Float32Vector expectedFloatVector, final byte[] bsonEncodedVector) {
+     BsonBinary vectorBinary = new BsonBinary(BsonBinarySubType.VECTOR, bsonEncodedVector);
+     Float32Vector decoded = (Float32Vector) vectorBinary.asVector();
+
+     assertEquals(expectedFloatVector, decoded);
+ }
+
+ private static Stream