-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Add BSON Binary Subtype 9 support for vector storage and retrieval. #1528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8cdec02
5237979
c68bd70
a50708f
7eb252b
5294d9e
e5bd2b7
8de3743
7f82ac4
d5c3fe9
d3c3789
0b1bd60
99830ec
a71779e
ec7cafe
d3a2287
45d21ed
b2952b7
097bf13
e65817c
0a168d9
cbfae8f
952ce35
3422dcd
874c00a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,10 +18,13 @@ | |
|
||
import org.bson.assertions.Assertions; | ||
import org.bson.internal.UuidHelper; | ||
import org.bson.internal.vector.VectorHelper; | ||
|
||
import java.util.Arrays; | ||
import java.util.UUID; | ||
|
||
import static org.bson.internal.vector.VectorHelper.encodeVectorToBinary; | ||
|
||
/** | ||
* A representation of the BSON Binary type. Note that for performance reasons instances of this class are not immutable, | ||
* so care should be taken to only modify the underlying byte array if you know what you're doing, or else make a defensive copy. | ||
|
@@ -89,6 +92,20 @@ public BsonBinary(final UUID uuid) { | |
this(uuid, UuidRepresentation.STANDARD); | ||
} | ||
|
||
/**
 * Creates a {@link BsonBinary} of {@linkplain BsonBinarySubType#VECTOR subtype 9} whose payload is the encoded form of
 * the supplied {@link Vector}.
 *
 * @param vector the {@link Vector} to encode; must not be {@code null}
 * @throws IllegalArgumentException if {@code vector} is {@code null}
 * @since 5.3
 */
public BsonBinary(final Vector vector) {
    if (vector == null) {
        throw new IllegalArgumentException("Vector must not be null");
    }
    this.data = VectorHelper.encodeVectorToBinary(vector);
    this.type = BsonBinarySubType.VECTOR.getValue();
}
|
||
/** | ||
* Construct a new instance from the given UUID and UuidRepresentation | ||
* | ||
|
@@ -127,6 +144,21 @@ public UUID asUuid() { | |
return UuidHelper.decodeBinaryToUuid(this.data.clone(), this.type, UuidRepresentation.STANDARD); | ||
} | ||
|
||
/** | ||
* Returns the binary as a {@link Vector}. The {@linkplain #getType() subtype} must be {@linkplain BsonBinarySubType#VECTOR 9}. | ||
* | ||
* @return the vector | ||
* @throws BsonInvalidOperationException if the binary subtype is not {@link BsonBinarySubType#VECTOR}. | ||
* @since 5.3 | ||
*/ | ||
public Vector asVector() { | ||
if (type != BsonBinarySubType.VECTOR.getValue()) { | ||
throw new BsonInvalidOperationException("type must be a Vector subtype."); | ||
} | ||
|
||
return VectorHelper.decodeBinaryToVector(this.data); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [just a note] We talked about this being a potential performance issue if an application has to read a |
||
} | ||
|
||
/** | ||
* Returns the binary as a UUID. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Copyright 2008-present MongoDB, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.bson; | ||
|
||
import java.util.Arrays; | ||
|
||
import static org.bson.assertions.Assertions.assertNotNull; | ||
|
||
/** | ||
* Represents a vector of 32-bit floating-point numbers, where each element in the vector is a float. | ||
* <p> | ||
* The {@link Float32Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format. | ||
* | ||
* @mongodb.server.release 6.0 | ||
* @see Vector#floatVector(float[]) | ||
* @see BsonBinary#BsonBinary(Vector) | ||
* @see BsonBinary#asVector() | ||
* @since 5.3 | ||
*/ | ||
public final class Float32Vector extends Vector { | ||
|
||
private final float[] data; | ||
|
||
Float32Vector(final float[] vectorData) { | ||
super(DataType.FLOAT32); | ||
this.data = assertNotNull(vectorData); | ||
} | ||
|
||
/** | ||
* Retrieve the underlying float array representing this {@link Float32Vector}, where each float | ||
* represents an element of a vector. | ||
* <p> | ||
* NOTE: The underlying float array is not copied; changes to the returned array will be reflected in this instance. | ||
* | ||
* @return the underlying float array representing this {@link Float32Vector} vector. | ||
*/ | ||
public float[] getData() { | ||
return assertNotNull(data); | ||
} | ||
|
||
@Override | ||
public boolean equals(final Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (o == null || getClass() != o.getClass()) { | ||
return false; | ||
} | ||
Float32Vector that = (Float32Vector) o; | ||
return Arrays.equals(data, that.data); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Arrays.hashCode(data); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "Float32Vector{" | ||
+ "data=" + Arrays.toString(data) | ||
+ ", dataType=" + getDataType() | ||
+ '}'; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Copyright 2008-present MongoDB, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.bson; | ||
|
||
import java.util.Arrays; | ||
import java.util.Objects; | ||
|
||
import static org.bson.assertions.Assertions.assertNotNull; | ||
|
||
/** | ||
* Represents a vector of 8-bit signed integers, where each element in the vector is a byte. | ||
* <p> | ||
* The {@link Int8Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format. | ||
* | ||
* @mongodb.server.release 6.0 | ||
* @see Vector#int8Vector(byte[]) | ||
* @see BsonBinary#BsonBinary(Vector) | ||
* @see BsonBinary#asVector() | ||
* @since 5.3 | ||
*/ | ||
public final class Int8Vector extends Vector { | ||
|
||
private byte[] data; | ||
|
||
Int8Vector(final byte[] data) { | ||
super(DataType.INT8); | ||
this.data = assertNotNull(data); | ||
} | ||
|
||
/** | ||
* Retrieve the underlying byte array representing this {@link Int8Vector} vector, where each byte represents | ||
* an element of a vector. | ||
* <p> | ||
* NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance. | ||
* | ||
* @return the underlying byte array representing this {@link Int8Vector} vector. | ||
*/ | ||
public byte[] getData() { | ||
return assertNotNull(data); | ||
} | ||
|
||
@Override | ||
public boolean equals(final Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (o == null || getClass() != o.getClass()) { | ||
return false; | ||
} | ||
Int8Vector that = (Int8Vector) o; | ||
return Objects.deepEquals(data, that.data); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Arrays.hashCode(data); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "Int8Vector{" | ||
+ "data=" + Arrays.toString(data) | ||
+ ", dataType=" + getDataType() | ||
+ '}'; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright 2008-present MongoDB, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.bson; | ||
|
||
import java.util.Arrays; | ||
import java.util.Objects; | ||
|
||
import static org.bson.assertions.Assertions.assertNotNull; | ||
|
||
/** | ||
* Represents a packed bit vector, where each element of the vector is represented by a single bit (0 or 1). | ||
* <p> | ||
* The {@link PackedBitVector} is used to store data efficiently using the BSON Binary Subtype 9 format. | ||
* | ||
* @mongodb.server.release 6.0 | ||
* @see Vector#packedBitVector(byte[], byte) | ||
* @see BsonBinary#BsonBinary(Vector) | ||
* @see BsonBinary#asVector() | ||
* @since 5.3 | ||
*/ | ||
public final class PackedBitVector extends Vector { | ||
|
||
private final byte padding; | ||
private final byte[] data; | ||
|
||
PackedBitVector(final byte[] data, final byte padding) { | ||
super(DataType.PACKED_BIT); | ||
this.data = assertNotNull(data); | ||
this.padding = padding; | ||
} | ||
|
||
/** | ||
* Retrieve the underlying byte array representing this {@link PackedBitVector} vector, where | ||
* each bit represents an element of the vector (either 0 or 1). | ||
* <p> | ||
* Note that the {@linkplain #getPadding() padding value} should be considered when interpreting the final byte of the array, | ||
* as it indicates how many least-significant bits are to be ignored. | ||
* | ||
* @return the underlying byte array representing this {@link PackedBitVector} vector. | ||
* @see #getPadding() | ||
*/ | ||
public byte[] getData() { | ||
return assertNotNull(data); | ||
} | ||
|
||
/** | ||
* Returns the padding value for this vector. | ||
* | ||
* <p>Padding refers to the number of least-significant bits in the final byte that are ignored when retrieving | ||
* {@linkplain #getData() the vector array}. For instance, if the padding value is 3, this means that the last byte contains | ||
* 3 least-significant unused bits, which should be disregarded during operations.</p> | ||
* <p> | ||
* | ||
* NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance. | ||
* | ||
* @return the padding value (between 0 and 7). | ||
*/ | ||
public byte getPadding() { | ||
return this.padding; | ||
} | ||
|
||
@Override | ||
public boolean equals(final Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (o == null || getClass() != o.getClass()) { | ||
return false; | ||
} | ||
PackedBitVector that = (PackedBitVector) o; | ||
return padding == that.padding && Arrays.equals(data, that.data); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(padding, Arrays.hashCode(data)); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "PackedBitVector{" | ||
+ "padding=" + padding | ||
+ ", data=" + Arrays.toString(data) | ||
+ ", dataType=" + getDataType() | ||
+ '}'; | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.