Skip to content

Commit 7bb4e63

Browse files
committed
Documentation.
1 parent 0d84a06 commit 7bb4e63

15 files changed

+207
-21
lines changed

spring-data-jpa/src/main/java/org/springframework/data/jpa/repository/query/JpaParametersParameterAccessor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ protected Object potentiallyUnwrap(Object parameterValue) {
8282
* @return
8383
*/
8484
public ScoringFunction getScoringFunction() {
85-
return doWithScore(Score::getFunction, Score.class::isInstance, () -> ScoringFunction.UNSPECIFIED);
85+
return doWithScore(Score::getFunction, Score.class::isInstance, ScoringFunction::unspecified);
8686
}
8787

8888
/**

spring-data-jpa/src/main/java/org/springframework/data/jpa/repository/query/JpaQueryCreator.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,7 @@ public class JpaQueryCreator extends AbstractQueryCreator<String, JpqlQueryBuild
7676
VectorScoringFunctions.EUCLIDEAN, new DistanceFunction("euclidean_distance", Sort.Direction.ASC), //
7777
VectorScoringFunctions.TAXICAB, new DistanceFunction("taxicab_distance", Sort.Direction.ASC), //
7878
VectorScoringFunctions.HAMMING, new DistanceFunction("hamming_distance", Sort.Direction.ASC), //
79-
VectorScoringFunctions.INNER_PRODUCT, new DistanceFunction("negative_inner_product", Sort.Direction.ASC), //
80-
81-
// TODO: Do we need both, dot and inner product? Aren't these the same in some sense?
82-
VectorScoringFunctions.DOT, new DistanceFunction("negative_inner_product", Sort.Direction.ASC));
79+
VectorScoringFunctions.DOT_PRODUCT, new DistanceFunction("negative_inner_product", Sort.Direction.ASC));
8380

8481
record DistanceFunction(String distanceFunction, Sort.Direction direction) {
8582

@@ -100,7 +97,6 @@ record DistanceFunction(String distanceFunction, Sort.Direction direction) {
10097
* Create a new {@link JpaQueryCreator}.
10198
*
10299
* @param tree must not be {@literal null}.
103-
* @param searchQuery
104100
* @param type must not be {@literal null}.
105101
* @param provider must not be {@literal null}.
106102
* @param templates must not be {@literal null}.
@@ -121,6 +117,7 @@ public JpaQueryCreator(PartTree tree, boolean searchQuery, ReturnedType type, Pa
121117
JpqlQueryTemplates templates, Metamodel metamodel) {
122118

123119
super(tree);
120+
124121
this.searchQuery = searchQuery;
125122
this.tree = tree;
126123
this.returnedType = type;
@@ -585,7 +582,9 @@ private static String getDistanceFunction(ScoringFunction scoringFunction) {
585582
DistanceFunction distanceFunction = JpaQueryCreator.DISTANCE_FUNCTIONS.get(scoringFunction);
586583

587584
if (distanceFunction == null) {
588-
throw new IllegalArgumentException("Unsupported ScoringFunction: %s".formatted(scoringFunction.getName()));
585+
throw new IllegalArgumentException(
586+
"Unsupported ScoringFunction: %s. Make sure to declare a supported ScoringFunction when creating Score/Similarity instances."
587+
.formatted(scoringFunction.getName()));
589588
}
590589

591590
return distanceFunction.distanceFunction();

spring-data-jpa/src/main/java/org/springframework/data/jpa/repository/query/JpqlQueryBuilder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ default Select select(JpqlQueryBuilder.PathExpression path) {
496496
/**
497497
* Select a single attribute.
498498
*
499-
* @param path
499+
* @param selection
500500
* @return
501501
*/
502502
@CheckReturnValue

spring-data-jpa/src/main/java/org/springframework/data/jpa/repository/query/ParameterMetadataProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ ScoringFunction getScoringFunction() {
217217
return accessor.getScoringFunction();
218218
}
219219

220-
return ScoringFunction.UNSPECIFIED;
220+
return ScoringFunction.unspecified();
221221
}
222222

223223
ParameterBinding getVectorBinding() {

spring-data-jpa/src/main/java/org/springframework/data/jpa/repository/query/SimilarityNormalizer.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@
3232
public class SimilarityNormalizer {
3333

3434
/**
35-
* Identity normalizer for {@link ScoringFunction#UNSPECIFIED} scoring function without altering the score.
35+
* Identity normalizer for {@link ScoringFunction#unspecified()} scoring function without altering the score.
3636
*/
37-
public static final SimilarityNormalizer IDENTITY = new SimilarityNormalizer(ScoringFunction.UNSPECIFIED,
37+
public static final SimilarityNormalizer IDENTITY = new SimilarityNormalizer(ScoringFunction.unspecified(),
3838
DoubleUnaryOperator.identity(), DoubleUnaryOperator.identity());
3939

4040
/**
@@ -52,16 +52,15 @@ public class SimilarityNormalizer {
5252
/**
5353
* Normalizer for Negative Inner Product (Dot) scores using {@code negative_inner_product(…)} as the scoring function.
5454
*/
55-
public static final SimilarityNormalizer DOT = new SimilarityNormalizer(VectorScoringFunctions.DOT,
55+
public static final SimilarityNormalizer DOT_PRODUCT = new SimilarityNormalizer(VectorScoringFunctions.DOT_PRODUCT,
5656
it -> (1 - it) / 2, it -> 1 - (it * 2));
5757

5858
private static final Map<ScoringFunction, SimilarityNormalizer> NORMALIZERS = new HashMap<>();
5959

6060
static {
6161
NORMALIZERS.put(EUCLIDEAN.scoringFunction, EUCLIDEAN);
6262
NORMALIZERS.put(COSINE.scoringFunction, COSINE);
63-
NORMALIZERS.put(DOT.scoringFunction, DOT);
64-
NORMALIZERS.put(VectorScoringFunctions.INNER_PRODUCT, DOT);
63+
NORMALIZERS.put(DOT_PRODUCT.scoringFunction, DOT_PRODUCT);
6564
}
6665

6766
private final ScoringFunction scoringFunction;

spring-data-jpa/src/test/java/org/springframework/data/jpa/repository/PgVectorIntegrationTests.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.springframework.core.io.ClassPathResource;
5050
import org.springframework.data.domain.Range;
5151
import org.springframework.data.domain.Score;
52+
import org.springframework.data.domain.ScoringFunction;
5253
import org.springframework.data.domain.SearchResult;
5354
import org.springframework.data.domain.SearchResults;
5455
import org.springframework.data.domain.Similarity;
@@ -109,7 +110,7 @@ void shouldApplyVectorSearchWithDistance(VectorScoringFunctions functions) {
109110
}
110111

111112
static Set<VectorScoringFunctions> scoringFunctions() {
112-
return EnumSet.of(VectorScoringFunctions.COSINE, VectorScoringFunctions.INNER_PRODUCT,
113+
return EnumSet.of(VectorScoringFunctions.COSINE, VectorScoringFunctions.DOT_PRODUCT,
113114
VectorScoringFunctions.EUCLIDEAN);
114115
}
115116

@@ -169,6 +170,21 @@ void shouldRunStringQueryWithDistance() {
169170
assertThat(result.getScore().getFunction()).isEqualTo(VectorScoringFunctions.COSINE);
170171
}
171172

173+
@Test
174+
void shouldRunStringQueryWithFloatDistance() {
175+
176+
SearchResults<WithVector> results = repository.searchAnnotatedByCountryAndEmbeddingWithin("de", VECTOR, 2);
177+
178+
assertThat(results).hasSize(3).extracting(SearchResult::getContent).extracting(WithVector::getCountry)
179+
.containsOnly("de", "de", "de");
180+
assertThat(results).extracting(SearchResult::getContent).extracting(WithVector::getDescription)
181+
.containsSequence("two", "one", "four");
182+
183+
SearchResult<WithVector> result = results.getContent().get(0);
184+
assertThat(result.getScore().getValue()).isGreaterThanOrEqualTo(0);
185+
assertThat(result.getScore().getFunction()).isEqualTo(ScoringFunction.unspecified());
186+
}
187+
172188
@Test
173189
void shouldApplyVectorSearchWithRange() {
174190

@@ -320,6 +336,14 @@ AND cosine_distance(w.embedding, :embedding) <= :distance
320336
SearchResults<WithVector> searchAnnotatedByCountryAndEmbeddingWithin(String country, Vector embedding,
321337
Score distance);
322338

339+
@Query("""
340+
SELECT w, cosine_distance(w.embedding, :embedding) as distance FROM org.springframework.data.jpa.repository.PgVectorIntegrationTests$WithVector w
341+
WHERE w.country = ?1
342+
AND cosine_distance(w.embedding, :embedding) <= :distance
343+
ORDER BY distance asc""")
344+
SearchResults<WithVector> searchAnnotatedByCountryAndEmbeddingWithin(String country, Vector embedding,
345+
float distance);
346+
323347
SearchResults<WithVector> searchAllByCountryAndEmbeddingWithin(String country, Vector embedding,
324348
Range<Similarity> distance);
325349

spring-data-jpa/src/test/java/org/springframework/data/jpa/repository/query/SimilarityNormalizerUnitTests.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,19 @@ void normalizesCosine() {
5858
@Test
5959
void normalizesNegativeInnerProduct() {
6060

61-
assertThat(SimilarityNormalizer.DOT.getSimilarity(-0.8465620279312134)).isCloseTo(0.9232810139656067, offset(0.01));
62-
assertThat(SimilarityNormalizer.DOT.getSimilarity(-1.0626180171966553)).isCloseTo(1.0313090085983276, offset(0.01));
63-
assertThat(SimilarityNormalizer.DOT.getSimilarity(-2.0293400287628174)).isCloseTo(1.5146700143814087, offset(0.01));
61+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getSimilarity(-0.8465620279312134)).isCloseTo(0.9232810139656067,
62+
offset(0.01));
63+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getSimilarity(-1.0626180171966553)).isCloseTo(1.0313090085983276,
64+
offset(0.01));
65+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getSimilarity(-2.0293400287628174)).isCloseTo(1.5146700143814087,
66+
offset(0.01));
6467

65-
assertThat(SimilarityNormalizer.DOT.getScore(0.9232810139656067)).isCloseTo(-0.8465620279312134, offset(0.01));
66-
assertThat(SimilarityNormalizer.DOT.getScore(1.0313090085983276)).isCloseTo(-1.0626180171966553, offset(0.01));
67-
assertThat(SimilarityNormalizer.DOT.getScore(1.5146700143814087)).isCloseTo(-2.0293400287628174, offset(0.01));
68+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getScore(0.9232810139656067)).isCloseTo(-0.8465620279312134,
69+
offset(0.01));
70+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getScore(1.0313090085983276)).isCloseTo(-1.0626180171966553,
71+
offset(0.01));
72+
assertThat(SimilarityNormalizer.DOT_PRODUCT.getScore(1.5146700143814087)).isCloseTo(-2.0293400287628174,
73+
offset(0.01));
6874
}
6975

7076
}

src/main/antora/modules/ROOT/nav.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
** xref:jpa/stored-procedures.adoc[]
1515
** xref:jpa/specifications.adoc[]
1616
** xref:repositories/query-by-example.adoc[]
17+
** xref:repositories/vector-search.adoc[]
1718
** xref:jpa/transactions.adoc[]
1819
** xref:jpa/locking.adoc[]
1920
** xref:auditing.adoc[]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
:vector-search-intro-include: data-jpa::partial$vector-search-intro-include.adoc
2+
:vector-search-model-include: data-jpa::partial$vector-search-model-include.adoc
3+
:vector-search-repository-include: data-jpa::partial$vector-search-repository-include.adoc
4+
:vector-search-scoring-include: data-jpa::partial$vector-search-scoring-include.adoc
5+
:vector-search-method-derived-include: data-jpa::partial$vector-search-method-derived-include.adoc
6+
:vector-search-method-annotated-include: data-jpa::partial$vector-search-method-annotated-include.adoc
7+
8+
include::{commons}@data-commons::page$repositories/vector-search.adoc[]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
To use Hibernate Vector Search, you need to add the following dependencies to your project.
2+
3+
The following example shows how to set up dependencies in Maven and Gradle:
4+
5+
[tabs]
6+
======
7+
Maven::
8+
+
9+
[source,xml,indent=0,subs="verbatim,quotes",role="primary"]
10+
----
11+
<dependencies>
12+
<dependency>
13+
<groupId>org.hibernate.orm</groupId>
14+
<artifactId>hibernate-vector</artifactId>
15+
<version>${hibernate.version}</version>
16+
</dependency>
17+
</dependencies>
18+
19+
----
20+
21+
Gradle::
22+
+
23+
====
24+
[source,groovy,indent=0,subs="verbatim,quotes",role="secondary"]
25+
----
26+
dependencies {
27+
implementation 'org.hibernate.orm:hibernate-vector:${hibernateVersion}'
28+
}
29+
----
30+
====
31+
======
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
Annotated search methods must define the entire JPQL query to run a Vector Search.
2+
3+
.Using `@Query` Search Methods
4+
====
5+
[source,java]
6+
----
7+
interface CommentRepository extends Repository<Comment, String> {
8+
9+
@Query("""
10+
SELECT c, cosine_distance(c.embedding, :embedding) as distance FROM Comment c
11+
WHERE c.country = ?1
12+
AND cosine_distance(c.embedding, :embedding) <= :distance
13+
ORDER BY distance asc""")
14+
SearchResults<Comment> searchAnnotatedByCountryAndEmbeddingWithin(String country, Vector embedding,
15+
Score distance);
16+
17+
@Query("""
18+
SELECT c FROM Comment c
19+
WHERE c.country = ?1
20+
AND cosine_distance(c.embedding, :embedding) <= :distance
21+
ORDER BY cosine_distance(c.embedding, :embedding) asc""")
22+
List<Comment> findAnnotatedByCountryAndEmbeddingWithin(String country, Vector embedding, Score distance);
23+
24+
}
25+
----
26+
====
27+
28+
Vector Search methods are not required to include a score or distance in their projection.
29+
When using annotated search methods returning `SearchResults`, the execution mechanism assumes that a second projection column, if present, holds the score value.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
.Using `Near` and `Within` Keywords in Repository Search Methods
2+
====
3+
[source,java]
4+
----
5+
interface CommentRepository extends Repository<Comment, String> {
6+
7+
SearchResults<Comment> searchByEmbeddingNear(Vector vector, Score score);
8+
9+
SearchResults<Comment> searchByEmbeddingWithin(Vector vector, Range<Similarity> range);
10+
11+
SearchResults<Comment> searchByCountryAndEmbeddingWithin(String country, Vector vector, Range<Similarity> range);
12+
}
13+
----
14+
====
15+
16+
Derived Search Methods can define domain model attributes and Vector parameters.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
====
2+
[source,java]
3+
----
4+
class Comment {
5+
6+
@Id String id;
7+
String country;
8+
String comment;
9+
10+
@Column(name = "the_embedding")
11+
@JdbcTypeCode(SqlTypes.VECTOR)
12+
@Array(length = 5)
13+
Vector embedding;
14+
15+
// getters, setters, …
16+
}
17+
----
18+
====
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
.Using `SearchResult<T>` in a Repository Search Method
2+
====
3+
[source,java]
4+
----
5+
interface CommentRepository extends Repository<Comment, String> {
6+
7+
SearchResults<Comment> searchByCountryAndEmbeddingNear(String country, Vector vector, Score distance,
8+
Limit limit);
9+
10+
@Query("""
11+
SELECT c, cosine_distance(c.embedding, :embedding) as distance FROM Comment c
12+
WHERE c.country = ?1
13+
AND cosine_distance(c.embedding, :embedding) <= :distance
14+
ORDER BY distance asc""")
15+
SearchResults<Comment> searchAnnotatedByCountryAndEmbeddingWithin(String country, Vector embedding,
16+
Score distance);
17+
18+
}
19+
20+
SearchResults<Comment> results = repository.searchByCountryAndEmbeddingNear("en", Vector.of(…), Score.of(0.9), Limit.of(10));
21+
----
22+
====
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
Hibernate translates distance function calls to native database functions for PGvector and Oracle.
2+
Their result is typically a distance.
3+
When using `Similarity` instead of `Score`, Spring Data normalizes distance scores into a similarity score between 0 and 1. The higher the score, the more similar the two vectors are.
4+
// END
5+
6+
.Using `Score` and `Similarity` in Repository Search Methods
7+
====
8+
[source,java]
9+
----
10+
interface CommentRepository extends Repository<Comment, String> {
11+
12+
SearchResults<Comment> searchByEmbeddingNear(Vector vector, Score score);
13+
14+
SearchResults<Comment> searchByEmbeddingNear(Vector vector, Similarity similarity);
15+
16+
SearchResults<Comment> searchByEmbeddingNear(Vector vector, Range<Similarity> range);
17+
}
18+
19+
repository.searchByEmbeddingNear(Vector.of(…), Score.of(0.9, ScoringFunction.cosine())); <1>
20+
21+
repository.searchByEmbeddingNear(Vector.of(…), Similarity.of(0.9, ScoringFunction.cosine())); <2>
22+
23+
repository.searchByEmbeddingNear(Vector.of(…), Similarity.between(0.5, 1, ScoringFunction.euclidean()));<3>
24+
----
25+
26+
<1> Run a search and return results with a score of `0.9` or smaller using the Cosine distance.
27+
<2> Run a search and normalize the score into a similarity value.
28+
Return results with a similarity of `0.9` or greater using Cosine scoring.
29+
<3> Run a search and normalize the score into a similarity value.
30+
Return results with a similarity between `0.5` and `1.0` using Euclidean scoring.
31+
====
32+
33+
NOTE: JPA requires a `ScoringFunction` to be provided when creating `Score` or `Similarity` instances to select a scoring function.

0 commit comments

Comments
 (0)