-
Notifications
You must be signed in to change notification settings - Fork 1.5k
PHPORM-277 Add Builder::vectorSearch()
#3242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,23 +5,31 @@ | |
use Illuminate\Database\Eloquent\Collection as EloquentCollection; | ||
use Illuminate\Support\Collection as LaravelCollection; | ||
use Illuminate\Support\Facades\Schema; | ||
use MongoDB\Builder\Query; | ||
use MongoDB\Builder\Search; | ||
use MongoDB\Collection as MongoDBCollection; | ||
use MongoDB\Driver\Exception\ServerException; | ||
use MongoDB\Laravel\Schema\Builder; | ||
use MongoDB\Laravel\Tests\Models\Book; | ||
|
||
use function array_map; | ||
use function assert; | ||
use function mt_getrandmax; | ||
use function rand; | ||
use function range; | ||
use function srand; | ||
use function usleep; | ||
use function usort; | ||
|
||
class AtlasSearchTest extends TestCase | ||
{ | ||
private array $vectors; | ||
|
||
public function setUp(): void | ||
{ | ||
parent::setUp(); | ||
|
||
Book::insert([ | ||
Book::insert($this->addVector([ | ||
['title' => 'Introduction to Algorithms'], | ||
['title' => 'Clean Code: A Handbook of Agile Software Craftsmanship'], | ||
['title' => 'Design Patterns: Elements of Reusable Object-Oriented Software'], | ||
|
@@ -42,7 +50,7 @@ public function setUp(): void | |
['title' => 'Understanding Machine Learning: From Theory to Algorithms'], | ||
['title' => 'Deep Learning'], | ||
['title' => 'Pattern Recognition and Machine Learning'], | ||
]); | ||
])); | ||
|
||
$collection = $this->getConnection('mongodb')->getCollection('books'); | ||
assert($collection instanceof MongoDBCollection); | ||
|
@@ -66,8 +74,9 @@ public function setUp(): void | |
|
||
$collection->createSearchIndex([ | ||
'fields' => [ | ||
['type' => 'vector', 'numDimensions' => 16, 'path' => 'vector16', 'similarity' => 'cosine'], | ||
['type' => 'vector', 'numDimensions' => 4, 'path' => 'vector4', 'similarity' => 'cosine'], | ||
['type' => 'vector', 'numDimensions' => 32, 'path' => 'vector32', 'similarity' => 'euclidean'], | ||
['type' => 'filter', 'path' => 'title'], | ||
], | ||
], ['name' => 'vector', 'type' => 'vectorSearch']); | ||
} catch (ServerException $e) { | ||
|
@@ -131,7 +140,7 @@ public function testGetIndexes() | |
], | ||
[ | ||
'name' => 'vector', | ||
'columns' => ['vector16', 'vector32'], | ||
'columns' => ['vector4', 'vector32', 'title'], | ||
jmikola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
'type' => 'vectorSearch', | ||
'primary' => false, | ||
'unique' => false, | ||
|
@@ -180,10 +189,10 @@ public function testEloquentBuilderAutocomplete() | |
self::assertInstanceOf(LaravelCollection::class, $results); | ||
self::assertCount(3, $results); | ||
self::assertSame([ | ||
'Operating System Concepts', | ||
'Database System Concepts', | ||
'Modern Operating Systems', | ||
], $results->all()); | ||
'Operating System Concepts', | ||
], $results->sort()->values()->all()); | ||
} | ||
|
||
public function testDatabaseBuilderAutocomplete() | ||
|
@@ -194,9 +203,62 @@ public function testDatabaseBuilderAutocomplete() | |
self::assertInstanceOf(LaravelCollection::class, $results); | ||
self::assertCount(3, $results); | ||
self::assertSame([ | ||
'Operating System Concepts', | ||
'Database System Concepts', | ||
'Modern Operating Systems', | ||
], $results->all()); | ||
'Operating System Concepts', | ||
], $results->sort()->values()->all()); | ||
} | ||
|
||
/**
 * Runs a vector search through the query builder (connection->table('books'))
 * against the 'vector' index on the 'vector4' path, using the stored vector at
 * index 7 as the query vector, limited to 4 results with exact: true.
 *
 * Expects a Laravel support Collection of 4 rows whose first row is
 * 'The Art of Computer Programming' with a vectorSearchScore of exactly 1.0.
 *
 * NOTE(review): presumably $this->vectors[7] belongs to the 8th book inserted
 * in setUp(); the full insert list is not visible in this hunk, so confirm.
 */
public function testDatabaseBuilderVectorSearch() | ||
{ | ||
$results = $this->getConnection('mongodb')->table('books') | ||
->vectorSearch( | ||
index: 'vector', | ||
path: 'vector4', | ||
queryVector: $this->vectors[7], // This is an exact match of the vector | ||
jmikola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
limit: 4, | ||
exact: true, | ||
); | ||
|
||
// Query-builder results are accessed as arrays (['title']), not as model objects.
self::assertInstanceOf(LaravelCollection::class, $results); | ||
self::assertCount(4, $results); | ||
self::assertSame('The Art of Computer Programming', $results->first()['title']); | ||
// The query vector is one of the stored vectors, so the top hit is expected to score 1.0.
self::assertSame(1.0, $results->first()['vectorSearchScore']); | ||
} | ||
|
||
/**
 * Runs a vector search through the Eloquent model (Book::vectorSearch) against
 * the 'vector' index on the 'vector4' path, using the stored vector at index 7,
 * with limit 5, numCandidates 15 and a filter that excludes the title
 * 'The Art of Computer Programming'.
 *
 * Expects an Eloquent Collection of 5 Book models whose first entry is
 * 'The Mythical Man-Month: Essays on Software Engineering' with a float
 * vectorSearchScore strictly between 0.9 and 1.0.
 */
public function testEloquentBuilderVectorSearch() | ||
{ | ||
$results = Book::vectorSearch( | ||
index: 'vector', | ||
path: 'vector4', | ||
queryVector: $this->vectors[7], | ||
limit: 5, | ||
numCandidates: 15, | ||
// excludes the exact match | ||
filter: Query::query( | ||
title: Query::ne('The Art of Computer Programming'), | ||
), | ||
); | ||
|
||
// Eloquent results are hydrated models, so attributes are read as properties (->title).
self::assertInstanceOf(EloquentCollection::class, $results); | ||
self::assertCount(5, $results); | ||
self::assertInstanceOf(Book::class, $results->first()); | ||
self::assertNotSame('The Art of Computer Programming', $results->first()->title); | ||
self::assertSame('The Mythical Man-Month: Essays on Software Engineering', $results->first()->title); | ||
// With the exact match filtered out, the best remaining score must be below 1.0.
self::assertThat( | ||
$results->first()->vectorSearchScore, | ||
self::logicalAnd(self::isType('float'), self::greaterThan(0.9), self::lessThan(1.0)), | ||
); | ||
} | ||
|
||
/**
 * Generate random vectors using fixed seed to make tests deterministic.
 *
 * Seeds the generator with srand(1), then gives every item a 'vector4' field
 * holding 4 values, each computed as rand() / mt_getrandmax(). The same vector
 * is appended to $this->vectors in item order so tests can reuse it as a query
 * vector.
 *
 * NOTE(review): the vectors are stored as plain PHP arrays of floats. The review
 * discussion embedded in this method suggests revisiting this once a dedicated
 * vector type is available (PHPC-2474 / PHPLIB-1603).
 *
 * @param array $items Rows to decorate; each row is modified by reference inside the loop.
 *
 * @return array The same rows, each with an added 'vector4' key.
 */ | ||
private function addVector(array $items): array | ||
{ | ||
srand(1); | ||
// $item is bound by reference so the 'vector4' assignment lands in $items itself.
foreach ($items as &$item) { | ||
$this->vectors[] = $item['vector4'] = array_map(fn () => rand() / mt_getrandmax(), range(0, 3)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider leaving a TODO to revisit this once PHPC-2474 is implemented. It looks like that ticket entails more than just adding a new subtype constant, as there are utility functions to convert to/from vectors (as described in the corresponding spec). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure for the todo, I create a ticket to add support in the aggregation builder first. https://jira.mongodb.org/browse/PHPLIB-1603 |
||
} | ||
|
||
||
return $items; | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.