From cfb41dd2b1a481a787ad97d4325659f85748542c Mon Sep 17 00:00:00 2001 From: Augustas Date: Wed, 27 Nov 2024 10:10:04 +0200 Subject: [PATCH 1/6] add text2vec-weaviate vectorizer --- src/collections/config/types/vectorizer.ts | 18 +++++++++ src/collections/configure/types/vectorizer.ts | 5 +++ src/collections/configure/unit.test.ts | 39 +++++++++++++++++++ src/collections/configure/vectorizer.ts | 22 +++++++++++ 4 files changed, 84 insertions(+) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index 63ff7a21..966f6d4a 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -40,6 +40,7 @@ export type Vectorizer = | 'text2vec-google' | 'text2vec-transformers' | 'text2vec-voyageai' + | 'text2vec-weaviate' | 'none'; /** The configuration for image vectorization using a neural network module. @@ -406,6 +407,20 @@ export type Text2VecVoyageAIConfig = { vectorizeCollectionName?: boolean; }; +/** + * The configuration for text vectorization using Weaviate's self-hosted text-based embedding models. + * + * TODO: add documentation reference once available. + */ +export interface Text2VecWeaviateConfig { + /** The base URL to use where API requests should go. */ + baseURL?: string; + /** The model to use. */ + model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string; + /** Whether to vectorize the collection name. */ + vectorizeCollectionName?: boolean; +}; + export type NoVectorizerConfig = {}; export type VectorizerConfig = @@ -428,6 +443,7 @@ export type VectorizerConfig = | Text2VecPalmConfig | Text2VecTransformersConfig | Text2VecVoyageAIConfig + | Text2VecWeaviateConfig | NoVectorizerConfig; export type VectorizerConfigType = V extends 'img2vec-neural' @@ -474,6 +490,8 @@ export type VectorizerConfigType = V extends 'img2vec-neural' ? Text2VecTransformersConfig | undefined : V extends 'text2vec-voyageai' ? Text2VecVoyageAIConfig | undefined + : V extends 'text2vec-weaviate' + ? Text2VecWeaviateConfig | undefined : V extends 'none' ? {} : V extends undefined diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index d95f7a89..d7bc66b3 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -17,6 +17,7 @@ import { Text2VecOpenAIConfig, Text2VecTransformersConfig, Text2VecVoyageAIConfig, + Text2VecWeaviateConfig, VectorIndexType, Vectorizer, VectorizerConfigType, @@ -182,6 +183,8 @@ export type Text2VecTransformersConfigCreate = Text2VecTransformersConfig; export type Text2VecVoyageAIConfigCreate = Text2VecVoyageAIConfig; +export type Text2VecWeaviateConfigCreate = Text2VecWeaviateConfig; + export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Img2VecNeuralConfigCreate | undefined : V extends 'multi2vec-clip' @@ -226,6 +229,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Text2VecTransformersConfigCreate | undefined : V extends 'text2vec-voyageai' ? Text2VecVoyageAIConfigCreate | undefined + : V extends 'text2vec-weaviate' + ? Text2VecWeaviateConfigCreate | undefined : V extends 'none' ? {} : V extends undefined diff --git a/src/collections/configure/unit.test.ts b/src/collections/configure/unit.test.ts index cef6b655..8554a4e8 100644 --- a/src/collections/configure/unit.test.ts +++ b/src/collections/configure/unit.test.ts @@ -1342,6 +1342,45 @@ describe('Unit testing of the vectorizer factory class', () => { }, }); }); + + it('should create the correct Text2VecWeaviateConfig type with defaults', () => { + const config = configure.vectorizer.text2VecWeaviate(); + expect(config).toEqual>({ + name: undefined, + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'text2vec-weaviate', + config: undefined, + }, + }); + }); + + it('should create the correct Text2VecWeaviateConfig type with all values', () => { + const config = configure.vectorizer.text2VecWeaviate({ + name: 'test', + baseURL: 'base-url', + model: 'model', + vectorizeCollectionName: true, + }); + expect(config).toEqual>({ + name: 'test', + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'text2vec-weaviate', + config: { + baseURL: 'base-url', + model: 'model', + vectorizeCollectionName: true, + }, + }, + }); + }); }); describe('Unit testing of the generative factory class', () => { diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index 1fbcde15..6f0f7d34 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -600,4 +600,26 @@ export const vectorizer = { }, }); }, + + /** + * Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-weaviate'`. + * + * TODO: add documentation reference once available. + * + * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-weaviate` vectorizer. + * @returns {VectorConfigCreate, N, I, 'text2vec-weaviate'>} The configuration object. + */ + text2VecWeaviate: ( + opts?: ConfigureTextVectorizerOptions + ): VectorConfigCreate, N, I, 'text2vec-weaviate'> => { + const { name, sourceProperties, vectorIndexConfig, ...config } = opts || {}; + return makeVectorizer(name, { + sourceProperties, + vectorIndexConfig, + vectorizerConfig: { + name: 'text2vec-weaviate', + config: Object.keys(config).length === 0 ? undefined : config, + }, + }); + }, }; From 6faa4cfd761fa82fdbd1fa61613af24797da1aa9 Mon Sep 17 00:00:00 2001 From: Augustas Date: Wed, 27 Nov 2024 16:05:07 +0200 Subject: [PATCH 2/6] add weaviate embedding service headers --- package-lock.json | 9 +++++---- package.json | 6 +++--- src/connection/helpers.test.ts | 35 ++++++++++++++++++++++++++++++++++ src/connection/helpers.ts | 16 ++++++++++++++-- 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index fa02f507..105be795 100644 --- a/package-lock.json +++ b/package-lock.json @@ -53,7 +53,7 @@ "tsup": "^8.0.2", "typedoc": "^0.25.12", "typedoc-plugin-extras": "^3.0.0", - "typescript": "5.1.3" + "typescript": "^5.3.3" }, "engines": { "node": ">=18.0.0" @@ -9448,10 +9448,11 @@ } }, "node_modules/typescript": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.1.3.tgz", - "integrity": "sha512-XH627E9vkeqhlZFQuL+UsyAXEnibT0kWR2FWONlr4sTjvxyJYnyefgrkyECLzM5NenmKzRAy2rR/OlYLA1HkZw==", + "version": "5.3.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.3.tgz", + "integrity": "sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==", "dev": true, + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/package.json b/package.json index 38e80a00..714924af 100644 --- a/package.json +++ b/package.json @@ -24,8 +24,8 @@ "test:coverage": "npm run test -- --coverage", "build": "npm run build:node", "build:web": "tsup", - "build:cjs": "tsc --module commonjs --outDir dist/node/cjs && touch dist/node/cjs/package.json && echo '{\"type\": \"commonjs\"}' > dist/node/cjs/package.json", - "build:esm": "tsc --module esnext --outDir dist/node/esm && touch dist/node/esm/package.json && echo '{\"type\": \"module\"}' > dist/node/esm/package.json", + "build:cjs": "tsc --module commonjs --moduleResolution node10 --outDir dist/node/cjs && touch dist/node/cjs/package.json && echo '{\"type\": \"commonjs\"}' > dist/node/cjs/package.json", + "build:esm": "tsc --outDir dist/node/esm && touch dist/node/esm/package.json && echo '{\"type\": \"module\"}' > dist/node/esm/package.json", "build:node": "npm run lint && npm run build:cjs && npm run build:esm && prettier --write --no-error-on-unmatched-pattern '**/dist/**/*.{ts,js}'", "prepack": "npm run build", "lint": "eslint --ext .ts,.js .", @@ -95,7 +95,7 @@ "tsup": "^8.0.2", "typedoc": "^0.25.12", "typedoc-plugin-extras": "^3.0.0", - "typescript": "5.1.3" + "typescript": "^5.3.3" }, "lint-staged": { "*.{ts,js}": [ diff --git a/src/connection/helpers.test.ts b/src/connection/helpers.test.ts index 7e0f4276..fb055fc5 100644 --- a/src/connection/helpers.test.ts +++ b/src/connection/helpers.test.ts @@ -1,4 +1,5 @@ import weaviate from '../index.js'; +import { connectToWeaviateCloud } from './helpers.js'; const WCD_URL = 'https://piblpmmdsiknacjnm1ltla.c1.europe-west3.gcp.weaviate.cloud'; const WCD_KEY = 'cy4ua772mBlMdfw3YnclqAWzFhQt0RLIN0sl'; @@ -18,6 +19,40 @@ describe('Testing of the connection helper methods', () => { }); }); + describe('adds Weaviate Embedding Service headers', () => { + it('to empty headers', async () => { + const clientMakerMock = jest.fn().mockResolvedValue(undefined); + + await connectToWeaviateCloud(WCD_URL, clientMakerMock, { + authCredentials: new weaviate.ApiKey(WCD_KEY), + }); + + expect(clientMakerMock.mock.calls[0][0]).toMatchObject({ + headers: { + 'X-Weaviate-Api-Key': WCD_KEY, + 'X-Weaviate-Cluster-Url': WCD_URL, + }, + }); + }); + + it('to existing headers', async () => { + const clientMakerMock = jest.fn().mockResolvedValue(undefined); + + await connectToWeaviateCloud(WCD_URL, clientMakerMock, { + authCredentials: new weaviate.ApiKey(WCD_KEY), + headers: { existingHeader: 'existingValue' }, + }); + + expect(clientMakerMock.mock.calls[0][0]).toMatchObject({ + headers: { + existingHeader: 'existingValue', + 'X-Weaviate-Api-Key': WCD_KEY, + 'X-Weaviate-Cluster-Url': WCD_URL, + }, + }); + }); + }); + it('should connect to a local cluster', () => { return weaviate .connectToLocal() diff --git a/src/connection/helpers.ts b/src/connection/helpers.ts index cc28c0c5..b556bb3a 100644 --- a/src/connection/helpers.ts +++ b/src/connection/helpers.ts @@ -1,6 +1,6 @@ import { WeaviateStartUpError } from '../errors.js'; import { ClientParams, WeaviateClient } from '../index.js'; -import { AuthCredentials } from './auth.js'; +import { AuthCredentials, isApiKey, mapApiKey } from './auth.js'; import { ProxiesParams, TimeoutParams } from './http.js'; /** The options available to the `weaviate.connectToWeaviateCloud` method. */ @@ -100,7 +100,7 @@ export function connectToWeaviateCloud( }, }, auth: options?.authCredentials, - headers: options?.headers, + headers: addWeaviateEmbeddingServiceHeaders(clusterURL, options), }).catch((e) => { throw new WeaviateStartUpError(`Weaviate failed to startup with message: ${e.message}`); }); @@ -155,3 +155,15 @@ export function connectToCustom( throw new WeaviateStartUpError(`Weaviate failed to startup with message: ${e.message}`); }); } + +function addWeaviateEmbeddingServiceHeaders(clusterURL: string, options?: ConnectToWeaviateCloudOptions) { + if (!isApiKey(options?.authCredentials)) { + return options?.headers; + } + + return { + ...options.headers, + 'X-Weaviate-Api-Key': mapApiKey(options.authCredentials).apiKey, + 'X-Weaviate-Cluster-Url': clusterURL, + }; +} From 7820fb1fef984e0176b3555e9566012332507d77 Mon Sep 17 00:00:00 2001 From: Augustas Date: Thu, 28 Nov 2024 09:37:00 +0200 Subject: [PATCH 3/6] format --- src/collections/config/types/vectorizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index 966f6d4a..be32f991 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -419,7 +419,7 @@ export interface Text2VecWeaviateConfig { model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string; /** Whether to vectorize the collection name. */ vectorizeCollectionName?: boolean; -}; +} export type NoVectorizerConfig = {}; From 8c2ea3916a67984bf35445c5e5c83e9082ffe477 Mon Sep 17 00:00:00 2001 From: Augustas Date: Thu, 28 Nov 2024 09:47:09 +0200 Subject: [PATCH 4/6] add additional tests --- src/connection/helpers.test.ts | 59 ++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/src/connection/helpers.test.ts b/src/connection/helpers.test.ts index fb055fc5..97f6049b 100644 --- a/src/connection/helpers.test.ts +++ b/src/connection/helpers.test.ts @@ -19,6 +19,18 @@ describe('Testing of the connection helper methods', () => { }); }); + it('should connect to a local cluster', () => { + return weaviate + .connectToLocal() + .then((client) => client.getMeta()) + .then((res: any) => { + expect(res.version).toBeDefined(); + }) + .catch((e: any) => { + throw new Error('it should not have errord: ' + e); + }); + }); + describe('adds Weaviate Embedding Service headers', () => { it('to empty headers', async () => { const clientMakerMock = jest.fn().mockResolvedValue(undefined); @@ -27,11 +39,9 @@ describe('Testing of the connection helper methods', () => { authCredentials: new weaviate.ApiKey(WCD_KEY), }); - expect(clientMakerMock.mock.calls[0][0]).toMatchObject({ - headers: { - 'X-Weaviate-Api-Key': WCD_KEY, - 'X-Weaviate-Cluster-Url': WCD_URL, - }, + expect(clientMakerMock.mock.calls[0][0].headers).toEqual({ + 'X-Weaviate-Api-Key': WCD_KEY, + 'X-Weaviate-Cluster-Url': WCD_URL, }); }); @@ -43,25 +53,34 @@ describe('Testing of the connection helper methods', () => { headers: { existingHeader: 'existingValue' }, }); - expect(clientMakerMock.mock.calls[0][0]).toMatchObject({ - headers: { - existingHeader: 'existingValue', - 'X-Weaviate-Api-Key': WCD_KEY, - 'X-Weaviate-Cluster-Url': WCD_URL, - }, + expect(clientMakerMock.mock.calls[0][0].headers).toEqual({ + existingHeader: 'existingValue', + 'X-Weaviate-Api-Key': WCD_KEY, + 'X-Weaviate-Cluster-Url': WCD_URL, }); }); }); - it('should connect to a local cluster', () => { - return weaviate - .connectToLocal() - .then((client) => client.getMeta()) - .then((res: any) => { - expect(res.version).toBeDefined(); - }) - .catch((e: any) => { - throw new Error('it should not have errord: ' + e); + describe('does not add Weaviate Embedding Service headers when not using API key', () => { + it('to empty headers', async () => { + const clientMakerMock = jest.fn().mockResolvedValue(undefined); + + await connectToWeaviateCloud(WCD_URL, clientMakerMock, { + authCredentials: new weaviate.AuthUserPasswordCredentials({ username: 'test' }), + }); + + expect(clientMakerMock.mock.calls[0][0].headers).toBe(undefined); + }); + + it('to existing headers', async () => { + const clientMakerMock = jest.fn().mockResolvedValue(undefined); + + await connectToWeaviateCloud(WCD_URL, clientMakerMock, { + authCredentials: new weaviate.AuthUserPasswordCredentials({ username: 'test' }), + headers: { existingHeader: 'existingValue' }, }); + + expect(clientMakerMock.mock.calls[0][0].headers).toEqual({ existingHeader: 'existingValue' }); + }); }); }); From a58dac37226cd384d5965253fdfb7971741acbc9 Mon Sep 17 00:00:00 2001 From: Augustas Date: Thu, 28 Nov 2024 12:21:34 +0200 Subject: [PATCH 5/6] temporarily comment Azure login test --- src/connection/journey.test.ts | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/connection/journey.test.ts b/src/connection/journey.test.ts index 52237453..b517e5e8 100644 --- a/src/connection/journey.test.ts +++ b/src/connection/journey.test.ts @@ -35,29 +35,29 @@ describe('connection', () => { }); }); - it('makes an Azure logged-in request with client credentials', async () => { - if (process.env.AZURE_CLIENT_SECRET == undefined || process.env.AZURE_CLIENT_SECRET == '') { - console.warn('Skipping because `AZURE_CLIENT_SECRET` is not set'); - return Promise.resolve(); - } - - const client = await weaviate.connectToLocal({ - port: 8081, - authCredentials: new AuthClientCredentials({ - clientSecret: process.env.AZURE_CLIENT_SECRET, - silentRefresh: false, - }), - }); - - return client - .getMeta() - .then((res) => { - expect(res.version).toBeDefined(); - }) - .catch((e) => { - throw new Error('it should not have errord: ' + e); - }); - }); + // it('makes an Azure logged-in request with client credentials', async () => { + // if (process.env.AZURE_CLIENT_SECRET == undefined || process.env.AZURE_CLIENT_SECRET == '') { + // console.warn('Skipping because `AZURE_CLIENT_SECRET` is not set'); + // return Promise.resolve(); + // } + + // const client = await weaviate.connectToLocal({ + // port: 8081, + // authCredentials: new AuthClientCredentials({ + // clientSecret: process.env.AZURE_CLIENT_SECRET, + // silentRefresh: false, + // }), + // }); + + // return client + // .getMeta() + // .then((res) => { + // expect(res.version).toBeDefined(); + // }) + // .catch((e) => { + // throw new Error('it should not have errord: ' + e); + // }); + // }); it('makes an Okta logged-in request with client credentials', async () => { if (process.env.OKTA_CLIENT_SECRET == undefined || process.env.OKTA_CLIENT_SECRET == '') { From f207ed517191638e6963abb7407ca6c8beae62d0 Mon Sep 17 00:00:00 2001 From: Augustas Date: Thu, 28 Nov 2024 13:10:03 +0200 Subject: [PATCH 6/6] switch interface to type --- src/collections/config/types/vectorizer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index be32f991..051ead5c 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -412,14 +412,14 @@ export type Text2VecVoyageAIConfig = { * * TODO: add documentation reference once available. */ -export interface Text2VecWeaviateConfig { +export type Text2VecWeaviateConfig = { /** The base URL to use where API requests should go. */ baseURL?: string; /** The model to use. */ model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string; /** Whether to vectorize the collection name. */ vectorizeCollectionName?: boolean; -} +}; export type NoVectorizerConfig = {};