diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts
index 550b87c9e0b..d7b73f9a256 100644
--- a/packages/vertexai/src/methods/chrome-adapter.test.ts
+++ b/packages/vertexai/src/methods/chrome-adapter.test.ts
@@ -53,7 +53,7 @@ async function toStringArray(
 
 describe('ChromeAdapter', () => {
   describe('constructor', () => {
-    it('sets image as expected input type by default', async () => {
+    it('determines expected inputs by request inspection', async () => {
       const languageModelProvider = {
         availability: () => Promise.resolve(Availability.available)
       } as LanguageModel;
@@ -69,7 +69,11 @@ describe('ChromeAdapter', () => {
         contents: [
           {
             role: 'user',
-            parts: [{ text: 'hi' }]
+            parts: [
+              { text: 'hi' },
+              // Triggers image as expected type.
+              { inlineData: { mimeType: 'image/asd', data: 'asd' } }
+            ]
           }
         ]
       });
diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts
index 9ac8f350a02..76575ff0e07 100644
--- a/packages/vertexai/src/methods/chrome-adapter.ts
+++ b/packages/vertexai/src/methods/chrome-adapter.ts
@@ -27,7 +27,9 @@ import {
   Availability,
   LanguageModel,
   LanguageModelCreateOptions,
-  LanguageModelMessageContent
+  LanguageModelExpectedInput,
+  LanguageModelMessageContent,
+  LanguageModelMessageType
 } from '../types/language-model';
 
 /**
@@ -44,9 +46,7 @@ export class ChromeAdapter {
     private languageModelProvider?: LanguageModel,
     private mode?: InferenceMode,
     private onDeviceParams: LanguageModelCreateOptions = {}
-  ) {
-    this.addImageTypeAsExpectedInput();
-  }
+  ) {}
 
   /**
    * Checks if a given request can be made on-device.
@@ -68,8 +68,10 @@ export class ChromeAdapter {
       return false;
     }
 
+    const expectedInputs = ChromeAdapter.extractExpectedInputs(request);
+
     // Triggers out-of-band download so model will eventually become available.
-    const availability = await this.downloadIfAvailable();
+    const availability = await this.downloadIfAvailable(expectedInputs);
 
     if (this.mode === 'only_on_device') {
       return true;
@@ -129,6 +131,33 @@ export class ChromeAdapter {
     );
   }
 
+  /**
+   * Infers Chrome's expected input types from the MIME types in a Vertex request.
+   *
+   * Chrome's API checks availability by type. It's tedious to specify the types in advance, so
+   * this method derives them from the top-level MIME type of each inline data part.
+   *
+   * @param request - Request whose parts are scanned for inline data.
+   * @returns One entry per distinct type, in order of first appearance.
+   */
+  private static extractExpectedInputs(
+    request: GenerateContentRequest
+  ): LanguageModelExpectedInput[] {
+    // Keyed by type string: object literals in a Set compare by reference and never dedupe.
+    const types = new Set<LanguageModelMessageType>();
+    for (const content of request.contents) {
+      for (const part of content.parts) {
+        if (part.inlineData) {
+          // NOTE(review): unchecked cast, so MIME types outside text/image/audio pass
+          // through unchanged; confirm the request is validated elsewhere.
+          const [type] = part.inlineData.mimeType.split('/');
+          types.add(type as LanguageModelMessageType);
+        }
+      }
+    }
+    return Array.from(types, type => ({ type }));
+  }
+
   /**
    * Asserts inference for the given request can be performed by an on-device model.
    */
@@ -164,12 +193,20 @@ export class ChromeAdapter {
   /**
    * Encapsulates logic to get availability and download a model if one is downloadable.
    */
-  private async downloadIfAvailable(): Promise<Availability | undefined> {
+  private async downloadIfAvailable(
+    expectedInputs: LanguageModelExpectedInput[]
+  ): Promise<Availability | undefined> {
+    // Side-effect: updates construction-time params with request-time params.
+    // This is required because params are referenced through multiple flows.
+    Object.assign(this.onDeviceParams, { expectedInputs });
+
     const availability = await this.languageModelProvider?.availability(
       this.onDeviceParams
     );
 
     if (availability === Availability.downloadable) {
+      // Side-effect: triggers out-of-band model download.
+      // This is required because Chrome manages the model download.
       this.download();
     }
 
@@ -252,11 +289,6 @@ export class ChromeAdapter {
     return newSession;
   }
 
-  private addImageTypeAsExpectedInput(): void {
-    // Defaults to support image inputs for convenience.
-    this.onDeviceParams.expectedInputs ??= [{ type: 'image' }];
-  }
-
   /**
    * Formats string returned by Chrome as a {@link Response} returned by Vertex.
    */
diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts
index cd84f22dbdb..301287d462a 100644
--- a/packages/vertexai/src/types/language-model.ts
+++ b/packages/vertexai/src/types/language-model.ts
@@ -52,7 +52,7 @@ export interface LanguageModelCreateOptions
 interface LanguageModelPromptOptions {
   signal?: AbortSignal;
 }
-interface LanguageModelExpectedInput {
+export interface LanguageModelExpectedInput {
   type: LanguageModelMessageType;
   languages?: string[];
 }
@@ -74,7 +74,7 @@ export interface LanguageModelMessageContent {
   content: LanguageModelMessageContentValue;
 }
 type LanguageModelMessageRole = 'system' | 'user' | 'assistant';
-type LanguageModelMessageType = 'text' | 'image' | 'audio';
+export type LanguageModelMessageType = 'text' | 'image' | 'audio';
 type LanguageModelMessageContentValue =
   | ImageBitmapSource
   | AudioBuffer