diff --git a/web/apps/photos/src/services/clip-service.ts b/web/apps/photos/src/services/clip-service.ts
index a1f3cd1a6..6099edbd9 100644
--- a/web/apps/photos/src/services/clip-service.ts
+++ b/web/apps/photos/src/services/clip-service.ts
@@ -41,29 +41,29 @@ export interface CLIPIndexingStatus {
  * 3. Use cosine similarity to find the find the image (embedding) closest to
  *    the text (embedding).
  *
- * More details are in the blog post that describes the initial launch of this
- * feature using the GGML runtime:
- * https://ente.io/blog/image-search-with-clip-ggml/
+ * More details are in our [blog
+ * post](https://ente.io/blog/image-search-with-clip-ggml/) that describes the
+ * initial launch of this feature using the GGML runtime.
  *
- * Since the initial launch, we've added support for another runtime, ONNX.
+ * Since the initial launch, we've switched over to another runtime,
+ * [ONNX](https://onnxruntime.ai).
  *
- * Note that we don't train the neural network - we use one of the publicly
- * available pre-trained neural networks (which are wholly defined by their
- * connectivity and weights), and use one of the standard ML runtimes to load
- * these weights and instantiate a running network that we can use to compute
- * the embeddings. Theoretically, the same CLIP model can be loaded by different
- * frameworks / runtimes, but in practice each runtime has its own preferred
- * format, and there are also quantization tradeoffs. So for each runtime that
- * we support we download a distinct model (binary encoding of weights).
+ * Note that we don't train the neural network - we only use one of the publicly
+ * available pre-trained neural networks for inference. These neural networks
+ * are wholly defined by their connectivity and weights. ONNX, our ML runtime,
+ * loads these weights and instantiates a running network that we can use to
+ * compute the embeddings.
  *
- * Currently supported runtimes are:
+ * Theoretically, the same CLIP model can be loaded by different frameworks /
+ * runtimes, but in practice each runtime has its own preferred format, and
+ * there are also quantization tradeoffs. So there is a specific model (a binary
+ * encoding of weights) tied to our current runtime that we use.
  *
- * - [GGML](https://github.com/monatis/clip.cpp)
- * - [ONNX](https://onnxruntime.ai)
- *
- * Both these currently have one (and only one) associated model.
+ * So that the embeddings can, for the most part, be shared, whenever possible
+ * we try to keep all the preprocessing steps, and the model itself, the same
+ * across clients - web and mobile.
  */
-class ClipService {
+class CLIPService {
     private embeddingExtractionInProgress: AbortController | null = null;
     private reRunNeeded = false;
     private indexingStatus: CLIPIndexingStatus = {
@@ -372,7 +372,7 @@ class ClipService {
     };
 }
 
-export const clipService = new ClipService();
+export const clipService = new CLIPService();
 
 const getNonClipEmbeddingExtractedFiles = async (
     files: EnteFile[],
diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts
index 8e0e0680c..b85005db0 100644
--- a/web/apps/photos/src/services/searchService.ts
+++ b/web/apps/photos/src/services/searchService.ts
@@ -4,7 +4,6 @@ import * as chrono from "chrono-node";
 import { FILE_TYPE } from "constants/file";
 import { t } from "i18next";
 import { Collection } from "types/collection";
-import { Model } from "types/embedding";
 import { EntityType, LocationTag, LocationTagData } from "types/entity";
 import { EnteFile } from "types/file";
 import { Person, Thing } from "types/machineLearning";
@@ -396,7 +395,7 @@ async function searchThing(searchPhrase: string) {
 }
 
 async function searchClip(searchPhrase: string): Promise<ClipSearchScores> {
-    const imageEmbeddings = await getLocalEmbeddings(Model.ONNX_CLIP);
+    const imageEmbeddings = await getLocalEmbeddings();
     const textEmbedding = await clipService.getTextEmbedding(searchPhrase);
     const clipSearchResult = new Map<number, number>(
         (
diff --git a/web/apps/photos/src/types/embedding.tsx b/web/apps/photos/src/types/embedding.tsx
index 3626e0fad..b618ed315 100644
--- a/web/apps/photos/src/types/embedding.tsx
+++ b/web/apps/photos/src/types/embedding.tsx
@@ -1,5 +1,4 @@
 export enum Model {
-    GGML_CLIP = "ggml-clip",
     ONNX_CLIP = "onnx-clip",
 }
 
diff --git a/web/packages/next/types/ipc.ts b/web/packages/next/types/ipc.ts
index 43eaee575..4a1539cc3 100644
--- a/web/packages/next/types/ipc.ts
+++ b/web/packages/next/types/ipc.ts
@@ -10,12 +10,7 @@ export interface AppUpdateInfo {
     version: string;
 }
 
-export type CLIPModel = "ggml-clip" | "onnx-clip";
-
-export const isCLIPModel = (s: unknown) => s == "ggml-clip" || s == "onnx-clip";
-
 export enum Model {
-    GGML_CLIP = "ggml-clip",
     ONNX_CLIP = "onnx-clip",
 }
 
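The updated comment in clip-service.ts describes the search flow: compute embeddings for the indexed images, compute an embedding for the text query, then rank images by cosine similarity. For context, here is a minimal TypeScript sketch of that scoring step. The `Embedding` shape, the helper names, and the 0.23 score cutoff are illustrative assumptions for this sketch and are not part of this change.

```ts
// Sketch only: the cosine-similarity scoring step described in the
// clip-service.ts comment. Types, helper names, and the 0.23 cutoff are
// assumptions for illustration, not the actual implementation.

interface Embedding {
    fileID: number;
    embedding: Float32Array;
}

/** Cosine similarity of two equal-length vectors. */
const cosineSimilarity = (a: Float32Array, b: Float32Array): number => {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

/**
 * Score each indexed image embedding against the text embedding, keeping
 * only entries above a similarity threshold (hypothetical default shown).
 * Returns a map from file ID to score.
 */
const scoreImages = (
    imageEmbeddings: Embedding[],
    textEmbedding: Float32Array,
    threshold = 0.23,
): Map<number, number> =>
    new Map(
        imageEmbeddings
            .map(({ fileID, embedding }): [number, number] => [
                fileID,
                cosineSimilarity(embedding, textEmbedding),
            ])
            .filter(([, score]) => score >= threshold),
    );
```

In the actual flow, `searchClip` above builds a similar `Map<number, number>` keyed by file ID, using the embeddings returned by `getLocalEmbeddings()` and the text embedding from `clipService.getTextEmbedding()`.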