
[desktop] ML: Clarify that existing indexed people will not show in this update (#1775)

Manav Rathi · 1 year ago
parent commit 07ba9ef1d6

+ 7 - 21
desktop/src/main.ts

@@ -315,32 +315,18 @@ const setupTrayItem = (mainWindow: BrowserWindow) => {
 
 /**
  * Older versions of our app used to maintain a cache dir using the main
- * process. This has been removed in favor of cache on the web layer.
+ * process. This has been removed in favor of cache on the web layer. Delete the
+ * old cache dir if it exists.
  *
- * Delete the old cache dir if it exists.
- *
- * This will happen in two phases. The cache had three subdirectories:
- *
- * - Two of them, "thumbs" and "files", will be removed now (v1.7.0, May 2024).
- *
- * - The third one, "face-crops" will be removed once we finish the face search
- *   changes. See: [Note: Legacy face crops].
- *
- * This migration code can be removed after some time once most people have
- * upgraded to newer versions.
+ * Added May 2024, v1.7.0. This migration code can be removed after some time
+ * once most people have upgraded to newer versions.
  */
 const deleteLegacyDiskCacheDirIfExists = async () => {
-    const removeIfExists = async (dirPath: string) => {
-        if (existsSync(dirPath)) {
-            log.info(`Removing legacy disk cache from ${dirPath}`);
-            await fs.rm(dirPath, { recursive: true });
-        }
-    };
     // [Note: Getting the cache path]
     //
     // The existing code was passing "cache" as a parameter to getPath.
     //
-    // However, "cache" is not a valid parameter to getPath. It works! (for
+    // However, "cache" is not a valid parameter to getPath. It works (for
     // example, on macOS I get `~/Library/Caches`), but it is intentionally not
     // documented as part of the public API:
     //
@@ -353,8 +339,8 @@ const deleteLegacyDiskCacheDirIfExists = async () => {
     // @ts-expect-error "cache" works but is not part of the public API.
     const cacheDir = path.join(app.getPath("cache"), "ente");
     if (existsSync(cacheDir)) {
-        await removeIfExists(path.join(cacheDir, "thumbs"));
-        await removeIfExists(path.join(cacheDir, "files"));
+        log.info(`Removing legacy disk cache from ${cacheDir}`);
+        await fs.rm(cacheDir, { recursive: true });
     }
 };
 

+ 11 - 14
desktop/src/main/ipc.ts

@@ -24,7 +24,6 @@ import {
     updateOnNextRestart,
 } from "./services/app-update";
 import {
-    legacyFaceCrop,
     openDirectory,
     openLogDirectory,
     selectDirectory,
@@ -43,10 +42,10 @@ import {
 import { convertToJPEG, generateImageThumbnail } from "./services/image";
 import { logout } from "./services/logout";
 import {
-    clipImageEmbedding,
-    clipTextEmbeddingIfAvailable,
+    computeCLIPImageEmbedding,
+    computeCLIPTextEmbeddingIfAvailable,
 } from "./services/ml-clip";
-import { detectFaces, faceEmbeddings } from "./services/ml-face";
+import { computeFaceEmbeddings, detectFaces } from "./services/ml-face";
 import { encryptionKey, saveEncryptionKey } from "./services/store";
 import {
     clearPendingUploads,
@@ -170,24 +169,22 @@ export const attachIPCHandlers = () => {
 
     // - ML
 
-    ipcMain.handle("clipImageEmbedding", (_, jpegImageData: Uint8Array) =>
-        clipImageEmbedding(jpegImageData),
+    ipcMain.handle(
+        "computeCLIPImageEmbedding",
+        (_, jpegImageData: Uint8Array) =>
+            computeCLIPImageEmbedding(jpegImageData),
     );
 
-    ipcMain.handle("clipTextEmbeddingIfAvailable", (_, text: string) =>
-        clipTextEmbeddingIfAvailable(text),
+    ipcMain.handle("computeCLIPTextEmbeddingIfAvailable", (_, text: string) =>
+        computeCLIPTextEmbeddingIfAvailable(text),
     );
 
     ipcMain.handle("detectFaces", (_, input: Float32Array) =>
         detectFaces(input),
     );
 
-    ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
-        faceEmbeddings(input),
-    );
-
-    ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>
-        legacyFaceCrop(faceID),
+    ipcMain.handle("computeFaceEmbeddings", (_, input: Float32Array) =>
+        computeFaceEmbeddings(input),
     );
 
     // - Upload

+ 0 - 15
desktop/src/main/services/dir.ts

@@ -1,7 +1,5 @@
 import { shell } from "electron/common";
 import { app, dialog } from "electron/main";
-import { existsSync } from "fs";
-import fs from "node:fs/promises";
 import path from "node:path";
 import { posixPath } from "../utils/electron";
 
@@ -78,16 +76,3 @@ export const openLogDirectory = () => openDirectory(logDirectoryPath());
  * - Windows: %USERPROFILE%\AppData\Roaming\ente\logs\ente.log
  */
 const logDirectoryPath = () => app.getPath("logs");
-
-/**
- * See: [Note: Legacy face crops]
- */
-export const legacyFaceCrop = async (
-    faceID: string,
-): Promise<Uint8Array | undefined> => {
-    // See: [Note: Getting the cache path]
-    // @ts-expect-error "cache" works but is not part of the public API.
-    const cacheDir = path.join(app.getPath("cache"), "ente");
-    const filePath = path.join(cacheDir, "face-crops", faceID);
-    return existsSync(filePath) ? await fs.readFile(filePath) : undefined;
-};

+ 12 - 10
desktop/src/main/services/ml-clip.ts

@@ -11,7 +11,7 @@ import * as ort from "onnxruntime-node";
 import Tokenizer from "../../thirdparty/clip-bpe-ts/mod";
 import log from "../log";
 import { writeStream } from "../stream";
-import { ensure } from "../utils/common";
+import { ensure, wait } from "../utils/common";
 import { deleteTempFile, makeTempFilePath } from "../utils/temp";
 import { makeCachedInferenceSession } from "./ml";
 
@@ -20,7 +20,7 @@ const cachedCLIPImageSession = makeCachedInferenceSession(
     351468764 /* 335.2 MB */,
 );
 
-export const clipImageEmbedding = async (jpegImageData: Uint8Array) => {
+export const computeCLIPImageEmbedding = async (jpegImageData: Uint8Array) => {
     const tempFilePath = await makeTempFilePath();
     const imageStream = new Response(jpegImageData.buffer).body;
     await writeStream(tempFilePath, ensure(imageStream));
@@ -42,7 +42,7 @@ const clipImageEmbedding_ = async (jpegFilePath: string) => {
     const results = await session.run(feeds);
     log.debug(
         () =>
-            `onnx/clip image embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
+            `ONNX/CLIP image embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
     );
     /* Need these model specific casts to type the result */
     const imageEmbedding = ensure(results.output).data as Float32Array;
@@ -140,21 +140,23 @@ const getTokenizer = () => {
     return _tokenizer;
 };
 
-export const clipTextEmbeddingIfAvailable = async (text: string) => {
-    const sessionOrStatus = await Promise.race([
+export const computeCLIPTextEmbeddingIfAvailable = async (text: string) => {
+    const sessionOrSkip = await Promise.race([
         cachedCLIPTextSession(),
-        "downloading-model",
+        // Wait for a tick to give the session promise a chance to resolve the
+        // first time this code runs on each app start (if the model has
+        // already been downloaded).
+        wait(0).then(() => 1),
     ]);
 
-    // Don't wait for the download to complete
-    if (typeof sessionOrStatus == "string") {
+    // Don't wait for the download to complete.
+    if (typeof sessionOrSkip == "number") {
         log.info(
             "Ignoring CLIP text embedding request because model download is pending",
         );
         return undefined;
     }
 
-    const session = sessionOrStatus;
+    const session = sessionOrSkip;
     const t1 = Date.now();
     const tokenizer = getTokenizer();
     const tokenizedText = Int32Array.from(tokenizer.encodeForCLIP(text));
@@ -165,7 +167,7 @@ export const clipTextEmbeddingIfAvailable = async (text: string) => {
     const results = await session.run(feeds);
     log.debug(
         () =>
-            `onnx/clip text embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
+            `ONNX/CLIP text embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
     );
     const textEmbedding = ensure(results.output).data as Float32Array;
     return normalizeEmbedding(textEmbedding);
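
The change above swaps the old string sentinel for a numeric one produced after wait(0): if the cached session promise has already resolved it wins the race, otherwise the sentinel wins and the embedding request is skipped without blocking on the model. Distilled into a standalone helper, the pattern looks roughly like this (an illustrative sketch built on the wait helper added to utils/common.ts below; this helper itself is not part of the commit):

    import { wait } from "../utils/common";

    /**
     * Return the value of {@link promise} if it has (effectively) already
     * resolved, and `undefined` otherwise, without waiting for it to settle.
     */
    const valueIfAlreadyResolved = async <T extends object>(
        promise: Promise<T>,
    ): Promise<T | undefined> =>
        Promise.race([
            promise,
            // wait(0) resolves on a later (macrotask) tick, so a promise that
            // has already resolved wins the race; otherwise we get undefined.
            wait(0).then(() => undefined),
        ]);

computeCLIPTextEmbeddingIfAvailable uses the number 1 as its sentinel instead, so that a plain typeof check can tell the two outcomes apart.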

+ 3 - 3
desktop/src/main/services/ml-face.ts

@@ -23,7 +23,7 @@ export const detectFaces = async (input: Float32Array) => {
         input: new ort.Tensor("float32", input, [1, 3, 640, 640]),
     };
     const results = await session.run(feeds);
-    log.debug(() => `onnx/yolo face detection took ${Date.now() - t} ms`);
+    log.debug(() => `ONNX/YOLO face detection took ${Date.now() - t} ms`);
     return ensure(results.output).data;
 };
 
@@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
     5286998 /* 5 MB */,
 );
 
-export const faceEmbeddings = async (input: Float32Array) => {
+export const computeFaceEmbeddings = async (input: Float32Array) => {
     // Dimension of each face (alias)
     const mobileFaceNetFaceSize = 112;
     // Smaller alias
@@ -45,7 +45,7 @@ export const faceEmbeddings = async (input: Float32Array) => {
     const t = Date.now();
     const feeds = { img_inputs: inputTensor };
     const results = await session.run(feeds);
-    log.debug(() => `onnx/yolo face embedding took ${Date.now() - t} ms`);
+    log.debug(() => `ONNX/MFNT face embedding took ${Date.now() - t} ms`);
     /* Need these model specific casts to extract and type the result */
     return (results.embeddings as unknown as Record<string, unknown>)
         .cpuData as Float32Array;

+ 9 - 0
desktop/src/main/utils/common.ts

@@ -13,3 +13,12 @@ export const ensure = <T>(v: T | null | undefined): T => {
     if (v === undefined) throw new Error("Required value was not found");
     return v;
 };
+
+/**
+ * Wait for {@link ms} milliseconds.
+ *
+ * This function is a promisified `setTimeout`. It returns a promise that
+ * resolves after {@link ms} milliseconds.
+ */
+export const wait = (ms: number) =>
+    new Promise((resolve) => setTimeout(resolve, ms));
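
As a usage illustration (hypothetical, not part of this commit), the new helper composes naturally with other async code, e.g. a simple retry-with-backoff wrapper:

    import { wait } from "./common";

    /** Retry {@link op} up to {@link attempts} times with a linear backoff. */
    const withRetries = async <T>(
        op: () => Promise<T>,
        attempts = 3,
    ): Promise<T> => {
        for (let i = 0; ; i++) {
            try {
                return await op();
            } catch (e) {
                if (i + 1 >= attempts) throw e;
                // Pause a bit longer before each subsequent attempt.
                await wait(1000 * (i + 1));
            }
        }
    };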

+ 1 - 3
desktop/src/main/utils/electron.ts

@@ -55,9 +55,7 @@ export const execAsync = async (command: string | string[]) => {
         : command;
     const startTime = Date.now();
     const result = await execAsync_(escapedCommand);
-    log.debug(
-        () => `${escapedCommand} (${Math.round(Date.now() - startTime)} ms)`,
-    );
+    log.debug(() => `${escapedCommand} (${Date.now() - startTime} ms)`);
     return result;
 };
 

+ 9 - 13
desktop/src/preload.ts

@@ -153,20 +153,17 @@ const ffmpegExec = (
 
 // - ML
 
-const clipImageEmbedding = (jpegImageData: Uint8Array) =>
-    ipcRenderer.invoke("clipImageEmbedding", jpegImageData);
+const computeCLIPImageEmbedding = (jpegImageData: Uint8Array) =>
+    ipcRenderer.invoke("computeCLIPImageEmbedding", jpegImageData);
 
-const clipTextEmbeddingIfAvailable = (text: string) =>
-    ipcRenderer.invoke("clipTextEmbeddingIfAvailable", text);
+const computeCLIPTextEmbeddingIfAvailable = (text: string) =>
+    ipcRenderer.invoke("computeCLIPTextEmbeddingIfAvailable", text);
 
 const detectFaces = (input: Float32Array) =>
     ipcRenderer.invoke("detectFaces", input);
 
-const faceEmbeddings = (input: Float32Array) =>
-    ipcRenderer.invoke("faceEmbeddings", input);
-
-const legacyFaceCrop = (faceID: string) =>
-    ipcRenderer.invoke("legacyFaceCrop", faceID);
+const computeFaceEmbeddings = (input: Float32Array) =>
+    ipcRenderer.invoke("computeFaceEmbeddings", input);
 
 // - Watch
 
@@ -340,11 +337,10 @@ contextBridge.exposeInMainWorld("electron", {
 
     // - ML
 
-    clipImageEmbedding,
-    clipTextEmbeddingIfAvailable,
+    computeCLIPImageEmbedding,
+    computeCLIPTextEmbeddingIfAvailable,
     detectFaces,
-    faceEmbeddings,
-    legacyFaceCrop,
+    computeFaceEmbeddings,
 
     // - Watch
 

+ 1 - 8
web/apps/photos/src/components/ml/MLSearchSettings.tsx

@@ -270,14 +270,7 @@ function EnableMLSearch({ onClose, enableMlSearch, onRootClose }) {
                     {" "}
                     <Typography color="text.muted">
                         {/* <Trans i18nKey={"ENABLE_ML_SEARCH_DESCRIPTION"} /> */}
-                        <p>
-                            We're putting finishing touches, coming back soon!
-                        </p>
-                        <p>
-                            <small>
-                                Existing indexed faces will continue to show.
-                            </small>
-                        </p>
+                        We're putting finishing touches, coming back soon!
                     </Typography>
                 </Box>
                 {isInternalUserForML() && (

+ 4 - 11
web/apps/photos/src/components/ml/PeopleList.tsx

@@ -151,22 +151,15 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({ faceID }) => {
 
     useEffect(() => {
         let didCancel = false;
-        const electron = globalThis.electron;
-
-        if (faceID && electron) {
-            electron
-                .legacyFaceCrop(faceID)
-                .then(async (data) => {
-                    if (data) return data;
+        if (faceID) {
+            blobCache("face-crops")
+                .then((cache) => cache.get(faceID))
+                .then((data) => {
                     /*
                     TODO(MR): regen if needed and get this to work on web too.
                     cachedOrNew("face-crops", cacheKey, async () => {
                         return regenerateFaceCrop(faceId);
                     })*/
-                    const cache = await blobCache("face-crops");
-                    return await cache.get(faceID);
-                })
-                .then((data) => {
                     if (data) {
                         const blob = new Blob([data]);
                         if (!didCancel) setObjectURL(URL.createObjectURL(blob));
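
The hunk ends before the effect's cleanup, but the didCancel flag implies the usual cancellation pattern. The full effect presumably reads along these lines (a sketch; the cleanup at the end is assumed, not shown in this diff):

    useEffect(() => {
        let didCancel = false;
        if (faceID) {
            blobCache("face-crops")
                .then((cache) => cache.get(faceID))
                .then((data) => {
                    if (data) {
                        const blob = new Blob([data]);
                        // Ignore the result if the component unmounted (or the
                        // faceID changed) while the lookup was in flight.
                        if (!didCancel) setObjectURL(URL.createObjectURL(blob));
                    }
                });
        }
        return () => {
            didCancel = true;
        };
    }, [faceID]);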

+ 4 - 3
web/apps/photos/src/services/clip-service.ts

@@ -184,7 +184,7 @@ class CLIPService {
     };
 
     getTextEmbeddingIfAvailable = async (text: string) => {
-        return ensureElectron().clipTextEmbeddingIfAvailable(text);
+        return ensureElectron().computeCLIPTextEmbeddingIfAvailable(text);
     };
 
     private runClipEmbeddingExtraction = async (canceller: AbortController) => {
@@ -294,7 +294,7 @@ class CLIPService {
         const file = await localFile
             .arrayBuffer()
             .then((buffer) => new Uint8Array(buffer));
-        return await ensureElectron().clipImageEmbedding(file);
+        return await ensureElectron().computeCLIPImageEmbedding(file);
     };
 
     private encryptAndUploadEmbedding = async (
@@ -328,7 +328,8 @@ class CLIPService {
 
     private extractFileClipImageEmbedding = async (file: EnteFile) => {
         const thumb = await downloadManager.getThumbnail(file);
-        const embedding = await ensureElectron().clipImageEmbedding(thumb);
+        const embedding =
+            await ensureElectron().computeCLIPImageEmbedding(thumb);
         return embedding;
     };
 

+ 2 - 2
web/apps/photos/src/services/face/f-index.ts

@@ -57,8 +57,8 @@ export const indexFaces = async (enteFile: EnteFile, localFile?: File) => {
     }
 
     log.debug(() => {
-        const ms = Math.round(Date.now() - startTime);
         const nf = mlFile.faces?.length ?? 0;
+        const ms = Date.now() - startTime;
         return `Indexed ${nf} faces in file ${enteFile.id} (${ms} ms)`;
     });
     return mlFile;
@@ -625,7 +625,7 @@ const mobileFaceNetEmbeddingSize = 192;
 const computeEmbeddings = async (
     faceData: Float32Array,
 ): Promise<Float32Array[]> => {
-    const outputData = await workerBridge.faceEmbeddings(faceData);
+    const outputData = await workerBridge.computeFaceEmbeddings(faceData);
 
     const embeddingSize = mobileFaceNetEmbeddingSize;
     const embeddings = new Array<Float32Array>(
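
The remainder of computeEmbeddings lies outside this hunk; presumably it slices the flat Float32Array returned over the worker bridge into one embedding per face, along these lines (a sketch, assuming the output length is an exact multiple of the embedding size):

    const embeddingSize = mobileFaceNetEmbeddingSize; // 192
    const faceCount = outputData.length / embeddingSize;
    const embeddings = new Array<Float32Array>(faceCount);
    for (let i = 0; i < faceCount; i++) {
        // Each face's embedding occupies a contiguous 192-float slice.
        embeddings[i] = outputData.slice(
            i * embeddingSize,
            (i + 1) * embeddingSize,
        );
    }
    return embeddings;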

+ 1 - 3
web/apps/photos/src/services/heic-convert.ts

@@ -51,9 +51,7 @@ class HEICConverter {
                                     const startTime = Date.now();
                                     const convertedHEIC =
                                         await worker.heicToJPEG(fileBlob);
-                                    const ms = Math.round(
-                                        Date.now() - startTime,
-                                    );
+                                    const ms = Date.now() - startTime;
                                     log.debug(() => `heic => jpeg (${ms} ms)`);
                                     clearTimeout(timeout);
                                     resolve(convertedHEIC);

+ 1 - 1
web/apps/photos/src/worker/ffmpeg.worker.ts

@@ -82,7 +82,7 @@ const ffmpegExec = async (
 
         const result = ffmpeg.FS("readFile", outputPath);
 
-        const ms = Math.round(Date.now() - startTime);
+        const ms = Date.now() - startTime;
         log.debug(() => `[wasm] ffmpeg ${cmd.join(" ")} (${ms} ms)`);
         return result;
     } finally {

+ 5 - 25
web/packages/next/types/ipc.ts

@@ -297,7 +297,9 @@ export interface Electron {
      *
      * @returns A CLIP embedding.
      */
-    clipImageEmbedding: (jpegImageData: Uint8Array) => Promise<Float32Array>;
+    computeCLIPImageEmbedding: (
+        jpegImageData: Uint8Array,
+    ) => Promise<Float32Array>;
 
     /**
      * Return a CLIP embedding of the given image if we already have the model
@@ -319,7 +321,7 @@ export interface Electron {
      *
      * @returns A CLIP embedding.
      */
-    clipTextEmbeddingIfAvailable: (
+    computeCLIPTextEmbeddingIfAvailable: (
         text: string,
     ) => Promise<Float32Array | undefined>;
 
@@ -337,29 +339,7 @@ export interface Electron {
      * Both the input and output are opaque binary data whose internal structure
      * is specific to our implementation and the model (MobileFaceNet) we use.
      */
-    faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
-
-    /**
-     * Return a face crop stored by a previous version of ML.
-     *
-     * [Note: Legacy face crops]
-     *
-     * Older versions of ML generated and stored face crops in a "face-crops"
-     * cache directory on the Electron side. For the time being, we have
-     * disabled the face search whilst we put finishing touches to it. However,
-     * it'll be nice to still show the existing faces that have been clustered
-     * for people who opted in to the older beta.
-     *
-     * So we retain the older "face-crops" disk cache, and use this method to
-     * serve faces from it when needed.
-     *
-     * @param faceID An identifier corresponding to which the face crop had been
-     * stored by the older version of our app.
-     *
-     * @returns the JPEG data of the face crop if a file is found for the given
-     * {@link faceID}, otherwise undefined.
-     */
-    legacyFaceCrop: (faceID: string) => Promise<Uint8Array | undefined>;
+    computeFaceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
 
     // - Watch
 

+ 2 - 2
web/packages/next/worker/comlink-worker.ts

@@ -47,8 +47,8 @@ const workerBridge = {
     convertToJPEG: (imageData: Uint8Array) =>
         ensureElectron().convertToJPEG(imageData),
     detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
-    faceEmbeddings: (input: Float32Array) =>
-        ensureElectron().faceEmbeddings(input),
+    computeFaceEmbeddings: (input: Float32Array) =>
+        ensureElectron().computeFaceEmbeddings(input),
 };
 
 export type WorkerBridge = typeof workerBridge;

+ 4 - 0
web/packages/utils/promise.ts

@@ -10,6 +10,10 @@ export const wait = (ms: number) =>
 /**
  * Await the given {@link promise} for {@link timeoutMS} milliseconds. If it
  * does not resolve within {@link timeoutMS}, then reject with a timeout error.
+ *
+ * Note that this does not abort {@link promise} itself: it will still run to
+ * completion, but its result will be ignored if it resolves after we have
+ * already timed out.
  */
 export const withTimeout = async <T>(promise: Promise<T>, ms: number) => {
     let timeoutId: ReturnType<typeof setTimeout>;
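
A body consistent with that contract (and with the timeoutId declaration above) might look like the following sketch; the losing promise keeps running, only its settlement is ignored:

    export const withTimeout = async <T>(promise: Promise<T>, ms: number) => {
        let timeoutId: ReturnType<typeof setTimeout>;
        const rejectOnTimeout = new Promise<never>((_, reject) => {
            timeoutId = setTimeout(
                () => reject(new Error("Operation timed out")),
                ms,
            );
        });
        try {
            // Whichever settles first wins the race; the other is not aborted.
            return await Promise.race([promise, rejectOnTimeout]);
        } finally {
            // Don't leave the timer pending once the race has been decided.
            clearTimeout(timeoutId!);
        }
    };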