[desktop] ML: Clarify that existing indexed people will not show in this update (#1775)
commit 07ba9ef1d6
17 changed files with 76 additions and 134 deletions

@@ -315,32 +315,18 @@ const setupTrayItem = (mainWindow: BrowserWindow) => {
 /**
  * Older versions of our app used to maintain a cache dir using the main
- * process. This has been removed in favor of cache on the web layer.
+ * process. This has been removed in favor of cache on the web layer. Delete the
+ * old cache dir if it exists.
  *
- * Delete the old cache dir if it exists.
- *
- * This will happen in two phases. The cache had three subdirectories:
- *
- * - Two of them, "thumbs" and "files", will be removed now (v1.7.0, May 2024).
- *
- * - The third one, "face-crops" will be removed once we finish the face search
- *   changes. See: [Note: Legacy face crops].
- *
- * This migration code can be removed after some time once most people have
- * upgraded to newer versions.
+ * Added May 2024, v1.7.0. This migration code can be removed after some time
+ * once most people have upgraded to newer versions.
  */
 const deleteLegacyDiskCacheDirIfExists = async () => {
-    const removeIfExists = async (dirPath: string) => {
-        if (existsSync(dirPath)) {
-            log.info(`Removing legacy disk cache from ${dirPath}`);
-            await fs.rm(dirPath, { recursive: true });
-        }
-    };
     // [Note: Getting the cache path]
     //
     // The existing code was passing "cache" as a parameter to getPath.
     //
-    // However, "cache" is not a valid parameter to getPath. It works! (for
+    // However, "cache" is not a valid parameter to getPath. It works (for
     // example, on macOS I get `~/Library/Caches`), but it is intentionally not
    // documented as part of the public API:
     //

@@ -353,8 +339,8 @@ const deleteLegacyDiskCacheDirIfExists = async () => {
     // @ts-expect-error "cache" works but is not part of the public API.
     const cacheDir = path.join(app.getPath("cache"), "ente");
     if (existsSync(cacheDir)) {
-        await removeIfExists(path.join(cacheDir, "thumbs"));
-        await removeIfExists(path.join(cacheDir, "files"));
+        log.info(`Removing legacy disk cache from ${cacheDir}`);
+        await fs.rm(cacheDir, { recursive: true });
     }
 };

@@ -24,7 +24,6 @@ import {
     updateOnNextRestart,
 } from "./services/app-update";
 import {
-    legacyFaceCrop,
     openDirectory,
     openLogDirectory,
     selectDirectory,

@@ -43,10 +42,10 @@ import {
 import { convertToJPEG, generateImageThumbnail } from "./services/image";
 import { logout } from "./services/logout";
 import {
-    clipImageEmbedding,
-    clipTextEmbeddingIfAvailable,
+    computeCLIPImageEmbedding,
+    computeCLIPTextEmbeddingIfAvailable,
 } from "./services/ml-clip";
-import { detectFaces, faceEmbeddings } from "./services/ml-face";
+import { computeFaceEmbeddings, detectFaces } from "./services/ml-face";
 import { encryptionKey, saveEncryptionKey } from "./services/store";
 import {
     clearPendingUploads,

@@ -170,24 +169,22 @@ export const attachIPCHandlers = () => {
 
     // - ML
 
-    ipcMain.handle("clipImageEmbedding", (_, jpegImageData: Uint8Array) =>
-        clipImageEmbedding(jpegImageData),
+    ipcMain.handle(
+        "computeCLIPImageEmbedding",
+        (_, jpegImageData: Uint8Array) =>
+            computeCLIPImageEmbedding(jpegImageData),
     );
 
-    ipcMain.handle("clipTextEmbeddingIfAvailable", (_, text: string) =>
-        clipTextEmbeddingIfAvailable(text),
+    ipcMain.handle("computeCLIPTextEmbeddingIfAvailable", (_, text: string) =>
+        computeCLIPTextEmbeddingIfAvailable(text),
     );
 
     ipcMain.handle("detectFaces", (_, input: Float32Array) =>
         detectFaces(input),
     );
 
-    ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
-        faceEmbeddings(input),
-    );
-
-    ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>
-        legacyFaceCrop(faceID),
+    ipcMain.handle("computeFaceEmbeddings", (_, input: Float32Array) =>
+        computeFaceEmbeddings(input),
     );
 
     // - Upload
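
Aside: each of these renamed channels is one half of Electron's
request/response IPC; the matching ipcRenderer.invoke calls appear later in
this diff, in the preload script. A minimal sketch of the pairing follows (the
stub body and helper names are illustrative, not code from this commit):

    // main process: register a handler for the channel.
    import { ipcMain } from "electron/main";

    // Stub standing in for the real implementation in services/ml-clip.
    const computeCLIPImageEmbedding = async (
        jpegImageData: Uint8Array,
    ): Promise<Float32Array> => new Float32Array(512);

    ipcMain.handle("computeCLIPImageEmbedding", (_, jpegImageData: Uint8Array) =>
        computeCLIPImageEmbedding(jpegImageData),
    );

    // preload: expose a function that invokes the same channel. The returned
    // promise resolves with whatever the main process handler returned.
    import { ipcRenderer } from "electron/renderer";

    const invokeComputeCLIPImageEmbedding = (
        jpegImageData: Uint8Array,
    ): Promise<Float32Array> =>
        ipcRenderer.invoke("computeCLIPImageEmbedding", jpegImageData);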

@@ -1,7 +1,5 @@
 import { shell } from "electron/common";
 import { app, dialog } from "electron/main";
-import { existsSync } from "fs";
-import fs from "node:fs/promises";
 import path from "node:path";
 import { posixPath } from "../utils/electron";

@@ -78,16 +76,3 @@ export const openLogDirectory = () => openDirectory(logDirectoryPath());
  * - Windows: %USERPROFILE%\AppData\Roaming\ente\logs\ente.log
  */
 const logDirectoryPath = () => app.getPath("logs");
-
-/**
- * See: [Note: Legacy face crops]
- */
-export const legacyFaceCrop = async (
-    faceID: string,
-): Promise<Uint8Array | undefined> => {
-    // See: [Note: Getting the cache path]
-    // @ts-expect-error "cache" works but is not part of the public API.
-    const cacheDir = path.join(app.getPath("cache"), "ente");
-    const filePath = path.join(cacheDir, "face-crops", faceID);
-    return existsSync(filePath) ? await fs.readFile(filePath) : undefined;
-};

@@ -11,7 +11,7 @@ import * as ort from "onnxruntime-node";
 import Tokenizer from "../../thirdparty/clip-bpe-ts/mod";
 import log from "../log";
 import { writeStream } from "../stream";
-import { ensure } from "../utils/common";
+import { ensure, wait } from "../utils/common";
 import { deleteTempFile, makeTempFilePath } from "../utils/temp";
 import { makeCachedInferenceSession } from "./ml";

@@ -20,7 +20,7 @@ const cachedCLIPImageSession = makeCachedInferenceSession(
     351468764 /* 335.2 MB */,
 );
 
-export const clipImageEmbedding = async (jpegImageData: Uint8Array) => {
+export const computeCLIPImageEmbedding = async (jpegImageData: Uint8Array) => {
     const tempFilePath = await makeTempFilePath();
     const imageStream = new Response(jpegImageData.buffer).body;
     await writeStream(tempFilePath, ensure(imageStream));

@@ -42,7 +42,7 @@ const clipImageEmbedding_ = async (jpegFilePath: string) => {
     const results = await session.run(feeds);
     log.debug(
         () =>
-            `onnx/clip image embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
+            `ONNX/CLIP image embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
     );
     /* Need these model specific casts to type the result */
     const imageEmbedding = ensure(results.output).data as Float32Array;

@@ -140,21 +140,23 @@ const getTokenizer = () => {
     return _tokenizer;
 };
 
-export const clipTextEmbeddingIfAvailable = async (text: string) => {
-    const sessionOrStatus = await Promise.race([
+export const computeCLIPTextEmbeddingIfAvailable = async (text: string) => {
+    const sessionOrSkip = await Promise.race([
         cachedCLIPTextSession(),
-        "downloading-model",
+        // Wait for a tick to get the session promise to resolve the first time
+        // this code runs on each app start (and the model has been downloaded).
+        wait(0).then(() => 1),
     ]);
 
-    // Don't wait for the download to complete
-    if (typeof sessionOrStatus == "string") {
+    // Don't wait for the download to complete.
+    if (typeof sessionOrSkip == "number") {
         log.info(
             "Ignoring CLIP text embedding request because model download is pending",
         );
         return undefined;
     }
 
-    const session = sessionOrStatus;
+    const session = sessionOrSkip;
     const t1 = Date.now();
     const tokenizer = getTokenizer();
     const tokenizedText = Int32Array.from(tokenizer.encodeForCLIP(text));
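
Aside on the Promise.race change above: racing a promise against a plain
string, as the old code did, meant the non-promise sentinel could win even
when the cached session was effectively ready on app start. The new sentinel
is itself a promise that settles one timer tick later, which gives an
already-resolved session promise the chance to win the race; a still-pending
download loses it and the request is skipped. A self-contained sketch of the
pattern with hypothetical names (not this module's code):

    const wait = (ms: number) =>
        new Promise<void>((resolve) => setTimeout(resolve, ms));

    // Stand-in for the cached ONNX session promise.
    let cached: Promise<string> | undefined;
    const cachedResource = () => (cached ??= wait(100).then(() => "resource"));

    const useResourceIfAvailable = async () => {
        const resourceOrSkip = await Promise.race([
            cachedResource(),
            // Resolves a tick later, so an already-resolved resource wins.
            wait(0).then(() => 1 as const),
        ]);
        if (resourceOrSkip == 1) return undefined; // not ready; don't block
        return resourceOrSkip;
    };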

@@ -165,7 +167,7 @@ export const clipTextEmbeddingIfAvailable = async (text: string) => {
     const results = await session.run(feeds);
     log.debug(
         () =>
-            `onnx/clip text embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
+            `ONNX/CLIP text embedding took ${Date.now() - t1} ms (prep: ${t2 - t1} ms, inference: ${Date.now() - t2} ms)`,
     );
     const textEmbedding = ensure(results.output).data as Float32Array;
     return normalizeEmbedding(textEmbedding);

@@ -23,7 +23,7 @@ export const detectFaces = async (input: Float32Array) => {
         input: new ort.Tensor("float32", input, [1, 3, 640, 640]),
     };
     const results = await session.run(feeds);
-    log.debug(() => `onnx/yolo face detection took ${Date.now() - t} ms`);
+    log.debug(() => `ONNX/YOLO face detection took ${Date.now() - t} ms`);
     return ensure(results.output).data;
 };

@@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
     5286998 /* 5 MB */,
 );
 
-export const faceEmbeddings = async (input: Float32Array) => {
+export const computeFaceEmbeddings = async (input: Float32Array) => {
     // Dimension of each face (alias)
     const mobileFaceNetFaceSize = 112;
     // Smaller alias

@@ -45,7 +45,7 @@ export const faceEmbeddings = async (input: Float32Array) => {
     const t = Date.now();
     const feeds = { img_inputs: inputTensor };
     const results = await session.run(feeds);
-    log.debug(() => `onnx/yolo face embedding took ${Date.now() - t} ms`);
+    log.debug(() => `ONNX/MFNT face embedding took ${Date.now() - t} ms`);
     /* Need these model specific casts to extract and type the result */
     return (results.embeddings as unknown as Record<string, unknown>)
         .cpuData as Float32Array;

@@ -13,3 +13,12 @@ export const ensure = <T>(v: T | null | undefined): T => {
     if (v === undefined) throw new Error("Required value was not found");
     return v;
 };
+
+/**
+ * Wait for {@link ms} milliseconds
+ *
+ * This function is a promisified `setTimeout`. It returns a promise that
+ * resolves after {@link ms} milliseconds.
+ */
+export const wait = (ms: number) =>
+    new Promise((resolve) => setTimeout(resolve, ms));
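
For reference, a couple of hypothetical uses of the new wait() helper (not
from this commit): pausing between retries, and yielding a timer tick as the
CLIP change above does.

    const example = async () => {
        await wait(2000); // pause ~2 seconds, e.g. between retry attempts
        await wait(0); // defer just until the next timer tick
    };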

@@ -55,9 +55,7 @@ export const execAsync = async (command: string | string[]) => {
         : command;
     const startTime = Date.now();
     const result = await execAsync_(escapedCommand);
-    log.debug(
-        () => `${escapedCommand} (${Math.round(Date.now() - startTime)} ms)`,
-    );
+    log.debug(() => `${escapedCommand} (${Date.now() - startTime} ms)`);
     return result;
 };
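
Note that the log.debug call sites in these hunks pass a thunk, not a string:
the message is only constructed if debug logging is actually enabled. A
minimal sketch of a logger with that shape (an assumption; the app's real log
module is not shown in this diff):

    const isDebugEnabled = () => process.env.NODE_ENV === "development";

    // Accepting () => string instead of string means callers pay the cost of
    // building the message only when debug output is switched on.
    const debug = (message: () => string) => {
        if (isDebugEnabled()) console.log(`[debug] ${message()}`);
    };

    const startTime = Date.now();
    debug(() => `some command (${Date.now() - startTime} ms)`);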

@@ -153,20 +153,17 @@ const ffmpegExec = (
 
 // - ML
 
-const clipImageEmbedding = (jpegImageData: Uint8Array) =>
-    ipcRenderer.invoke("clipImageEmbedding", jpegImageData);
+const computeCLIPImageEmbedding = (jpegImageData: Uint8Array) =>
+    ipcRenderer.invoke("computeCLIPImageEmbedding", jpegImageData);
 
-const clipTextEmbeddingIfAvailable = (text: string) =>
-    ipcRenderer.invoke("clipTextEmbeddingIfAvailable", text);
+const computeCLIPTextEmbeddingIfAvailable = (text: string) =>
+    ipcRenderer.invoke("computeCLIPTextEmbeddingIfAvailable", text);
 
 const detectFaces = (input: Float32Array) =>
     ipcRenderer.invoke("detectFaces", input);
 
-const faceEmbeddings = (input: Float32Array) =>
-    ipcRenderer.invoke("faceEmbeddings", input);
-
-const legacyFaceCrop = (faceID: string) =>
-    ipcRenderer.invoke("legacyFaceCrop", faceID);
+const computeFaceEmbeddings = (input: Float32Array) =>
+    ipcRenderer.invoke("computeFaceEmbeddings", input);
 
 // - Watch

@@ -340,11 +337,10 @@ contextBridge.exposeInMainWorld("electron", {
 
     // - ML
 
-    clipImageEmbedding,
-    clipTextEmbeddingIfAvailable,
+    computeCLIPImageEmbedding,
+    computeCLIPTextEmbeddingIfAvailable,
     detectFaces,
-    faceEmbeddings,
-    legacyFaceCrop,
+    computeFaceEmbeddings,
 
     // - Watch

@@ -270,14 +270,7 @@ function EnableMLSearch({ onClose, enableMlSearch, onRootClose }) {
             {" "}
             <Typography color="text.muted">
-                {/* <Trans i18nKey={"ENABLE_ML_SEARCH_DESCRIPTION"} /> */}
-                <p>
-                    We're putting finishing touches, coming back soon!
-                </p>
-                <p>
-                    <small>
-                        Existing indexed faces will continue to show.
-                    </small>
-                </p>
+                We're putting finishing touches, coming back soon!
             </Typography>
         </Box>
         {isInternalUserForML() && (

@@ -151,22 +151,15 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({ faceID }) => {
 
     useEffect(() => {
         let didCancel = false;
-        const electron = globalThis.electron;
-
-        if (faceID && electron) {
-            electron
-                .legacyFaceCrop(faceID)
-                .then(async (data) => {
-                    if (data) return data;
+        if (faceID) {
+            blobCache("face-crops")
+                .then((cache) => cache.get(faceID))
+                .then((data) => {
                     /*
                     TODO(MR): regen if needed and get this to work on web too.
                     cachedOrNew("face-crops", cacheKey, async () => {
                         return regenerateFaceCrop(faceId);
                     })*/
-                    const cache = await blobCache("face-crops");
-                    return await cache.get(faceID);
-                })
-                .then((data) => {
                     if (data) {
                         const blob = new Blob([data]);
                         if (!didCancel) setObjectURL(URL.createObjectURL(blob));

@@ -184,7 +184,7 @@ class CLIPService {
     };
 
     getTextEmbeddingIfAvailable = async (text: string) => {
-        return ensureElectron().clipTextEmbeddingIfAvailable(text);
+        return ensureElectron().computeCLIPTextEmbeddingIfAvailable(text);
     };
 
     private runClipEmbeddingExtraction = async (canceller: AbortController) => {

@@ -294,7 +294,7 @@ class CLIPService {
         const file = await localFile
             .arrayBuffer()
             .then((buffer) => new Uint8Array(buffer));
-        return await ensureElectron().clipImageEmbedding(file);
+        return await ensureElectron().computeCLIPImageEmbedding(file);
     };
 
     private encryptAndUploadEmbedding = async (

@@ -328,7 +328,8 @@ class CLIPService {
 
     private extractFileClipImageEmbedding = async (file: EnteFile) => {
         const thumb = await downloadManager.getThumbnail(file);
-        const embedding = await ensureElectron().clipImageEmbedding(thumb);
+        const embedding =
+            await ensureElectron().computeCLIPImageEmbedding(thumb);
         return embedding;
     };
 

@@ -57,8 +57,8 @@ export const indexFaces = async (enteFile: EnteFile, localFile?: File) => {
     }
 
     log.debug(() => {
-        const ms = Math.round(Date.now() - startTime);
         const nf = mlFile.faces?.length ?? 0;
+        const ms = Date.now() - startTime;
         return `Indexed ${nf} faces in file ${enteFile.id} (${ms} ms)`;
     });
     return mlFile;

@@ -625,7 +625,7 @@ const mobileFaceNetEmbeddingSize = 192;
 const computeEmbeddings = async (
     faceData: Float32Array,
 ): Promise<Float32Array[]> => {
-    const outputData = await workerBridge.faceEmbeddings(faceData);
+    const outputData = await workerBridge.computeFaceEmbeddings(faceData);
 
     const embeddingSize = mobileFaceNetEmbeddingSize;
     const embeddings = new Array<Float32Array>(

@@ -51,9 +51,7 @@ class HEICConverter {
                         const startTime = Date.now();
                         const convertedHEIC =
                             await worker.heicToJPEG(fileBlob);
-                        const ms = Math.round(
-                            Date.now() - startTime,
-                        );
+                        const ms = Date.now() - startTime;
                         log.debug(() => `heic => jpeg (${ms} ms)`);
                         clearTimeout(timeout);
                         resolve(convertedHEIC);

@@ -82,7 +82,7 @@ const ffmpegExec = async (
 
         const result = ffmpeg.FS("readFile", outputPath);
 
-        const ms = Math.round(Date.now() - startTime);
+        const ms = Date.now() - startTime;
         log.debug(() => `[wasm] ffmpeg ${cmd.join(" ")} (${ms} ms)`);
         return result;
     } finally {

@@ -297,7 +297,9 @@ export interface Electron {
      *
      * @returns A CLIP embedding.
      */
-    clipImageEmbedding: (jpegImageData: Uint8Array) => Promise<Float32Array>;
+    computeCLIPImageEmbedding: (
+        jpegImageData: Uint8Array,
+    ) => Promise<Float32Array>;
 
     /**
      * Return a CLIP embedding of the given image if we already have the model

@@ -319,7 +321,7 @@ export interface Electron {
      *
      * @returns A CLIP embedding.
      */
-    clipTextEmbeddingIfAvailable: (
+    computeCLIPTextEmbeddingIfAvailable: (
         text: string,
     ) => Promise<Float32Array | undefined>;
 

@@ -337,29 +339,7 @@ export interface Electron {
      * Both the input and output are opaque binary data whose internal structure
      * is specific to our implementation and the model (MobileFaceNet) we use.
      */
-    faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
-
-    /**
-     * Return a face crop stored by a previous version of ML.
-     *
-     * [Note: Legacy face crops]
-     *
-     * Older versions of ML generated and stored face crops in a "face-crops"
-     * cache directory on the Electron side. For the time being, we have
-     * disabled the face search whilst we put finishing touches to it. However,
-     * it'll be nice to still show the existing faces that have been clustered
-     * for people who opted in to the older beta.
-     *
-     * So we retain the older "face-crops" disk cache, and use this method to
-     * serve faces from it when needed.
-     *
-     * @param faceID An identifier corresponding to which the face crop had been
-     * stored by the older version of our app.
-     *
-     * @returns the JPEG data of the face crop if a file is found for the given
-     * {@link faceID}, otherwise undefined.
-     */
-    legacyFaceCrop: (faceID: string) => Promise<Uint8Array | undefined>;
+    computeFaceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
 
     // - Watch

@@ -47,8 +47,8 @@ const workerBridge = {
    convertToJPEG: (imageData: Uint8Array) =>
        ensureElectron().convertToJPEG(imageData),
    detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
-   faceEmbeddings: (input: Float32Array) =>
-       ensureElectron().faceEmbeddings(input),
+   computeFaceEmbeddings: (input: Float32Array) =>
+       ensureElectron().computeFaceEmbeddings(input),
 };
 
 export type WorkerBridge = typeof workerBridge;

@@ -10,6 +10,10 @@ export const wait = (ms: number) =>
 /**
  * Await the given {@link promise} for {@link timeoutMS} milliseconds. If it
  * does not resolve within {@link timeoutMS}, then reject with a timeout error.
+ *
+ * Note that this does not abort {@link promise} itself - it will still run to
+ * completion; its result will just be ignored if it resolves after we've
+ * already timed out.
  */
 export const withTimeout = async <T>(promise: Promise<T>, ms: number) => {
     let timeoutId: ReturnType<typeof setTimeout>;
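
The hunk ends mid-function, so the body of withTimeout is not visible here.
For context, a sketch of an implementation consistent with the doc comment
above (an assumption, not the code from this commit):

    const withTimeoutSketch = async <T>(promise: Promise<T>, ms: number) => {
        let timeoutId: ReturnType<typeof setTimeout> | undefined;
        const timeout = new Promise<never>((_, reject) => {
            timeoutId = setTimeout(() => reject(new Error("Timed out")), ms);
        });
        try {
            // First to settle wins. The original promise is not aborted; if
            // it resolves after the timeout, its result is simply ignored.
            return await Promise.race([promise, timeout]);
        } finally {
            if (timeoutId !== undefined) clearTimeout(timeoutId);
        }
    };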