embeddings

commit 43a3df5bbf (parent 73946d9b8e)
Manav Rathi, 2024-05-16 13:41:50 +05:30

10 changed files with 40 additions and 55 deletions


@@ -46,7 +46,7 @@ import {
     clipImageEmbedding,
     clipTextEmbeddingIfAvailable,
 } from "./services/ml-clip";
-import { detectFaces, faceEmbedding } from "./services/ml-face";
+import { detectFaces, faceEmbeddings } from "./services/ml-face";
 import { encryptionKey, saveEncryptionKey } from "./services/store";
 import {
     clearPendingUploads,
@@ -182,8 +182,8 @@ export const attachIPCHandlers = () => {
         detectFaces(input),
     );

-    ipcMain.handle("faceEmbedding", (_, input: Float32Array) =>
-        faceEmbedding(input),
+    ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
+        faceEmbeddings(input),
     );

     ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>


@@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
     5286998 /* 5 MB */,
 );

-export const faceEmbedding = async (input: Float32Array) => {
+export const faceEmbeddings = async (input: Float32Array) => {
     // Dimension of each face (alias)
     const mobileFaceNetFaceSize = 112;
     // Smaller alias
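In the hunk context above, the MobileFaceNet ONNX session is created via makeCachedInferenceSession with the model's expected size (5286998 bytes). The helper's body is not part of this diff; the sketch below shows the caching pattern such a helper plausibly implements, with fetchModel as a hypothetical stand-in for the download step:

    import * as ort from "onnxruntime-node";

    // Hypothetical: download the model (checking it is byteSize bytes)
    // and resolve with its local filesystem path.
    declare function fetchModel(name: string, byteSize: number): Promise<string>;

    // Cache the session promise so that concurrent callers share a single
    // download and initialization, and later calls are free.
    const makeCachedInferenceSession = (name: string, byteSize: number) => {
        let session: Promise<ort.InferenceSession> | undefined;
        return () =>
            (session ??= fetchModel(name, byteSize).then((modelPath) =>
                ort.InferenceSession.create(modelPath),
            ));
    };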


@@ -162,8 +162,8 @@ const clipTextEmbeddingIfAvailable = (text: string) =>
 const detectFaces = (input: Float32Array) =>
     ipcRenderer.invoke("detectFaces", input);

-const faceEmbedding = (input: Float32Array) =>
-    ipcRenderer.invoke("faceEmbedding", input);
+const faceEmbeddings = (input: Float32Array) =>
+    ipcRenderer.invoke("faceEmbeddings", input);

 const legacyFaceCrop = (faceID: string) =>
     ipcRenderer.invoke("legacyFaceCrop", faceID);
@@ -343,7 +343,7 @@ contextBridge.exposeInMainWorld("electron", {
     clipImageEmbedding,
     clipTextEmbeddingIfAvailable,
     detectFaces,
-    faceEmbedding,
+    faceEmbeddings,
     legacyFaceCrop,

     // - Watch
// - Watch


@@ -1,6 +1,6 @@
 import { Face } from "services/face/types";
 import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
-import { mobileFaceNetFaceSize } from "../machineLearning/mobileFaceNetEmbeddingService";
+import { mobileFaceNetFaceSize } from "../machineLearning/embed";

 /**
  * Laplacian blur detection.
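The doc comment in the context names the technique: a blur score computed as the variance of the image's Laplacian, which is low when a crop lacks high-frequency detail. A self-contained sketch of the idea (the real implementation works on the grayscale matrix helper imported above):

    // Variance of the 4-neighbour Laplacian response over a grayscale
    // image; low variance suggests a blurry face crop.
    const laplacianVariance = (gray: number[][]): number => {
        const h = gray.length;
        const w = gray[0].length;
        const responses: number[] = [];
        for (let y = 1; y < h - 1; y++)
            for (let x = 1; x < w - 1; x++)
                responses.push(
                    4 * gray[y][x] -
                        gray[y - 1][x] -
                        gray[y + 1][x] -
                        gray[y][x - 1] -
                        gray[y][x + 1],
                );
        const mean = responses.reduce((a, b) => a + b, 0) / responses.length;
        return (
            responses.reduce((a, b) => a + (b - mean) ** 2, 0) /
            responses.length
        );
    };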


@@ -24,7 +24,7 @@ import {
 /**
  * Detect faces in the given {@link imageBitmap}.
  *
- * The ML model used is YOLO, running in an ONNX runtime.
+ * The model used is YOLO, running in an ONNX runtime.
  */
 export const detectFaces = async (
     imageBitmap: ImageBitmap,


@@ -0,0 +1,26 @@
+import { workerBridge } from "@/next/worker/worker-bridge";
+import { FaceEmbedding } from "services/face/types";
+
+export const mobileFaceNetFaceSize = 112;
+
+/**
+ * Compute embeddings for the given {@link faceData}.
+ *
+ * The model used is MobileFaceNet, running in an ONNX runtime.
+ */
+export const getFaceEmbeddings = async (
+    faceData: Float32Array,
+): Promise<Array<FaceEmbedding>> => {
+    const outputData = await workerBridge.faceEmbeddings(faceData);
+
+    const embeddingSize = 192;
+    const embeddings = new Array<FaceEmbedding>(
+        outputData.length / embeddingSize,
+    );
+    for (let i = 0; i < embeddings.length; i++) {
+        embeddings[i] = new Float32Array(
+            outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
+        );
+    }
+    return embeddings;
+};
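The worker bridge returns one flat Float32Array which getFaceEmbeddings slices into 192-float vectors, one per face. Downstream, such embeddings are typically compared with cosine similarity when grouping faces into people; a hypothetical example of that comparison (the function and threshold are illustrative, not part of this commit):

    // Cosine similarity of two embeddings: near 1 for the same person,
    // noticeably lower for different people.
    const cosineSimilarity = (a: Float32Array, b: Float32Array): number => {
        let dot = 0,
            na = 0,
            nb = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            na += a[i] * a[i];
            nb += b[i] * b[i];
        }
        return dot / (Math.sqrt(na) * Math.sqrt(nb));
    };

    const isSamePerson = (a: Float32Array, b: Float32Array) =>
        cosineSimilarity(a, b) > 0.8; // illustrative threshold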


@@ -31,8 +31,8 @@ import { EnteFile } from "types/file";
 import { isInternalUserForML } from "utils/user";
 import { fetchImageBitmapForContext } from "../face/image";
 import { syncPeopleIndex } from "../face/people";
+import mobileFaceNetEmbeddingService from "./embed";
 import FaceService from "./faceService";
-import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";

 /**
  * TODO-ML(MR): What and why.


@@ -1,41 +0,0 @@
-import { workerBridge } from "@/next/worker/worker-bridge";
-import {
-    FaceEmbedding,
-    FaceEmbeddingMethod,
-    FaceEmbeddingService,
-    Versioned,
-} from "services/face/types";
-
-export const mobileFaceNetFaceSize = 112;
-
-class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
-    public method: Versioned<FaceEmbeddingMethod>;
-    public faceSize: number;
-
-    public constructor() {
-        this.method = {
-            value: "MobileFaceNet",
-            version: 2,
-        };
-        this.faceSize = mobileFaceNetFaceSize;
-    }
-
-    public async getFaceEmbeddings(
-        faceData: Float32Array,
-    ): Promise<Array<FaceEmbedding>> {
-        const outputData = await workerBridge.faceEmbedding(faceData);
-
-        const embeddingSize = 192;
-        const embeddings = new Array<FaceEmbedding>(
-            outputData.length / embeddingSize,
-        );
-        for (let i = 0; i < embeddings.length; i++) {
-            embeddings[i] = new Float32Array(
-                outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
-            );
-        }
-        return embeddings;
-    }
-}
-
-export default new MobileFaceNetEmbeddingService();


@@ -332,12 +332,12 @@ export interface Electron {
     detectFaces: (input: Float32Array) => Promise<Float32Array>;

     /**
-     * Return a MobileFaceNet embedding for the given face data.
+     * Return MobileFaceNet embeddings for the given faces.
      *
      * Both the input and output are opaque binary data whose internal structure
      * is specific to our implementation and the model (MobileFaceNet) we use.
      */
-    faceEmbedding: (input: Float32Array) => Promise<Float32Array>;
+    faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;

     /**
      * Return a face crop stored by a previous version of ML.
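The "opaque binary data" remark means the array shapes are a convention shared by both sides rather than anything the types enforce. From the constants elsewhere in this commit, each output embedding is 192 floats and each input face is 112 pixels square; a sketch of the implied bookkeeping (the 3-channel input layout is an assumption, not stated in this diff):

    const FACE_SIZE = 112; // mobileFaceNetFaceSize in this commit
    const EMBEDDING_SIZE = 192; // embeddingSize in this commit

    // Faces packed into an input buffer, assuming an RGB layout.
    const inputFaceCount = (input: Float32Array) =>
        input.length / (FACE_SIZE * FACE_SIZE * 3);

    // Embeddings in an output buffer (mirrors getFaceEmbeddings).
    const outputFaceCount = (output: Float32Array) =>
        output.length / EMBEDDING_SIZE;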


@@ -47,8 +47,8 @@ const workerBridge = {
     convertToJPEG: (imageData: Uint8Array) =>
         ensureElectron().convertToJPEG(imageData),
     detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
-    faceEmbedding: (input: Float32Array) =>
-        ensureElectron().faceEmbedding(input),
+    faceEmbeddings: (input: Float32Array) =>
+        ensureElectron().faceEmbeddings(input),
 };

 export type WorkerBridge = typeof workerBridge;
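Taken together, a faceEmbeddings call from the ML worker now crosses four layers under the new plural name: workerBridge → preload (ipcRenderer.invoke) → ipcMain.handle → the ONNX session, and back. A usage sketch (preprocessFaces is a hypothetical stand-in for the preparation done elsewhere in the face pipeline):

    import { workerBridge } from "@/next/worker/worker-bridge";

    // Hypothetical: pack n cropped faces into one flat Float32Array.
    declare function preprocessFaces(bitmap: ImageBitmap): Float32Array;

    const embedFaces = async (bitmap: ImageBitmap): Promise<number> => {
        const input = preprocessFaces(bitmap);
        // One flat array of n concatenated 192-float embeddings.
        const flat = await workerBridge.faceEmbeddings(input);
        return flat.length / 192; // number of faces embedded
    };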