Quellcode durchsuchen

Also move the embedding

Manav Rathi vor 1 Jahr
Ursprung
Commit
52727f2255

+ 36 - 1
desktop/src/main/services/ml-face.ts

@@ -104,5 +104,40 @@ export const detectFaces = async (input: Float32Array) => {
 };
 
 export const faceEmbedding = async (input: Float32Array) => {
-    throw new Error("test");
+    // console.log("start ort mobilefacenet");
+    // this.onnxInferenceSession = await ort.InferenceSession.create(
+    //     "/models/mobilefacenet/mobilefacenet_opset15.onnx",
+    // );
+    // const faceBatchSize = 1;
+    // const data = new Float32Array(
+    //     faceBatchSize * 3 * this.faceSize * this.faceSize,
+    // );
+    // const inputTensor = new ort.Tensor("float32", data, [
+    //     faceBatchSize,
+    //     this.faceSize,
+    //     this.faceSize,
+    //     3,
+    // ]);
+    // // TODO(MR): onnx-yolo
+    // // const feeds: Record<string, ort.Tensor> = {};
+    // const feeds: Record<string, any> = {};
+    // const name = this.onnxInferenceSession.inputNames[0];
+    // feeds[name] = inputTensor;
+    // await this.onnxInferenceSession.run(feeds);
+    // console.log("start end mobilefacenet");
+
+    // Side length of each face: the input tensor below is shaped [n, z, z, 3]
+    const mobileFaceNetFaceSize = 112;
+    // Shorter alias for the face size, to keep the expressions below compact
+    const z = mobileFaceNetFaceSize;
+    // Number of faces in the batch: total floats / floats per face (rounded, so a ragged length is not rejected here)
+    const n = Math.round(input.length / (z * z * 3));
+    const inputTensor = new ort.Tensor("float32", input, [n, z, z, 3]);
+
+    const session = await faceEmbeddingSession();
+    const t = Date.now();
+    const feeds = { img_inputs: inputTensor };
+    const results = await session.run(feeds);
+    log.debug(() => `onnx/yolo face embedding took ${Date.now() - t} ms`); // NOTE(review): message says "yolo" but this is the embedding path - confirm wording
+    return results.embeddings["cpuData"]; // NOTE(review): presumably a Float32Array; ort.Tensor also has a public `data` property - confirm which to use
 };

+ 0 - 11
web/apps/photos/src/constants/mlConfig.ts

@@ -53,15 +53,4 @@ export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = {
     enabled: false,
 };
 
-export const ML_SYNC_DOWNLOAD_TIMEOUT_MS = 300000;
-
-export const MAX_FACE_DISTANCE_PERCENT = Math.sqrt(2) / 100;
-
 export const MAX_ML_SYNC_ERROR_COUNT = 1;
-
-export const TEXT_DETECTION_TIMEOUT_MS = [10000, 30000, 60000, 120000, 240000];
-
-export const MOBILEFACENET_FACE_SIZE = 112;
-export const MOBILEFACENET_EMBEDDING_SIZE = 192;
-
-export const BATCHES_BEFORE_SYNCING_INDEX = 5;

+ 2 - 2
web/apps/photos/src/services/machineLearning/laplacianBlurDetectionService.ts

@@ -1,10 +1,10 @@
-import { MOBILEFACENET_FACE_SIZE } from "constants/mlConfig";
 import {
     BlurDetectionMethod,
     BlurDetectionService,
     Versioned,
 } from "types/machineLearning";
 import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
+import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
 
 class LaplacianBlurDetectionService implements BlurDetectionService {
     public method: Versioned<BlurDetectionMethod>;
@@ -19,7 +19,7 @@ class LaplacianBlurDetectionService implements BlurDetectionService {
     public detectBlur(alignedFaces: Float32Array): number[] {
         const numFaces = Math.round(
             alignedFaces.length /
-                (MOBILEFACENET_FACE_SIZE * MOBILEFACENET_FACE_SIZE * 3),
+                (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
         );
         const blurValues: number[] = [];
         for (let i = 0; i < numFaces; i++) {

+ 8 - 68
web/apps/photos/src/services/machineLearning/mobileFaceNetEmbeddingService.ts

@@ -1,7 +1,4 @@
-import {
-    MOBILEFACENET_EMBEDDING_SIZE,
-    MOBILEFACENET_FACE_SIZE,
-} from "constants/mlConfig";
+import { ensureElectron } from "@/next/electron";
 import {
     FaceEmbedding,
     FaceEmbeddingMethod,
@@ -9,17 +6,9 @@ import {
     Versioned,
 } from "types/machineLearning";
 
-// TODO(MR): onnx-yolo
-// import * as ort from "onnxruntime-web";
-// import { env } from "onnxruntime-web";
-const ort: any = {};
+export const mobileFaceNetFaceSize = 112;
 
-// TODO(MR): onnx-yolo
-// env.wasm.wasmPaths = "/js/onnx/";
 class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
-    // TODO(MR): onnx-yolo
-    // private onnxInferenceSession?: ort.InferenceSession;
-    private onnxInferenceSession?: any;
     public method: Versioned<FaceEmbeddingMethod>;
     public faceSize: number;
 
@@ -28,70 +17,21 @@ class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
             value: "MobileFaceNet",
             version: 2,
         };
-        this.faceSize = MOBILEFACENET_FACE_SIZE;
-        // TODO: set timeout
-    }
-
-    private async initOnnx() {
-        console.log("start ort mobilefacenet");
-        this.onnxInferenceSession = await ort.InferenceSession.create(
-            "/models/mobilefacenet/mobilefacenet_opset15.onnx",
-        );
-        const faceBatchSize = 1;
-        const data = new Float32Array(
-            faceBatchSize * 3 * this.faceSize * this.faceSize,
-        );
-        const inputTensor = new ort.Tensor("float32", data, [
-            faceBatchSize,
-            this.faceSize,
-            this.faceSize,
-            3,
-        ]);
-        // TODO(MR): onnx-yolo
-        // const feeds: Record<string, ort.Tensor> = {};
-        const feeds: Record<string, any> = {};
-        const name = this.onnxInferenceSession.inputNames[0];
-        feeds[name] = inputTensor;
-        await this.onnxInferenceSession.run(feeds);
-        console.log("start end mobilefacenet");
-    }
-
-    private async getOnnxInferenceSession() {
-        if (!this.onnxInferenceSession) {
-            await this.initOnnx();
-        }
-        return this.onnxInferenceSession;
+        this.faceSize = mobileFaceNetFaceSize;
     }
 
     public async getFaceEmbeddings(
         faceData: Float32Array,
     ): Promise<Array<FaceEmbedding>> {
-        const inputTensor = new ort.Tensor("float32", faceData, [
-            Math.round(faceData.length / (this.faceSize * this.faceSize * 3)),
-            this.faceSize,
-            this.faceSize,
-            3,
-        ]);
-        // TODO(MR): onnx-yolo
-        // const feeds: Record<string, ort.Tensor> = {};
-        const feeds: Record<string, any> = {};
-        feeds["img_inputs"] = inputTensor;
-        const inferenceSession = await this.getOnnxInferenceSession();
-        // TODO(MR): onnx-yolo
-        // const runout: ort.InferenceSession.OnnxValueMapType =
-        const runout: any = await inferenceSession.run(feeds);
-        // const test = runout.embeddings;
-        // const test2 = test.cpuData;
-        const outputData = runout.embeddings["cpuData"] as Float32Array;
+        const outputData = await ensureElectron().faceEmbedding(faceData);
+
+        const embeddingSize = 192;
         const embeddings = new Array<FaceEmbedding>(
-            outputData.length / MOBILEFACENET_EMBEDDING_SIZE,
+            outputData.length / embeddingSize,
         );
         for (let i = 0; i < embeddings.length; i++) {
             embeddings[i] = new Float32Array(
-                outputData.slice(
-                    i * MOBILEFACENET_EMBEDDING_SIZE,
-                    (i + 1) * MOBILEFACENET_EMBEDDING_SIZE,
-                ),
+                outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
             );
         }
         return embeddings;

+ 2 - 2
web/apps/photos/src/services/machineLearning/yoloFaceDetectionService.ts

@@ -1,5 +1,4 @@
 import { ensureElectron } from "@/next/electron";
-import { MAX_FACE_DISTANCE_PERCENT } from "constants/mlConfig";
 import { euclidean } from "hdbscan";
 import {
     Matrix,
@@ -36,7 +35,8 @@ class YoloFaceDetectionService implements FaceDetectionService {
     public async detectFaces(
         imageBitmap: ImageBitmap,
     ): Promise<Array<FaceDetection>> {
-        const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
+        const maxFaceDistancePercent = Math.sqrt(2) / 100;
+        const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
         const preprocessResult =
             this.preprocessImageBitmapToFloat32ChannelsFirst(
                 imageBitmap,

+ 1 - 2
web/apps/photos/src/types/machineLearning/index.ts

@@ -287,9 +287,8 @@ export interface FaceAlignmentService {
 export interface FaceEmbeddingService {
     method: Versioned<FaceEmbeddingMethod>;
     faceSize: number;
-    // init(): Promise<void>;
+
     getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
-    dispose(): Promise<void>;
 }
 
 export interface BlurDetectionService {