
[web] ML cleanup - Part 3/x (#1737)

Manav Rathi 1 year ago
parent commit a0cb8b850e
41 changed files with 1235 additions and 2062 deletions
  1. desktop/src/main/ipc.ts (+3 -3)
  2. desktop/src/main/services/ml-face.ts (+1 -1)
  3. desktop/src/preload.ts (+3 -3)
  4. web/apps/photos/package.json (+0 -1)
  5. web/apps/photos/src/components/Search/SearchBar/searchInput/MenuWithPeople.tsx (+1 -1)
  6. web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx (+1 -1)
  7. web/apps/photos/src/components/ml/PeopleList.tsx (+2 -5)
  8. web/apps/photos/src/services/embeddingService.ts (+4 -0)
  9. web/apps/photos/src/services/face/align.ts (+9 -31)
  10. web/apps/photos/src/services/face/blur.ts (+187 -0)
  11. web/apps/photos/src/services/face/cluster.ts (+34 -0)
  12. web/apps/photos/src/services/face/crop.ts (+32 -0)
  13. web/apps/photos/src/services/face/db.ts (+7 -4)
  14. web/apps/photos/src/services/face/detect.ts (+316 -0)
  15. web/apps/photos/src/services/face/embed.ts (+26 -0)
  16. web/apps/photos/src/services/face/f-index.ts (+194 -0)
  17. web/apps/photos/src/services/face/face.worker.ts (+8 -18)
  18. web/apps/photos/src/services/face/geom.ts (+0 -0)
  19. web/apps/photos/src/services/face/image.ts (+33 -51)
  20. web/apps/photos/src/services/face/index.ts (+1 -1)
  21. web/apps/photos/src/services/face/people.ts (+111 -0)
  22. web/apps/photos/src/services/face/types.ts (+161 -0)
  23. web/apps/photos/src/services/machineLearning/arcfaceCropService.ts (+0 -60)
  24. web/apps/photos/src/services/machineLearning/clusteringService.ts (+0 -88)
  25. web/apps/photos/src/services/machineLearning/dbscanClusteringService.ts (+0 -37)
  26. web/apps/photos/src/services/machineLearning/faceService.ts (+0 -359)
  27. web/apps/photos/src/services/machineLearning/hdbscanClusteringService.ts (+0 -44)
  28. web/apps/photos/src/services/machineLearning/laplacianBlurDetectionService.ts (+0 -211)
  29. web/apps/photos/src/services/machineLearning/machineLearningService.ts (+62 -278)
  30. web/apps/photos/src/services/machineLearning/mlWorkManager.ts (+10 -25)
  31. web/apps/photos/src/services/machineLearning/mobileFaceNetEmbeddingService.ts (+0 -41)
  32. web/apps/photos/src/services/machineLearning/peopleService.ts (+0 -113)
  33. web/apps/photos/src/services/machineLearning/yoloFaceDetectionService.ts (+0 -332)
  34. web/apps/photos/src/services/ml/types.ts (+0 -331)
  35. web/apps/photos/src/services/searchService.ts (+4 -5)
  36. web/apps/photos/src/types/search/index.ts (+2 -2)
  37. web/apps/photos/src/utils/image/index.ts (+7 -7)
  38. web/docs/dependencies.md (+12 -0)
  39. web/packages/next/types/ipc.ts (+2 -2)
  40. web/packages/next/worker/comlink-worker.ts (+2 -2)
  41. web/yarn.lock (+0 -5)

+ 3 - 3
desktop/src/main/ipc.ts

@@ -46,7 +46,7 @@ import {
     clipImageEmbedding,
     clipTextEmbeddingIfAvailable,
 } from "./services/ml-clip";
-import { detectFaces, faceEmbedding } from "./services/ml-face";
+import { detectFaces, faceEmbeddings } from "./services/ml-face";
 import { encryptionKey, saveEncryptionKey } from "./services/store";
 import {
     clearPendingUploads,
@@ -182,8 +182,8 @@ export const attachIPCHandlers = () => {
         detectFaces(input),
     );
 
-    ipcMain.handle("faceEmbedding", (_, input: Float32Array) =>
-        faceEmbedding(input),
+    ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
+        faceEmbeddings(input),
     );
 
     ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>

+ 1 - 1
desktop/src/main/services/ml-face.ts

@@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
     5286998 /* 5 MB */,
 );
 
-export const faceEmbedding = async (input: Float32Array) => {
+export const faceEmbeddings = async (input: Float32Array) => {
     // Dimension of each face (alias)
     const mobileFaceNetFaceSize = 112;
     // Smaller alias

+ 3 - 3
desktop/src/preload.ts

@@ -162,8 +162,8 @@ const clipTextEmbeddingIfAvailable = (text: string) =>
 const detectFaces = (input: Float32Array) =>
     ipcRenderer.invoke("detectFaces", input);
 
-const faceEmbedding = (input: Float32Array) =>
-    ipcRenderer.invoke("faceEmbedding", input);
+const faceEmbeddings = (input: Float32Array) =>
+    ipcRenderer.invoke("faceEmbeddings", input);
 
 const legacyFaceCrop = (faceID: string) =>
     ipcRenderer.invoke("legacyFaceCrop", faceID);
@@ -343,7 +343,7 @@ contextBridge.exposeInMainWorld("electron", {
     clipImageEmbedding,
     clipTextEmbeddingIfAvailable,
     detectFaces,
-    faceEmbedding,
+    faceEmbeddings,
     legacyFaceCrop,
 
     // - Watch

+ 0 - 1
web/apps/photos/package.json

@@ -16,7 +16,6 @@
         "chrono-node": "^2.2.6",
         "date-fns": "^2",
         "debounce": "^2.0.0",
-        "density-clustering": "^1.3.0",
         "eventemitter3": "^4.0.7",
         "exifr": "^7.1.3",
         "fast-srp-hap": "^2.0.4",

+ 1 - 1
web/apps/photos/src/components/Search/SearchBar/searchInput/MenuWithPeople.tsx

@@ -5,7 +5,7 @@ import { t } from "i18next";
 import { AppContext } from "pages/_app";
 import { useContext } from "react";
 import { components } from "react-select";
-import { IndexStatus } from "services/ml/db";
+import { IndexStatus } from "services/face/db";
 import { Suggestion, SuggestionType } from "types/search";
 
 const { Menu } = components;

+ 1 - 1
web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx

@@ -9,8 +9,8 @@ import { useCallback, useContext, useEffect, useRef, useState } from "react";
 import { components } from "react-select";
 import AsyncSelect from "react-select/async";
 import { InputActionMeta } from "react-select/src/types";
+import { Person } from "services/face/types";
 import { City } from "services/locationSearchService";
-import { Person } from "services/ml/types";
 import {
     getAutoCompleteSuggestions,
     getDefaultOptions,

+ 2 - 5
web/apps/photos/src/components/ml/PeopleList.tsx

@@ -3,8 +3,8 @@ import { Skeleton, styled } from "@mui/material";
 import { Legend } from "components/PhotoViewer/styledComponents/Legend";
 import { t } from "i18next";
 import React, { useEffect, useState } from "react";
-import mlIDbStorage from "services/ml/db";
-import { Face, Person, type MlFileData } from "services/ml/types";
+import mlIDbStorage from "services/face/db";
+import { Face, Person, type MlFileData } from "services/face/types";
 import { EnteFile } from "types/file";
 
 const FaceChipContainer = styled("div")`
@@ -167,10 +167,7 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
                 .legacyFaceCrop(faceID)
                 /*
             cachedOrNew("face-crops", cacheKey, async () => {
-                const user = await ensureLocalUser();
                 return machineLearningService.regenerateFaceCrop(
-                    user.token,
-                    user.id,
                     faceId,
                 );
             })*/

+ 4 - 0
web/apps/photos/src/services/embeddingService.ts

@@ -102,6 +102,10 @@ export const syncCLIPEmbeddings = async () => {
             if (!response.diff?.length) {
                 return;
             }
+            // Note: in rare cases we might get a diff entry for an embedding
+            // corresponding to a file which has been deleted (but whose
+            // embedding is enqueued for deletion). Client should expect such a
+            // scenario (all it has to do is just ignore them).
             const newEmbeddings = await Promise.all(
                 response.diff.map(async (embedding) => {
                     try {

+ 9 - 31
web/apps/photos/src/services/machineLearning/arcfaceAlignmentService.ts → web/apps/photos/src/services/face/align.ts

@@ -1,31 +1,8 @@
 import { Matrix } from "ml-matrix";
-import { Point } from "services/ml/geom";
-import {
-    FaceAlignment,
-    FaceAlignmentMethod,
-    FaceAlignmentService,
-    FaceDetection,
-    Versioned,
-} from "services/ml/types";
+import { Point } from "services/face/geom";
+import { FaceAlignment, FaceDetection } from "services/face/types";
 import { getSimilarityTransformation } from "similarity-transformation";
 
-class ArcfaceAlignmentService implements FaceAlignmentService {
-    public method: Versioned<FaceAlignmentMethod>;
-
-    constructor() {
-        this.method = {
-            value: "ArcFace",
-            version: 1,
-        };
-    }
-
-    public getFaceAlignment(faceDetection: FaceDetection): FaceAlignment {
-        return getArcfaceAlignment(faceDetection);
-    }
-}
-
-export default new ArcfaceAlignmentService();
-
 const ARCFACE_LANDMARKS = [
     [38.2946, 51.6963],
     [73.5318, 51.5014],
@@ -43,9 +20,12 @@ const ARC_FACE_5_LANDMARKS = [
     [70.7299, 92.2041],
 ] as Array<[number, number]>;
 
-export function getArcfaceAlignment(
-    faceDetection: FaceDetection,
-): FaceAlignment {
+/**
+ * Compute and return an {@link FaceAlignment} for the given face detection.
+ *
+ * @param faceDetection A geometry indicating a face detected in an image.
+ */
+export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
     const landmarkCount = faceDetection.landmarks.length;
     return getFaceAlignmentUsingSimilarityTransform(
         faceDetection,
@@ -54,12 +34,11 @@ export function getArcfaceAlignment(
             ARCFACE_LANDMARKS_FACE_SIZE,
         ),
     );
-}
+};
 
 function getFaceAlignmentUsingSimilarityTransform(
     faceDetection: FaceDetection,
     alignedLandmarks: Array<[number, number]>,
-    // alignmentMethod: Versioned<FaceAlignmentMethod>
 ): FaceAlignment {
     const landmarksMat = new Matrix(
         faceDetection.landmarks
@@ -90,7 +69,6 @@ function getFaceAlignmentUsingSimilarityTransform(
         simTransform.rotation.get(0, 1),
         simTransform.rotation.get(0, 0),
     );
-    // log.info({ affineMatrix, meanTranslation, centerMat, center, toMean: simTransform.toMean, fromMean: simTransform.fromMean, size });
 
     return {
         affineMatrix,
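
A minimal usage sketch of the new faceAlignment export, assuming the import paths introduced in this commit and a FaceDetection value produced by services/face/detect; the helper name alignFirstFace is hypothetical:

    // Sketch: compute the alignment for the first detection (if any).
    import { faceAlignment } from "services/face/align";
    import type { FaceDetection } from "services/face/types";

    const alignFirstFace = (detections: FaceDetection[]) => {
        if (detections.length === 0) return undefined;
        // Returns { affineMatrix, rotation, size, center } per the FaceAlignment type.
        return faceAlignment(detections[0]);
    };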

+ 187 - 0
web/apps/photos/src/services/face/blur.ts

@@ -0,0 +1,187 @@
+import { Face } from "services/face/types";
+import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
+import { mobileFaceNetFaceSize } from "./embed";
+
+/**
+ * Laplacian blur detection.
+ */
+export const detectBlur = (
+    alignedFaces: Float32Array,
+    faces: Face[],
+): number[] => {
+    const numFaces = Math.round(
+        alignedFaces.length /
+            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
+    );
+    const blurValues: number[] = [];
+    for (let i = 0; i < numFaces; i++) {
+        const face = faces[i];
+        const direction = faceDirection(face);
+        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
+            alignedFaces,
+            i,
+        );
+        const laplacian = applyLaplacian(faceImage, direction);
+        blurValues.push(matrixVariance(laplacian));
+    }
+    return blurValues;
+};
+
+type FaceDirection = "left" | "right" | "straight";
+
+const faceDirection = (face: Face): FaceDirection => {
+    const landmarks = face.detection.landmarks;
+    const leftEye = landmarks[0];
+    const rightEye = landmarks[1];
+    const nose = landmarks[2];
+    const leftMouth = landmarks[3];
+    const rightMouth = landmarks[4];
+
+    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
+    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
+    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
+
+    const faceIsUpright =
+        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
+        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
+
+    const noseStickingOutLeft =
+        nose.x < Math.min(leftEye.x, rightEye.x) &&
+        nose.x < Math.min(leftMouth.x, rightMouth.x);
+
+    const noseStickingOutRight =
+        nose.x > Math.max(leftEye.x, rightEye.x) &&
+        nose.x > Math.max(leftMouth.x, rightMouth.x);
+
+    const noseCloseToLeftEye =
+        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
+    const noseCloseToRightEye =
+        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
+
+    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
+        return "left";
+    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
+        return "right";
+    }
+
+    return "straight";
+};
+
+/**
+ * Return a new image by applying a Laplacian blur kernel to each pixel.
+ */
+const applyLaplacian = (
+    image: number[][],
+    direction: FaceDirection,
+): number[][] => {
+    const paddedImage: number[][] = padImage(image, direction);
+    const numRows = paddedImage.length - 2;
+    const numCols = paddedImage[0].length - 2;
+
+    // Create an output image initialized to 0.
+    const outputImage: number[][] = Array.from({ length: numRows }, () =>
+        new Array(numCols).fill(0),
+    );
+
+    // Define the Laplacian kernel.
+    const kernel: number[][] = [
+        [0, 1, 0],
+        [1, -4, 1],
+        [0, 1, 0],
+    ];
+
+    // Apply the kernel to each pixel
+    for (let i = 0; i < numRows; i++) {
+        for (let j = 0; j < numCols; j++) {
+            let sum = 0;
+            for (let ki = 0; ki < 3; ki++) {
+                for (let kj = 0; kj < 3; kj++) {
+                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
+                }
+            }
+            // Adjust the output value if necessary (e.g., clipping).
+            outputImage[i][j] = sum;
+        }
+    }
+
+    return outputImage;
+};
+
+const padImage = (image: number[][], direction: FaceDirection): number[][] => {
+    const removeSideColumns = 56; /* must be even */
+
+    const numRows = image.length;
+    const numCols = image[0].length;
+    const paddedNumCols = numCols + 2 - removeSideColumns;
+    const paddedNumRows = numRows + 2;
+
+    // Create a new matrix with extra padding.
+    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
+        new Array(paddedNumCols).fill(0),
+    );
+
+    if (direction === "straight") {
+        // Copy original image into the center of the padded image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] =
+                    image[i][j + Math.round(removeSideColumns / 2)];
+            }
+        }
+    } else if (direction === "left") {
+        // If the face is facing left, we only take the right side of the face image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
+            }
+        }
+    } else if (direction === "right") {
+        // If the face is facing right, we only take the left side of the face image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j];
+            }
+        }
+    }
+
+    // Reflect padding
+    // Top and bottom rows
+    for (let j = 1; j <= paddedNumCols - 2; j++) {
+        paddedImage[0][j] = paddedImage[2][j]; // Top row
+        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
+    }
+    // Left and right columns
+    for (let i = 0; i < numRows + 2; i++) {
+        paddedImage[i][0] = paddedImage[i][2]; // Left column
+        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
+    }
+
+    return paddedImage;
+};
+
+const matrixVariance = (matrix: number[][]): number => {
+    const numRows = matrix.length;
+    const numCols = matrix[0].length;
+    const totalElements = numRows * numCols;
+
+    // Calculate the mean.
+    let mean: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            mean += value;
+        });
+    });
+    mean /= totalElements;
+
+    // Calculate the variance.
+    let variance: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            const diff: number = value - mean;
+            variance += diff * diff;
+        });
+    });
+    variance /= totalElements;
+
+    return variance;
+};
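
A sketch of how detectBlur might be invoked, mirroring the alignment step in f-index.ts where each aligned face occupies 112 * 112 * 3 consecutive floats; the helper name annotateBlur and the length check are additions for illustration:

    import { detectBlur } from "services/face/blur";
    import { mobileFaceNetFaceSize } from "services/face/embed";
    import type { Face } from "services/face/types";

    const annotateBlur = (alignedFacesData: Float32Array, faces: Face[]) => {
        const perFace = mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3;
        if (alignedFacesData.length !== faces.length * perFace)
            throw new Error("Aligned face data does not match face count");
        // One blur value per face, in the same order as `faces`.
        const blurValues = detectBlur(alignedFacesData, faces);
        faces.forEach((f, i) => (f.blurValue = blurValues[i]));
    };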

+ 34 - 0
web/apps/photos/src/services/face/cluster.ts

@@ -0,0 +1,34 @@
+import { Hdbscan, type DebugInfo } from "hdbscan";
+import { type Cluster } from "services/face/types";
+
+export interface ClusterFacesResult {
+    clusters: Array<Cluster>;
+    noise: Cluster;
+    debugInfo?: DebugInfo;
+}
+
+/**
+ * Cluster the given {@link faceEmbeddings}.
+ *
+ * @param faceEmbeddings An array of embeddings produced by our face indexing
+ * pipeline. Each embedding is for a face detected in an image (a single image
+ * may have multiple faces detected within it).
+ */
+export const clusterFaces = async (
+    faceEmbeddings: Array<Array<number>>,
+): Promise<ClusterFacesResult> => {
+    const hdbscan = new Hdbscan({
+        input: faceEmbeddings,
+        minClusterSize: 3,
+        minSamples: 5,
+        clusterSelectionEpsilon: 0.6,
+        clusterSelectionMethod: "leaf",
+        debug: true,
+    });
+
+    return {
+        clusters: hdbscan.getClusters(),
+        noise: hdbscan.getNoise(),
+        debugInfo: hdbscan.getDebugInfo(),
+    };
+};
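
A sketch of the call site, mirroring how services/face/people.ts feeds embeddings to clusterFaces (it assumes every face already has an embedding); the helper name clusterAll is hypothetical:

    import { clusterFaces } from "services/face/cluster";
    import type { Face } from "services/face/types";

    const clusterAll = async (allFaces: Face[]) => {
        const { clusters, noise } = await clusterFaces(
            allFaces.map((f) => Array.from(f.embedding)),
        );
        // Each cluster (and the noise entry) is an array of indices into `allFaces`.
        return { clusters, noise };
    };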

+ 32 - 0
web/apps/photos/src/services/face/crop.ts

@@ -0,0 +1,32 @@
+import { Box, enlargeBox } from "services/face/geom";
+import { FaceCrop, FaceDetection } from "services/face/types";
+import { cropWithRotation } from "utils/image";
+import { faceAlignment } from "./align";
+
+export const getFaceCrop = (
+    imageBitmap: ImageBitmap,
+    faceDetection: FaceDetection,
+): FaceCrop => {
+    const alignment = faceAlignment(faceDetection);
+
+    const padding = 0.25;
+    const maxSize = 256;
+
+    const alignmentBox = new Box({
+        x: alignment.center.x - alignment.size / 2,
+        y: alignment.center.y - alignment.size / 2,
+        width: alignment.size,
+        height: alignment.size,
+    }).round();
+    const scaleForPadding = 1 + padding * 2;
+    const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
+    const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
+        width: maxSize,
+        height: maxSize,
+    });
+
+    return {
+        image: faceImageBitmap,
+        imageBox: paddedBox,
+    };
+};
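
A sketch of cropping a detected face to a Blob, following the same steps as saveFaceCrop in services/face/f-index.ts but without the blob cache; the helper name cropToBlob is hypothetical:

    import { getFaceCrop } from "services/face/crop";
    import type { FaceDetection } from "services/face/types";
    import { imageBitmapToBlob } from "utils/image";

    const cropToBlob = async (image: ImageBitmap, detection: FaceDetection) => {
        const faceCrop = getFaceCrop(image, detection);
        const blob = await imageBitmapToBlob(faceCrop.image);
        // Release the bitmap once it has been serialized.
        faceCrop.image.close();
        return blob;
    };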

+ 7 - 4
web/apps/photos/src/services/ml/db.ts → web/apps/photos/src/services/face/db.ts

@@ -9,12 +9,11 @@ import {
     openDB,
 } from "idb";
 import isElectron from "is-electron";
+import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
 import {
     DEFAULT_ML_SEARCH_CONFIG,
-    DEFAULT_ML_SYNC_CONFIG,
     MAX_ML_SYNC_ERROR_COUNT,
 } from "services/machineLearning/machineLearningService";
-import { Face, MLLibraryData, MlFileData, Person } from "services/ml/types";
 
 export interface IndexStatus {
     outOfSyncFilesExists: boolean;
@@ -26,7 +25,6 @@ export interface IndexStatus {
 
 interface Config {}
 
-export const ML_SYNC_CONFIG_NAME = "ml-sync";
 export const ML_SEARCH_CONFIG_NAME = "ml-search";
 
 const MLDATA_DB_NAME = "mldata";
@@ -141,10 +139,11 @@ class MLIDbStorage {
                             DEFAULT_ML_SYNC_JOB_CONFIG,
                             "ml-sync-job",
                         );
-                    */
+
                     await tx
                         .objectStore("configs")
                         .add(DEFAULT_ML_SYNC_CONFIG, ML_SYNC_CONFIG_NAME);
+                    */
                 }
                 if (oldVersion < 3) {
                     await tx
@@ -163,6 +162,10 @@ class MLIDbStorage {
                             .objectStore("configs")
                             .delete(ML_SEARCH_CONFIG_NAME);
 
+                        await tx
+                            .objectStore("configs")
+                            .delete("ml-sync");
+
                         await tx
                             .objectStore("configs")
                             .delete("ml-sync-job");

+ 316 - 0
web/apps/photos/src/services/face/detect.ts

@@ -0,0 +1,316 @@
+import { workerBridge } from "@/next/worker/worker-bridge";
+import { euclidean } from "hdbscan";
+import {
+    Box,
+    Dimensions,
+    Point,
+    boxFromBoundingBox,
+    newBox,
+} from "services/face/geom";
+import { FaceDetection } from "services/face/types";
+import {
+    Matrix,
+    applyToPoint,
+    compose,
+    scale,
+    translate,
+} from "transformation-matrix";
+import {
+    clamp,
+    getPixelBilinear,
+    normalizePixelBetween0And1,
+} from "utils/image";
+
+/**
+ * Detect faces in the given {@link imageBitmap}.
+ *
+ * The model used is YOLO, running in an ONNX runtime.
+ */
+export const detectFaces = async (
+    imageBitmap: ImageBitmap,
+): Promise<Array<FaceDetection>> => {
+    const maxFaceDistancePercent = Math.sqrt(2) / 100;
+    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
+    const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
+        imageBitmap,
+        640,
+        640,
+    );
+    const data = preprocessResult.data;
+    const resized = preprocessResult.newSize;
+    const outputData = await workerBridge.detectFaces(data);
+    const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
+    const inBox = newBox(0, 0, resized.width, resized.height);
+    const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
+    const transform = computeTransformToBox(inBox, toBox);
+    const faceDetections: Array<FaceDetection> = faces?.map((f) => {
+        const box = transformBox(f.box, transform);
+        const normLandmarks = f.landmarks;
+        const landmarks = transformPoints(normLandmarks, transform);
+        return {
+            box,
+            landmarks,
+            probability: f.probability as number,
+        } as FaceDetection;
+    });
+    return removeDuplicateDetections(faceDetections, maxFaceDistance);
+};
+
+const preprocessImageBitmapToFloat32ChannelsFirst = (
+    imageBitmap: ImageBitmap,
+    requiredWidth: number,
+    requiredHeight: number,
+    maintainAspectRatio: boolean = true,
+    normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
+) => {
+    // Create an OffscreenCanvas and set its size.
+    const offscreenCanvas = new OffscreenCanvas(
+        imageBitmap.width,
+        imageBitmap.height,
+    );
+    const ctx = offscreenCanvas.getContext("2d");
+    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
+    const imageData = ctx.getImageData(
+        0,
+        0,
+        imageBitmap.width,
+        imageBitmap.height,
+    );
+    const pixelData = imageData.data;
+
+    let scaleW = requiredWidth / imageBitmap.width;
+    let scaleH = requiredHeight / imageBitmap.height;
+    if (maintainAspectRatio) {
+        const scale = Math.min(
+            requiredWidth / imageBitmap.width,
+            requiredHeight / imageBitmap.height,
+        );
+        scaleW = scale;
+        scaleH = scale;
+    }
+    const scaledWidth = clamp(
+        Math.round(imageBitmap.width * scaleW),
+        0,
+        requiredWidth,
+    );
+    const scaledHeight = clamp(
+        Math.round(imageBitmap.height * scaleH),
+        0,
+        requiredHeight,
+    );
+
+    const processedImage = new Float32Array(
+        1 * 3 * requiredWidth * requiredHeight,
+    );
+
+    // Populate the Float32Array with normalized pixel values
+    let pixelIndex = 0;
+    const channelOffsetGreen = requiredHeight * requiredWidth;
+    const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
+    for (let h = 0; h < requiredHeight; h++) {
+        for (let w = 0; w < requiredWidth; w++) {
+            let pixel: {
+                r: number;
+                g: number;
+                b: number;
+            };
+            if (w >= scaledWidth || h >= scaledHeight) {
+                pixel = { r: 114, g: 114, b: 114 };
+            } else {
+                pixel = getPixelBilinear(
+                    w / scaleW,
+                    h / scaleH,
+                    pixelData,
+                    imageBitmap.width,
+                    imageBitmap.height,
+                );
+            }
+            processedImage[pixelIndex] = normFunction(pixel.r);
+            processedImage[pixelIndex + channelOffsetGreen] = normFunction(
+                pixel.g,
+            );
+            processedImage[pixelIndex + channelOffsetBlue] = normFunction(
+                pixel.b,
+            );
+            pixelIndex++;
+        }
+    }
+
+    return {
+        data: processedImage,
+        originalSize: {
+            width: imageBitmap.width,
+            height: imageBitmap.height,
+        },
+        newSize: { width: scaledWidth, height: scaledHeight },
+    };
+};
+
+/**
+ * @param rowOutput A Float32Array of shape [25200, 16], where each row
+ * represents a bounding box.
+ */
+const getFacesFromYOLOOutput = (
+    rowOutput: Float32Array,
+    minScore: number,
+): Array<FaceDetection> => {
+    const faces: Array<FaceDetection> = [];
+    // Iterate over each row.
+    for (let i = 0; i < rowOutput.length; i += 16) {
+        const score = rowOutput[i + 4];
+        if (score < minScore) {
+            continue;
+        }
+        // The first 4 values represent the bounding box:
+        //
+        //     (xCenter, yCenter, width, height)
+        //
+        const xCenter = rowOutput[i];
+        const yCenter = rowOutput[i + 1];
+        const width = rowOutput[i + 2];
+        const height = rowOutput[i + 3];
+        const xMin = xCenter - width / 2.0; // topLeft
+        const yMin = yCenter - height / 2.0; // topLeft
+
+        const leftEyeX = rowOutput[i + 5];
+        const leftEyeY = rowOutput[i + 6];
+        const rightEyeX = rowOutput[i + 7];
+        const rightEyeY = rowOutput[i + 8];
+        const noseX = rowOutput[i + 9];
+        const noseY = rowOutput[i + 10];
+        const leftMouthX = rowOutput[i + 11];
+        const leftMouthY = rowOutput[i + 12];
+        const rightMouthX = rowOutput[i + 13];
+        const rightMouthY = rowOutput[i + 14];
+
+        const box = new Box({
+            x: xMin,
+            y: yMin,
+            width: width,
+            height: height,
+        });
+        const probability = score as number;
+        const landmarks = [
+            new Point(leftEyeX, leftEyeY),
+            new Point(rightEyeX, rightEyeY),
+            new Point(noseX, noseY),
+            new Point(leftMouthX, leftMouthY),
+            new Point(rightMouthX, rightMouthY),
+        ];
+        faces.push({ box, landmarks, probability });
+    }
+    return faces;
+};
+
+export const getRelativeDetection = (
+    faceDetection: FaceDetection,
+    dimensions: Dimensions,
+): FaceDetection => {
+    const oldBox: Box = faceDetection.box;
+    const box = new Box({
+        x: oldBox.x / dimensions.width,
+        y: oldBox.y / dimensions.height,
+        width: oldBox.width / dimensions.width,
+        height: oldBox.height / dimensions.height,
+    });
+    const oldLandmarks: Point[] = faceDetection.landmarks;
+    const landmarks = oldLandmarks.map((l) => {
+        return new Point(l.x / dimensions.width, l.y / dimensions.height);
+    });
+    const probability = faceDetection.probability;
+    return { box, landmarks, probability };
+};
+
+/**
+ * Removes duplicate face detections from an array of detections.
+ *
+ * This function sorts the detections by their probability in descending order,
+ * then iterates over them.
+ *
+ * For each detection, it calculates the Euclidean distance to all other
+ * detections.
+ *
+ * If the distance is less than or equal to the specified threshold
+ * (`withinDistance`), the other detection is considered a duplicate and is
+ * removed.
+ *
+ * @param detections - An array of face detections to remove duplicates from.
+ *
+ * @param withinDistance - The maximum Euclidean distance between two detections
+ * for them to be considered duplicates.
+ *
+ * @returns An array of face detections with duplicates removed.
+ */
+const removeDuplicateDetections = (
+    detections: Array<FaceDetection>,
+    withinDistance: number,
+) => {
+    detections.sort((a, b) => b.probability - a.probability);
+    const isSelected = new Map<number, boolean>();
+    for (let i = 0; i < detections.length; i++) {
+        if (isSelected.get(i) === false) {
+            continue;
+        }
+        isSelected.set(i, true);
+        for (let j = i + 1; j < detections.length; j++) {
+            if (isSelected.get(j) === false) {
+                continue;
+            }
+            const centeri = getDetectionCenter(detections[i]);
+            const centerj = getDetectionCenter(detections[j]);
+            const dist = euclidean(
+                [centeri.x, centeri.y],
+                [centerj.x, centerj.y],
+            );
+            if (dist <= withinDistance) {
+                isSelected.set(j, false);
+            }
+        }
+    }
+
+    const uniques: Array<FaceDetection> = [];
+    for (let i = 0; i < detections.length; i++) {
+        isSelected.get(i) && uniques.push(detections[i]);
+    }
+    return uniques;
+};
+
+function getDetectionCenter(detection: FaceDetection) {
+    const center = new Point(0, 0);
+    // TODO: averaging the first 4 landmarks is applicable to BlazeFace only;
+    // this needs to consider the eye, nose, and mouth landmarks to compute the center
+    detection.landmarks?.slice(0, 4).forEach((p) => {
+        center.x += p.x;
+        center.y += p.y;
+    });
+
+    return new Point(center.x / 4, center.y / 4);
+}
+
+function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
+    return compose(
+        translate(toBox.x, toBox.y),
+        scale(toBox.width / inBox.width, toBox.height / inBox.height),
+    );
+}
+
+function transformPoint(point: Point, transform: Matrix) {
+    const txdPoint = applyToPoint(transform, point);
+    return new Point(txdPoint.x, txdPoint.y);
+}
+
+function transformPoints(points: Point[], transform: Matrix) {
+    return points?.map((p) => transformPoint(p, transform));
+}
+
+function transformBox(box: Box, transform: Matrix) {
+    const topLeft = transformPoint(box.topLeft, transform);
+    const bottomRight = transformPoint(box.bottomRight, transform);
+
+    return boxFromBoundingBox({
+        left: topLeft.x,
+        top: topLeft.y,
+        right: bottomRight.x,
+        bottom: bottomRight.y,
+    });
+}
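
A sketch of running detection on an image blob, assuming a browser or worker context where createImageBitmap is available (detectFaces itself forwards the preprocessed tensor to the ONNX YOLO model via workerBridge); the helper name detectFromBlob is hypothetical:

    import { detectFaces } from "services/face/detect";

    const detectFromBlob = async (imageBlob: Blob) => {
        const imageBitmap = await createImageBitmap(imageBlob);
        const detections = await detectFaces(imageBitmap);
        imageBitmap.close();
        // Each detection carries a box, 5 landmarks, and a probability (see types.ts).
        return detections;
    };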

+ 26 - 0
web/apps/photos/src/services/face/embed.ts

@@ -0,0 +1,26 @@
+import { workerBridge } from "@/next/worker/worker-bridge";
+import { FaceEmbedding } from "services/face/types";
+
+export const mobileFaceNetFaceSize = 112;
+
+/**
+ * Compute embeddings for the given {@link faceData}.
+ *
+ * The model used is MobileFaceNet, running in an ONNX runtime.
+ */
+export const faceEmbeddings = async (
+    faceData: Float32Array,
+): Promise<Array<FaceEmbedding>> => {
+    const outputData = await workerBridge.faceEmbeddings(faceData);
+
+    const embeddingSize = 192;
+    const embeddings = new Array<FaceEmbedding>(
+        outputData.length / embeddingSize,
+    );
+    for (let i = 0; i < embeddings.length; i++) {
+        embeddings[i] = new Float32Array(
+            outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
+        );
+    }
+    return embeddings;
+};
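
A sketch of the data layout faceEmbeddings assumes: each aligned face contributes 112 * 112 * 3 floats to the input, and each face gets back one 192-dimensional Float32Array; the helper name embedAlignedFaces is hypothetical:

    import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";

    const embedAlignedFaces = async (alignedFacesData: Float32Array) => {
        const perFace = mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3;
        const faceCount = alignedFacesData.length / perFace;
        const embeddings = await faceEmbeddings(alignedFacesData);
        console.assert(embeddings.length === faceCount);
        return embeddings; // Array<Float32Array>, one per face.
    };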

+ 194 - 0
web/apps/photos/src/services/face/f-index.ts

@@ -0,0 +1,194 @@
+import { openCache } from "@/next/blob-cache";
+import log from "@/next/log";
+import { faceAlignment } from "services/face/align";
+import mlIDbStorage from "services/face/db";
+import { detectFaces, getRelativeDetection } from "services/face/detect";
+import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
+import {
+    DetectedFace,
+    Face,
+    MLSyncFileContext,
+    type FaceAlignment,
+} from "services/face/types";
+import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
+import { detectBlur } from "./blur";
+import { getFaceCrop } from "./crop";
+import {
+    fetchImageBitmap,
+    fetchImageBitmapForContext,
+    getFaceId,
+    getLocalFile,
+} from "./image";
+
+export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
+    const { newMlFile } = fileContext;
+    const startTime = Date.now();
+
+    await syncFileFaceDetections(fileContext);
+
+    if (newMlFile.faces && newMlFile.faces.length > 0) {
+        await syncFileFaceCrops(fileContext);
+
+        const alignedFacesData = await syncFileFaceAlignments(fileContext);
+
+        await syncFileFaceEmbeddings(fileContext, alignedFacesData);
+
+        await syncFileFaceMakeRelativeDetections(fileContext);
+    }
+    log.debug(
+        () =>
+            `Face detection for file ${fileContext.enteFile.id} took ${Math.round(Date.now() - startTime)} ms`,
+    );
+};
+
+const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
+    const { newMlFile } = fileContext;
+    newMlFile.faceDetectionMethod = {
+        value: "YoloFace",
+        version: 1,
+    };
+    fileContext.newDetection = true;
+    const imageBitmap = await fetchImageBitmapForContext(fileContext);
+    const faceDetections = await detectFaces(imageBitmap);
+    // TODO: reenable faces filtering based on width
+    const detectedFaces = faceDetections?.map((detection) => {
+        return {
+            fileId: fileContext.enteFile.id,
+            detection,
+        } as DetectedFace;
+    });
+    newMlFile.faces = detectedFaces?.map((detectedFace) => ({
+        ...detectedFace,
+        id: getFaceId(detectedFace, newMlFile.imageDimensions),
+    }));
+    // ?.filter((f) =>
+    //     f.box.width > syncContext.config.faceDetection.minFaceSize
+    // );
+    log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
+};
+
+const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
+    const { newMlFile } = fileContext;
+    const imageBitmap = await fetchImageBitmapForContext(fileContext);
+    newMlFile.faceCropMethod = {
+        value: "ArcFace",
+        version: 1,
+    };
+
+    for (const face of newMlFile.faces) {
+        await saveFaceCrop(imageBitmap, face);
+    }
+};
+
+const syncFileFaceAlignments = async (
+    fileContext: MLSyncFileContext,
+): Promise<Float32Array> => {
+    const { newMlFile } = fileContext;
+    newMlFile.faceAlignmentMethod = {
+        value: "ArcFace",
+        version: 1,
+    };
+    fileContext.newAlignment = true;
+    const imageBitmap =
+        fileContext.imageBitmap ||
+        (await fetchImageBitmapForContext(fileContext));
+
+    // Execute the face alignment calculations
+    for (const face of newMlFile.faces) {
+        face.alignment = faceAlignment(face.detection);
+    }
+    // Extract face images and convert to Float32Array
+    const faceAlignments = newMlFile.faces.map((f) => f.alignment);
+    const faceImages = await extractFaceImagesToFloat32(
+        faceAlignments,
+        mobileFaceNetFaceSize,
+        imageBitmap,
+    );
+    const blurValues = detectBlur(faceImages, newMlFile.faces);
+    newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
+
+    imageBitmap.close();
+    log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
+
+    return faceImages;
+};
+
+const syncFileFaceEmbeddings = async (
+    fileContext: MLSyncFileContext,
+    alignedFacesInput: Float32Array,
+) => {
+    const { newMlFile } = fileContext;
+    newMlFile.faceEmbeddingMethod = {
+        value: "MobileFaceNet",
+        version: 2,
+    };
+    // TODO: when not storing face crops, image will be needed to extract faces
+    // fileContext.imageBitmap ||
+    //     (await this.getImageBitmap(fileContext));
+
+    const embeddings = await faceEmbeddings(alignedFacesInput);
+    newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
+
+    log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
+};
+
+const syncFileFaceMakeRelativeDetections = async (
+    fileContext: MLSyncFileContext,
+) => {
+    const { newMlFile } = fileContext;
+    for (let i = 0; i < newMlFile.faces.length; i++) {
+        const face = newMlFile.faces[i];
+        if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
+        face.detection = getRelativeDetection(
+            face.detection,
+            newMlFile.imageDimensions,
+        );
+    }
+};
+
+export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
+    const faceCrop = getFaceCrop(imageBitmap, face.detection);
+
+    const blob = await imageBitmapToBlob(faceCrop.image);
+
+    const cache = await openCache("face-crops");
+    await cache.put(face.id, blob);
+
+    faceCrop.image.close();
+
+    return blob;
+};
+
+export const regenerateFaceCrop = async (faceID: string) => {
+    const fileID = Number(faceID.split("-")[0]);
+    const personFace = await mlIDbStorage.getFace(fileID, faceID);
+    if (!personFace) {
+        throw Error("Face not found");
+    }
+
+    const file = await getLocalFile(personFace.fileId);
+    const imageBitmap = await fetchImageBitmap(file);
+    return await saveFaceCrop(imageBitmap, personFace);
+};
+
+async function extractFaceImagesToFloat32(
+    faceAlignments: Array<FaceAlignment>,
+    faceSize: number,
+    image: ImageBitmap,
+): Promise<Float32Array> {
+    const faceData = new Float32Array(
+        faceAlignments.length * faceSize * faceSize * 3,
+    );
+    for (let i = 0; i < faceAlignments.length; i++) {
+        const alignedFace = faceAlignments[i];
+        const faceDataOffset = i * faceSize * faceSize * 3;
+        warpAffineFloat32List(
+            image,
+            alignedFace,
+            faceSize,
+            faceData,
+            faceDataOffset,
+        );
+    }
+    return faceData;
+}
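
A sketch of regenerating a face crop from its ID; per the code above, the numeric file ID is encoded as the prefix of the face ID ("<fileID>-..."), and the result is written back to the "face-crops" cache. The helper name refreshCrop is hypothetical:

    import { regenerateFaceCrop } from "services/face/f-index";

    const refreshCrop = async (faceID: string) => {
        // Looks up the face in mlIDbStorage, re-fetches the original image,
        // and writes a fresh crop blob into the "face-crops" cache.
        return await regenerateFaceCrop(faceID);
    };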

+ 8 - 18
web/apps/photos/src/services/ml/face.worker.ts → web/apps/photos/src/services/face/face.worker.ts

@@ -1,14 +1,10 @@
-import log from "@/next/log";
+import { APPS } from "@ente/shared/apps/constants";
 import { expose } from "comlink";
+import downloadManager from "services/download";
 import mlService from "services/machineLearning/machineLearningService";
-import { MachineLearningWorker } from "services/ml/types";
 import { EnteFile } from "types/file";
 
-export class DedicatedMLWorker implements MachineLearningWorker {
-    constructor() {
-        log.info("DedicatedMLWorker constructor called");
-    }
-
+export class DedicatedMLWorker {
     public async closeLocalSyncContext() {
         return mlService.closeLocalSyncContext();
     }
@@ -19,23 +15,17 @@ export class DedicatedMLWorker implements MachineLearningWorker {
         enteFile: EnteFile,
         localFile: globalThis.File,
     ) {
-        return mlService.syncLocalFile(token, userID, enteFile, localFile);
+        mlService.syncLocalFile(token, userID, enteFile, localFile);
     }
 
     public async sync(token: string, userID: number) {
+        await downloadManager.init(APPS.PHOTOS, { token });
         return mlService.sync(token, userID);
     }
 
-    public async regenerateFaceCrop(
-        token: string,
-        userID: number,
-        faceID: string,
-    ) {
-        return mlService.regenerateFaceCrop(token, userID, faceID);
-    }
-
-    public close() {
-        self.close();
+    public async regenerateFaceCrop(token: string, faceID: string) {
+        await downloadManager.init(APPS.PHOTOS, { token });
+        return mlService.regenerateFaceCrop(faceID);
     }
 }
 

+ 0 - 0
web/apps/photos/src/services/ml/geom.ts → web/apps/photos/src/services/face/geom.ts


+ 33 - 51
web/apps/photos/src/services/machineLearning/readerService.ts → web/apps/photos/src/services/face/image.ts

@@ -2,65 +2,47 @@ import { FILE_TYPE } from "@/media/file-type";
 import { decodeLivePhoto } from "@/media/live-photo";
 import log from "@/next/log";
 import DownloadManager from "services/download";
+import { Dimensions } from "services/face/geom";
+import { DetectedFace, MLSyncFileContext } from "services/face/types";
 import { getLocalFiles } from "services/fileService";
-import { Dimensions } from "services/ml/geom";
-import {
-    DetectedFace,
-    MLSyncContext,
-    MLSyncFileContext,
-} from "services/ml/types";
 import { EnteFile } from "types/file";
 import { getRenderableImage } from "utils/file";
 import { clamp } from "utils/image";
 
-class ReaderService {
-    async getImageBitmap(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
+export const fetchImageBitmapForContext = async (
+    fileContext: MLSyncFileContext,
+) => {
+    if (fileContext.imageBitmap) {
+        return fileContext.imageBitmap;
+    }
+    if (fileContext.localFile) {
+        if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
+            throw new Error("Local file of only image type is supported");
+        }
+        fileContext.imageBitmap = await getLocalFileImageBitmap(
+            fileContext.enteFile,
+            fileContext.localFile,
+        );
+    } else if (
+        [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
+            fileContext.enteFile.metadata.fileType,
+        )
     ) {
-        try {
-            if (fileContext.imageBitmap) {
-                return fileContext.imageBitmap;
-            }
-            if (fileContext.localFile) {
-                if (
-                    fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE
-                ) {
-                    throw new Error(
-                        "Local file of only image type is supported",
-                    );
-                }
-                fileContext.imageBitmap = await getLocalFileImageBitmap(
-                    fileContext.enteFile,
-                    fileContext.localFile,
-                );
-            } else if (
-                syncContext.config.imageSource === "Original" &&
-                [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
-                    fileContext.enteFile.metadata.fileType,
-                )
-            ) {
-                fileContext.imageBitmap = await fetchImageBitmap(
-                    fileContext.enteFile,
-                );
-            } else {
-                fileContext.imageBitmap = await getThumbnailImageBitmap(
-                    fileContext.enteFile,
-                );
-            }
+        fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
+    } else {
+        // TODO-ML(MR): We don't do it on videos, when will we ever come
+        // here?
+        fileContext.imageBitmap = await getThumbnailImageBitmap(
+            fileContext.enteFile,
+        );
+    }
 
-            fileContext.newMlFile.imageSource = syncContext.config.imageSource;
-            const { width, height } = fileContext.imageBitmap;
-            fileContext.newMlFile.imageDimensions = { width, height };
+    fileContext.newMlFile.imageSource = "Original";
+    const { width, height } = fileContext.imageBitmap;
+    fileContext.newMlFile.imageDimensions = { width, height };
 
-            return fileContext.imageBitmap;
-        } catch (e) {
-            log.error("failed to create image bitmap", e);
-            throw e;
-        }
-    }
-}
-export default new ReaderService();
+    return fileContext.imageBitmap;
+};
 
 export async function getLocalFile(fileId: number) {
     const localFiles = await getLocalFiles();

+ 1 - 1
web/apps/photos/src/services/ml/face.ts → web/apps/photos/src/services/face/index.ts

@@ -1,5 +1,5 @@
 import { ComlinkWorker } from "@/next/worker/comlink-worker";
-import type { DedicatedMLWorker } from "services/ml/face.worker";
+import type { DedicatedMLWorker } from "services/face/face.worker";
 
 const createFaceWebWorker = () =>
     new Worker(new URL("face.worker.ts", import.meta.url));

+ 111 - 0
web/apps/photos/src/services/face/people.ts

@@ -0,0 +1,111 @@
+import log from "@/next/log";
+import mlIDbStorage from "services/face/db";
+import { Face, Person } from "services/face/types";
+import { type MLSyncContext } from "services/machineLearning/machineLearningService";
+import { clusterFaces } from "./cluster";
+import { saveFaceCrop } from "./f-index";
+import { fetchImageBitmap, getLocalFile } from "./image";
+
+export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
+    const filesVersion = await mlIDbStorage.getIndexVersion("files");
+    if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
+        return;
+    }
+
+    // TODO: have faces addressable through fileId + faceId
+    // to avoid index based addressing, which is prone to wrong results
+    // one way could be to match nearest face within threshold in the file
+    const allFacesMap =
+        syncContext.allSyncedFacesMap ??
+        (syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
+    const allFaces = [...allFacesMap.values()].flat();
+
+    await runFaceClustering(syncContext, allFaces);
+    await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
+
+    await mlIDbStorage.setIndexVersion("people", filesVersion);
+};
+
+const runFaceClustering = async (
+    syncContext: MLSyncContext,
+    allFaces: Array<Face>,
+) => {
+    // await this.init();
+
+    if (!allFaces || allFaces.length < 50) {
+        log.info(
+            `Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
+        );
+        return;
+    }
+
+    log.info("Running clustering allFaces: ", allFaces.length);
+    syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
+        allFaces.map((f) => Array.from(f.embedding)),
+    );
+    syncContext.mlLibraryData.faceClusteringMethod = {
+        value: "Hdbscan",
+        version: 1,
+    };
+    log.info(
+        "[MLService] Got face clustering results: ",
+        JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
+    );
+
+    // syncContext.faceClustersWithNoise = {
+    //     clusters: syncContext.faceClusteringResults.clusters.map(
+    //         (faces) => ({
+    //             faces,
+    //         })
+    //     ),
+    //     noise: syncContext.faceClusteringResults.noise,
+    // };
+};
+
+const syncPeopleFromClusters = async (
+    syncContext: MLSyncContext,
+    allFacesMap: Map<number, Array<Face>>,
+    allFaces: Array<Face>,
+) => {
+    const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
+    if (!clusters || clusters.length < 1) {
+        return;
+    }
+
+    for (const face of allFaces) {
+        face.personId = undefined;
+    }
+    await mlIDbStorage.clearAllPeople();
+    for (const [index, cluster] of clusters.entries()) {
+        const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
+
+        // TODO: take default display face from last leaves of hdbscan clusters
+        const personFace = faces.reduce((best, face) =>
+            face.detection.probability > best.detection.probability
+                ? face
+                : best,
+        );
+
+        if (personFace && !personFace.crop?.cacheKey) {
+            const file = await getLocalFile(personFace.fileId);
+            const imageBitmap = await fetchImageBitmap(file);
+            await saveFaceCrop(imageBitmap, personFace);
+        }
+
+        const person: Person = {
+            id: index,
+            files: faces.map((f) => f.fileId),
+            displayFaceId: personFace?.id,
+            faceCropCacheKey: personFace?.crop?.cacheKey,
+        };
+
+        await mlIDbStorage.putPerson(person);
+
+        faces.forEach((face) => {
+            face.personId = person.id;
+        });
+        // log.info("Creating person: ", person, faces);
+    }
+
+    await mlIDbStorage.updateFaces(allFacesMap);
+};
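
A sketch of consuming the result of this sync: after syncPeopleIndex runs, each clustered Face carries a personId matching the Person records written via mlIDbStorage.putPerson, so grouping faces by person needs only the types from this commit. The helper name groupFacesByPerson is hypothetical:

    import type { Face } from "services/face/types";

    const groupFacesByPerson = (faces: Face[]): Map<number, Face[]> => {
        const byPerson = new Map<number, Face[]>();
        for (const face of faces) {
            if (face.personId === undefined) continue; // noise / unclustered
            const group = byPerson.get(face.personId) ?? [];
            group.push(face);
            byPerson.set(face.personId, group);
        }
        return byPerson;
    };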

+ 161 - 0
web/apps/photos/src/services/face/types.ts

@@ -0,0 +1,161 @@
+import type { ClusterFacesResult } from "services/face/cluster";
+import { Dimensions } from "services/face/geom";
+import { EnteFile } from "types/file";
+import { Box, Point } from "./geom";
+
+export interface MLSyncResult {
+    nOutOfSyncFiles: number;
+    nSyncedFiles: number;
+    nSyncedFaces: number;
+    nFaceClusters: number;
+    nFaceNoise: number;
+    error?: Error;
+}
+
+export declare type FaceDescriptor = Float32Array;
+
+export declare type Cluster = Array<number>;
+
+export interface FacesCluster {
+    faces: Cluster;
+    summary?: FaceDescriptor;
+}
+
+export interface FacesClustersWithNoise {
+    clusters: Array<FacesCluster>;
+    noise: Cluster;
+}
+
+export interface NearestCluster {
+    cluster: FacesCluster;
+    distance: number;
+}
+
+export declare type Landmark = Point;
+
+export declare type ImageType = "Original" | "Preview";
+
+export declare type FaceDetectionMethod = "YoloFace";
+
+export declare type FaceCropMethod = "ArcFace";
+
+export declare type FaceAlignmentMethod = "ArcFace";
+
+export declare type FaceEmbeddingMethod = "MobileFaceNet";
+
+export declare type BlurDetectionMethod = "Laplacian";
+
+export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
+
+export class AlignedBox {
+    box: Box;
+    rotation: number;
+}
+
+export interface Versioned<T> {
+    value: T;
+    version: number;
+}
+
+export interface FaceDetection {
+    // box and landmarks are relative to the image dimensions stored at mlFileData
+    box: Box;
+    landmarks?: Array<Landmark>;
+    probability?: number;
+}
+
+export interface DetectedFace {
+    fileId: number;
+    detection: FaceDetection;
+}
+
+export interface DetectedFaceWithId extends DetectedFace {
+    id: string;
+}
+
+export interface FaceCrop {
+    image: ImageBitmap;
+    // imageBox is relative to the image dimensions stored at mlFileData
+    imageBox: Box;
+}
+
+export interface StoredFaceCrop {
+    cacheKey: string;
+    imageBox: Box;
+}
+
+export interface CroppedFace extends DetectedFaceWithId {
+    crop?: StoredFaceCrop;
+}
+
+export interface FaceAlignment {
+    // TODO: remove affine matrix as rotation, size and center
+    // are simple to store and use, affine matrix adds complexity while getting crop
+    affineMatrix: Array<Array<number>>;
+    rotation: number;
+    // size and center are relative to the image dimensions stored at mlFileData
+    size: number;
+    center: Point;
+}
+
+export interface AlignedFace extends CroppedFace {
+    alignment?: FaceAlignment;
+    blurValue?: number;
+}
+
+export declare type FaceEmbedding = Float32Array;
+
+export interface FaceWithEmbedding extends AlignedFace {
+    embedding?: FaceEmbedding;
+}
+
+export interface Face extends FaceWithEmbedding {
+    personId?: number;
+}
+
+export interface Person {
+    id: number;
+    name?: string;
+    files: Array<number>;
+    displayFaceId?: string;
+    faceCropCacheKey?: string;
+}
+
+export interface MlFileData {
+    fileId: number;
+    faces?: Face[];
+    imageSource?: ImageType;
+    imageDimensions?: Dimensions;
+    faceDetectionMethod?: Versioned<FaceDetectionMethod>;
+    faceCropMethod?: Versioned<FaceCropMethod>;
+    faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
+    faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
+    mlVersion: number;
+    errorCount: number;
+    lastErrorMessage?: string;
+}
+
+export interface MLSearchConfig {
+    enabled: boolean;
+}
+
+export interface MLSyncFileContext {
+    enteFile: EnteFile;
+    localFile?: globalThis.File;
+
+    oldMlFile?: MlFileData;
+    newMlFile?: MlFileData;
+
+    imageBitmap?: ImageBitmap;
+
+    newDetection?: boolean;
+    newAlignment?: boolean;
+}
+
+export interface MLLibraryData {
+    faceClusteringMethod?: Versioned<ClusteringMethod>;
+    faceClusteringResults?: ClusterFacesResult;
+    faceClustersWithNoise?: FacesClustersWithNoise;
+}
+
+export declare type MLIndex = "files" | "people";

+ 0 - 60
web/apps/photos/src/services/machineLearning/arcfaceCropService.ts

@@ -1,60 +0,0 @@
-import { Box, enlargeBox } from "services/ml/geom";
-import {
-    FaceAlignment,
-    FaceCrop,
-    FaceCropConfig,
-    FaceCropMethod,
-    FaceCropService,
-    FaceDetection,
-    Versioned,
-} from "services/ml/types";
-import { cropWithRotation } from "utils/image";
-import { getArcfaceAlignment } from "./arcfaceAlignmentService";
-
-class ArcFaceCropService implements FaceCropService {
-    public method: Versioned<FaceCropMethod>;
-
-    constructor() {
-        this.method = {
-            value: "ArcFace",
-            version: 1,
-        };
-    }
-
-    public async getFaceCrop(
-        imageBitmap: ImageBitmap,
-        faceDetection: FaceDetection,
-        config: FaceCropConfig,
-    ): Promise<FaceCrop> {
-        const alignedFace = getArcfaceAlignment(faceDetection);
-        const faceCrop = getFaceCrop(imageBitmap, alignedFace, config);
-
-        return faceCrop;
-    }
-}
-
-export default new ArcFaceCropService();
-
-export function getFaceCrop(
-    imageBitmap: ImageBitmap,
-    alignment: FaceAlignment,
-    config: FaceCropConfig,
-): FaceCrop {
-    const alignmentBox = new Box({
-        x: alignment.center.x - alignment.size / 2,
-        y: alignment.center.y - alignment.size / 2,
-        width: alignment.size,
-        height: alignment.size,
-    }).round();
-    const scaleForPadding = 1 + config.padding * 2;
-    const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
-    const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
-        width: config.maxSize,
-        height: config.maxSize,
-    });
-
-    return {
-        image: faceImageBitmap,
-        imageBox: paddedBox,
-    };
-}

+ 0 - 88
web/apps/photos/src/services/machineLearning/clusteringService.ts

@@ -1,88 +0,0 @@
-import { DBSCAN, KMEANS, OPTICS } from "density-clustering";
-import { Hdbscan } from "hdbscan";
-import { HdbscanInput } from "hdbscan/dist/types";
-import {
-    ClusteringConfig,
-    ClusteringInput,
-    ClusteringMethod,
-    ClusteringResults,
-    HdbscanResults,
-    Versioned,
-} from "services/ml/types";
-
-class ClusteringService {
-    private dbscan: DBSCAN;
-    private optics: OPTICS;
-    private kmeans: KMEANS;
-
-    constructor() {
-        this.dbscan = new DBSCAN();
-        this.optics = new OPTICS();
-        this.kmeans = new KMEANS();
-    }
-
-    public clusterUsingDBSCAN(
-        dataset: Array<Array<number>>,
-        epsilon: number = 1.0,
-        minPts: number = 2,
-    ): ClusteringResults {
-        // log.info("distanceFunction", DBSCAN._);
-        const clusters = this.dbscan.run(dataset, epsilon, minPts);
-        const noise = this.dbscan.noise;
-        return { clusters, noise };
-    }
-
-    public clusterUsingOPTICS(
-        dataset: Array<Array<number>>,
-        epsilon: number = 1.0,
-        minPts: number = 2,
-    ) {
-        const clusters = this.optics.run(dataset, epsilon, minPts);
-        return { clusters, noise: [] };
-    }
-
-    public clusterUsingKMEANS(
-        dataset: Array<Array<number>>,
-        numClusters: number = 5,
-    ) {
-        const clusters = this.kmeans.run(dataset, numClusters);
-        return { clusters, noise: [] };
-    }
-
-    public clusterUsingHdbscan(hdbscanInput: HdbscanInput): HdbscanResults {
-        if (hdbscanInput.input.length < 10) {
-            throw Error("too few samples to run Hdbscan");
-        }
-
-        const hdbscan = new Hdbscan(hdbscanInput);
-        const clusters = hdbscan.getClusters();
-        const noise = hdbscan.getNoise();
-        const debugInfo = hdbscan.getDebugInfo();
-
-        return { clusters, noise, debugInfo };
-    }
-
-    public cluster(
-        method: Versioned<ClusteringMethod>,
-        input: ClusteringInput,
-        config: ClusteringConfig,
-    ) {
-        if (method.value === "Hdbscan") {
-            return this.clusterUsingHdbscan({
-                input,
-                minClusterSize: config.minClusterSize,
-                debug: config.generateDebugInfo,
-            });
-        } else if (method.value === "Dbscan") {
-            return this.clusterUsingDBSCAN(
-                input,
-                config.maxDistanceInsideCluster,
-                config.minClusterSize,
-            );
-        } else {
-            throw Error("Unknown clustering method: " + method.value);
-        }
-    }
-}
-
-export default ClusteringService;

+ 0 - 37
web/apps/photos/src/services/machineLearning/dbscanClusteringService.ts

@@ -1,37 +0,0 @@
-import { DBSCAN } from "density-clustering";
-import {
-    ClusteringConfig,
-    ClusteringInput,
-    ClusteringMethod,
-    ClusteringService,
-    HdbscanResults,
-    Versioned,
-} from "services/ml/types";
-
-class DbscanClusteringService implements ClusteringService {
-    public method: Versioned<ClusteringMethod>;
-
-    constructor() {
-        this.method = {
-            value: "Dbscan",
-            version: 1,
-        };
-    }
-
-    public async cluster(
-        input: ClusteringInput,
-        config: ClusteringConfig,
-    ): Promise<HdbscanResults> {
-        // log.info('Clustering input: ', input);
-        const dbscan = new DBSCAN();
-        const clusters = dbscan.run(
-            input,
-            config.clusterSelectionEpsilon,
-            config.minClusterSize,
-        );
-        const noise = dbscan.noise;
-        return { clusters, noise };
-    }
-}
-
-export default new DbscanClusteringService();

+ 0 - 359
web/apps/photos/src/services/machineLearning/faceService.ts

@@ -1,359 +0,0 @@
-import { openCache } from "@/next/blob-cache";
-import log from "@/next/log";
-import mlIDbStorage from "services/ml/db";
-import {
-    DetectedFace,
-    Face,
-    MLSyncContext,
-    MLSyncFileContext,
-    type FaceAlignment,
-    type Versioned,
-} from "services/ml/types";
-import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
-import ReaderService, {
-    fetchImageBitmap,
-    getFaceId,
-    getLocalFile,
-} from "./readerService";
-
-class FaceService {
-    async syncFileFaceDetections(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-    ) {
-        const { oldMlFile, newMlFile } = fileContext;
-        if (
-            !isDifferentOrOld(
-                oldMlFile?.faceDetectionMethod,
-                syncContext.faceDetectionService.method,
-            ) &&
-            oldMlFile?.imageSource === syncContext.config.imageSource
-        ) {
-            newMlFile.faces = oldMlFile?.faces?.map((existingFace) => ({
-                id: existingFace.id,
-                fileId: existingFace.fileId,
-                detection: existingFace.detection,
-            }));
-
-            newMlFile.imageSource = oldMlFile.imageSource;
-            newMlFile.imageDimensions = oldMlFile.imageDimensions;
-            newMlFile.faceDetectionMethod = oldMlFile.faceDetectionMethod;
-            return;
-        }
-
-        newMlFile.faceDetectionMethod = syncContext.faceDetectionService.method;
-        fileContext.newDetection = true;
-        const imageBitmap = await ReaderService.getImageBitmap(
-            syncContext,
-            fileContext,
-        );
-        const timerId = `faceDetection-${fileContext.enteFile.id}`;
-        console.time(timerId);
-        const faceDetections =
-            await syncContext.faceDetectionService.detectFaces(imageBitmap);
-        console.timeEnd(timerId);
-        console.log("faceDetections: ", faceDetections?.length);
-
-        // TODO: re-enable face filtering based on width
-        const detectedFaces = faceDetections?.map((detection) => {
-            return {
-                fileId: fileContext.enteFile.id,
-                detection,
-            } as DetectedFace;
-        });
-        newMlFile.faces = detectedFaces?.map((detectedFace) => ({
-            ...detectedFace,
-            id: getFaceId(detectedFace, newMlFile.imageDimensions),
-        }));
-        // ?.filter((f) =>
-        //     f.box.width > syncContext.config.faceDetection.minFaceSize
-        // );
-        log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
-    }
-
-    async syncFileFaceCrops(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-    ) {
-        const { oldMlFile, newMlFile } = fileContext;
-        if (
-            // !syncContext.config.faceCrop.enabled ||
-            !fileContext.newDetection &&
-            !isDifferentOrOld(
-                oldMlFile?.faceCropMethod,
-                syncContext.faceCropService.method,
-            ) &&
-            areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
-        ) {
-            for (const [index, face] of newMlFile.faces.entries()) {
-                face.crop = oldMlFile.faces[index].crop;
-            }
-            newMlFile.faceCropMethod = oldMlFile.faceCropMethod;
-            return;
-        }
-
-        const imageBitmap = await ReaderService.getImageBitmap(
-            syncContext,
-            fileContext,
-        );
-        newMlFile.faceCropMethod = syncContext.faceCropService.method;
-
-        for (const face of newMlFile.faces) {
-            await this.saveFaceCrop(imageBitmap, face, syncContext);
-        }
-    }
-
-    async syncFileFaceAlignments(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-    ): Promise<Float32Array> {
-        const { oldMlFile, newMlFile } = fileContext;
-        if (
-            !fileContext.newDetection &&
-            !isDifferentOrOld(
-                oldMlFile?.faceAlignmentMethod,
-                syncContext.faceAlignmentService.method,
-            ) &&
-            areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
-        ) {
-            for (const [index, face] of newMlFile.faces.entries()) {
-                face.alignment = oldMlFile.faces[index].alignment;
-            }
-            newMlFile.faceAlignmentMethod = oldMlFile.faceAlignmentMethod;
-            return;
-        }
-
-        newMlFile.faceAlignmentMethod = syncContext.faceAlignmentService.method;
-        fileContext.newAlignment = true;
-        const imageBitmap =
-            fileContext.imageBitmap ||
-            (await ReaderService.getImageBitmap(syncContext, fileContext));
-
-        // Execute the face alignment calculations
-        for (const face of newMlFile.faces) {
-            face.alignment = syncContext.faceAlignmentService.getFaceAlignment(
-                face.detection,
-            );
-        }
-        // Extract face images and convert to Float32Array
-        const faceAlignments = newMlFile.faces.map((f) => f.alignment);
-        const faceImages = await extractFaceImagesToFloat32(
-            faceAlignments,
-            syncContext.faceEmbeddingService.faceSize,
-            imageBitmap,
-        );
-        const blurValues = syncContext.blurDetectionService.detectBlur(
-            faceImages,
-            newMlFile.faces,
-        );
-        newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
-
-        imageBitmap.close();
-        log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
-
-        return faceImages;
-    }
-
-    async syncFileFaceEmbeddings(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-        alignedFacesInput: Float32Array,
-    ) {
-        const { oldMlFile, newMlFile } = fileContext;
-        if (
-            !fileContext.newAlignment &&
-            !isDifferentOrOld(
-                oldMlFile?.faceEmbeddingMethod,
-                syncContext.faceEmbeddingService.method,
-            ) &&
-            areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
-        ) {
-            for (const [index, face] of newMlFile.faces.entries()) {
-                face.embedding = oldMlFile.faces[index].embedding;
-            }
-            newMlFile.faceEmbeddingMethod = oldMlFile.faceEmbeddingMethod;
-            return;
-        }
-
-        newMlFile.faceEmbeddingMethod = syncContext.faceEmbeddingService.method;
-        // TODO: when not storing face crops, image will be needed to extract faces
-        // fileContext.imageBitmap ||
-        //     (await this.getImageBitmap(syncContext, fileContext));
-
-        const embeddings =
-            await syncContext.faceEmbeddingService.getFaceEmbeddings(
-                alignedFacesInput,
-            );
-        newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
-
-        log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
-    }
-
-    async syncFileFaceMakeRelativeDetections(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-    ) {
-        const { oldMlFile, newMlFile } = fileContext;
-        if (
-            !fileContext.newAlignment &&
-            !isDifferentOrOld(
-                oldMlFile?.faceEmbeddingMethod,
-                syncContext.faceEmbeddingService.method,
-            ) &&
-            areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
-        ) {
-            return;
-        }
-        for (let i = 0; i < newMlFile.faces.length; i++) {
-            const face = newMlFile.faces[i];
-            if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
-            face.detection =
-                syncContext.faceDetectionService.getRelativeDetection(
-                    face.detection,
-                    newMlFile.imageDimensions,
-                );
-        }
-    }
-
-    async saveFaceCrop(
-        imageBitmap: ImageBitmap,
-        face: Face,
-        syncContext: MLSyncContext,
-    ) {
-        const faceCrop = await syncContext.faceCropService.getFaceCrop(
-            imageBitmap,
-            face.detection,
-            syncContext.config.faceCrop,
-        );
-
-        const blobOptions = syncContext.config.faceCrop.blobOptions;
-        const blob = await imageBitmapToBlob(faceCrop.image, blobOptions);
-
-        const cache = await openCache("face-crops");
-        await cache.put(face.id, blob);
-
-        faceCrop.image.close();
-
-        return blob;
-    }
-
-    async getAllSyncedFacesMap(syncContext: MLSyncContext) {
-        if (syncContext.allSyncedFacesMap) {
-            return syncContext.allSyncedFacesMap;
-        }
-
-        syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap();
-        return syncContext.allSyncedFacesMap;
-    }
-
-    public async runFaceClustering(
-        syncContext: MLSyncContext,
-        allFaces: Array<Face>,
-    ) {
-        // await this.init();
-
-        const clusteringConfig = syncContext.config.faceClustering;
-
-        if (!allFaces || allFaces.length < clusteringConfig.minInputSize) {
-            log.info(
-                "[MLService] Too few faces to cluster, not running clustering: ",
-                allFaces.length,
-            );
-            return;
-        }
-
-        log.info("Running clustering allFaces: ", allFaces.length);
-        syncContext.mlLibraryData.faceClusteringResults =
-            await syncContext.faceClusteringService.cluster(
-                allFaces.map((f) => Array.from(f.embedding)),
-                syncContext.config.faceClustering,
-            );
-        syncContext.mlLibraryData.faceClusteringMethod =
-            syncContext.faceClusteringService.method;
-        log.info(
-            "[MLService] Got face clustering results: ",
-            JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
-        );
-
-        // syncContext.faceClustersWithNoise = {
-        //     clusters: syncContext.faceClusteringResults.clusters.map(
-        //         (faces) => ({
-        //             faces,
-        //         })
-        //     ),
-        //     noise: syncContext.faceClusteringResults.noise,
-        // };
-    }
-
-    public async regenerateFaceCrop(
-        syncContext: MLSyncContext,
-        faceID: string,
-    ) {
-        const fileID = Number(faceID.split("-")[0]);
-        const personFace = await mlIDbStorage.getFace(fileID, faceID);
-        if (!personFace) {
-            throw Error("Face not found");
-        }
-
-        const file = await getLocalFile(personFace.fileId);
-        const imageBitmap = await fetchImageBitmap(file);
-        return await this.saveFaceCrop(imageBitmap, personFace, syncContext);
-    }
-}
-
-export default new FaceService();
-
-export function areFaceIdsSame(ofFaces: Array<Face>, toFaces: Array<Face>) {
-    if (
-        (ofFaces === null || ofFaces === undefined) &&
-        (toFaces === null || toFaces === undefined)
-    ) {
-        return true;
-    }
-    return primitiveArrayEquals(
-        ofFaces?.map((f) => f.id),
-        toFaces?.map((f) => f.id),
-    );
-}
-
-function primitiveArrayEquals(a, b) {
-    return (
-        Array.isArray(a) &&
-        Array.isArray(b) &&
-        a.length === b.length &&
-        a.every((val, index) => val === b[index])
-    );
-}
-
-export function isDifferentOrOld(
-    method: Versioned<string>,
-    thanMethod: Versioned<string>,
-) {
-    return (
-        !method ||
-        method.value !== thanMethod.value ||
-        method.version < thanMethod.version
-    );
-}
-
-async function extractFaceImagesToFloat32(
-    faceAlignments: Array<FaceAlignment>,
-    faceSize: number,
-    image: ImageBitmap,
-): Promise<Float32Array> {
-    const faceData = new Float32Array(
-        faceAlignments.length * faceSize * faceSize * 3,
-    );
-    for (let i = 0; i < faceAlignments.length; i++) {
-        const alignedFace = faceAlignments[i];
-        const faceDataOffset = i * faceSize * faceSize * 3;
-        warpAffineFloat32List(
-            image,
-            alignedFace,
-            faceSize,
-            faceData,
-            faceDataOffset,
-        );
-    }
-    return faceData;
-}

+ 0 - 44
web/apps/photos/src/services/machineLearning/hdbscanClusteringService.ts

@@ -1,44 +0,0 @@
-import { Hdbscan } from "hdbscan";
-import {
-    ClusteringConfig,
-    ClusteringInput,
-    ClusteringMethod,
-    ClusteringService,
-    HdbscanResults,
-    Versioned,
-} from "services/ml/types";
-
-class HdbscanClusteringService implements ClusteringService {
-    public method: Versioned<ClusteringMethod>;
-
-    constructor() {
-        this.method = {
-            value: "Hdbscan",
-            version: 1,
-        };
-    }
-
-    public async cluster(
-        input: ClusteringInput,
-        config: ClusteringConfig,
-    ): Promise<HdbscanResults> {
-        // log.info('Clustering input: ', input);
-        const hdbscan = new Hdbscan({
-            input,
-
-            minClusterSize: config.minClusterSize,
-            minSamples: config.minSamples,
-            clusterSelectionEpsilon: config.clusterSelectionEpsilon,
-            clusterSelectionMethod: config.clusterSelectionMethod,
-            debug: config.generateDebugInfo,
-        });
-
-        return {
-            clusters: hdbscan.getClusters(),
-            noise: hdbscan.getNoise(),
-            debugInfo: hdbscan.getDebugInfo(),
-        };
-    }
-}
-
-export default new HdbscanClusteringService();
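
Note: for reference, a minimal sketch of clustering face embeddings with the hdbscan package used above; the option values mirror the DEFAULT_ML_SYNC_CONFIG removed later in this diff, and the helper name is hypothetical, not part of the new code.

import { Hdbscan } from "hdbscan";

// Illustrative sketch only: cluster face embeddings with HDBSCAN. The option
// values are taken from the deleted defaults and are not normative.
const clusterFaceEmbeddings = (embeddings: Float32Array[]) => {
    const hdbscan = new Hdbscan({
        input: embeddings.map((e) => Array.from(e)),
        minClusterSize: 3,
        minSamples: 5,
        clusterSelectionEpsilon: 0.6,
        clusterSelectionMethod: "leaf",
        debug: false,
    });
    return { clusters: hdbscan.getClusters(), noise: hdbscan.getNoise() };
};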

+ 0 - 211
web/apps/photos/src/services/machineLearning/laplacianBlurDetectionService.ts

@@ -1,211 +0,0 @@
-import {
-    BlurDetectionMethod,
-    BlurDetectionService,
-    Face,
-    Versioned,
-} from "services/ml/types";
-import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
-import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
-
-class LaplacianBlurDetectionService implements BlurDetectionService {
-    public method: Versioned<BlurDetectionMethod>;
-
-    public constructor() {
-        this.method = {
-            value: "Laplacian",
-            version: 1,
-        };
-    }
-
-    public detectBlur(alignedFaces: Float32Array, faces: Face[]): number[] {
-        const numFaces = Math.round(
-            alignedFaces.length /
-                (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
-        );
-        const blurValues: number[] = [];
-        for (let i = 0; i < numFaces; i++) {
-            const face = faces[i];
-            const direction = getFaceDirection(face);
-            const faceImage = createGrayscaleIntMatrixFromNormalized2List(
-                alignedFaces,
-                i,
-            );
-            const laplacian = this.applyLaplacian(faceImage, direction);
-            const variance = this.calculateVariance(laplacian);
-            blurValues.push(variance);
-        }
-        return blurValues;
-    }
-
-    private calculateVariance(matrix: number[][]): number {
-        const numRows = matrix.length;
-        const numCols = matrix[0].length;
-        const totalElements = numRows * numCols;
-
-        // Calculate the mean
-        let mean: number = 0;
-        matrix.forEach((row) => {
-            row.forEach((value) => {
-                mean += value;
-            });
-        });
-        mean /= totalElements;
-
-        // Calculate the variance
-        let variance: number = 0;
-        matrix.forEach((row) => {
-            row.forEach((value) => {
-                const diff: number = value - mean;
-                variance += diff * diff;
-            });
-        });
-        variance /= totalElements;
-
-        return variance;
-    }
-
-    private padImage(
-        image: number[][],
-        removeSideColumns: number = 56,
-        direction: FaceDirection = "straight",
-    ): number[][] {
-        // Throw an exception if removeSideColumns is not even
-        if (removeSideColumns % 2 != 0) {
-            throw new Error("removeSideColumns must be even");
-        }
-        const numRows = image.length;
-        const numCols = image[0].length;
-        const paddedNumCols = numCols + 2 - removeSideColumns;
-        const paddedNumRows = numRows + 2;
-
-        // Create a new matrix with extra padding
-        const paddedImage: number[][] = Array.from(
-            { length: paddedNumRows },
-            () => new Array(paddedNumCols).fill(0),
-        );
-
-        // Copy original image into the center of the padded image
-        if (direction === "straight") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] =
-                        image[i][j + Math.round(removeSideColumns / 2)];
-                }
-            }
-        } // If the face is facing left, we only take the right side of the face image
-        else if (direction === "left") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
-                }
-            }
-        } // If the face is facing right, we only take the left side of the face image
-        else if (direction === "right") {
-            for (let i = 0; i < numRows; i++) {
-                for (let j = 0; j < paddedNumCols - 2; j++) {
-                    paddedImage[i + 1][j + 1] = image[i][j];
-                }
-            }
-        }
-
-        // Reflect padding
-        // Top and bottom rows
-        for (let j = 1; j <= paddedNumCols - 2; j++) {
-            paddedImage[0][j] = paddedImage[2][j]; // Top row
-            paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
-        }
-        // Left and right columns
-        for (let i = 0; i < numRows + 2; i++) {
-            paddedImage[i][0] = paddedImage[i][2]; // Left column
-            paddedImage[i][paddedNumCols - 1] =
-                paddedImage[i][paddedNumCols - 3]; // Right column
-        }
-
-        return paddedImage;
-    }
-
-    private applyLaplacian(
-        image: number[][],
-        direction: FaceDirection = "straight",
-    ): number[][] {
-        const paddedImage: number[][] = this.padImage(
-            image,
-            undefined,
-            direction,
-        );
-        const numRows = paddedImage.length - 2;
-        const numCols = paddedImage[0].length - 2;
-
-        // Create an output image initialized to 0
-        const outputImage: number[][] = Array.from({ length: numRows }, () =>
-            new Array(numCols).fill(0),
-        );
-
-        // Define the Laplacian kernel
-        const kernel: number[][] = [
-            [0, 1, 0],
-            [1, -4, 1],
-            [0, 1, 0],
-        ];
-
-        // Apply the kernel to each pixel
-        for (let i = 0; i < numRows; i++) {
-            for (let j = 0; j < numCols; j++) {
-                let sum = 0;
-                for (let ki = 0; ki < 3; ki++) {
-                    for (let kj = 0; kj < 3; kj++) {
-                        sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
-                    }
-                }
-                // Adjust the output value if necessary (e.g., clipping)
-                outputImage[i][j] = sum;
-            }
-        }
-
-        return outputImage;
-    }
-}
-
-export default new LaplacianBlurDetectionService();
-
-type FaceDirection = "left" | "right" | "straight";
-
-const getFaceDirection = (face: Face): FaceDirection => {
-    const landmarks = face.detection.landmarks;
-    const leftEye = landmarks[0];
-    const rightEye = landmarks[1];
-    const nose = landmarks[2];
-    const leftMouth = landmarks[3];
-    const rightMouth = landmarks[4];
-
-    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
-    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
-    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
-
-    const faceIsUpright =
-        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
-        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
-
-    const noseStickingOutLeft =
-        nose.x < Math.min(leftEye.x, rightEye.x) &&
-        nose.x < Math.min(leftMouth.x, rightMouth.x);
-
-    const noseStickingOutRight =
-        nose.x > Math.max(leftEye.x, rightEye.x) &&
-        nose.x > Math.max(leftMouth.x, rightMouth.x);
-
-    const noseCloseToLeftEye =
-        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
-    const noseCloseToRightEye =
-        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
-
-    // if (faceIsUpright && (noseStickingOutLeft || noseCloseToLeftEye)) {
-    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
-        return "left";
-        // } else if (faceIsUpright && (noseStickingOutRight || noseCloseToRightEye)) {
-    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
-        return "right";
-    }
-
-    return "straight";
-};
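
Note: the blur score above is the classic variance-of-the-Laplacian measure. A simplified, self-contained sketch of the same computation (omitting the reflect padding and face-direction handling of the deleted service):

// Illustrative sketch: convolve a grayscale matrix with the 3x3 Laplacian
// kernel (skipping border pixels) and return the variance of the response.
// A low variance suggests a blurry face crop.
const laplacianVariance = (image: number[][]): number => {
    const kernel = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];
    const values: number[] = [];
    for (let i = 1; i < image.length - 1; i++) {
        for (let j = 1; j < image[0].length - 1; j++) {
            let sum = 0;
            for (let ki = 0; ki < 3; ki++)
                for (let kj = 0; kj < 3; kj++)
                    sum += image[i + ki - 1][j + kj - 1] * kernel[ki][kj];
            values.push(sum);
        }
    }
    const mean = values.reduce((a, v) => a + v, 0) / values.length;
    return values.reduce((a, v) => a + (v - mean) ** 2, 0) / values.length;
};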

+ 62 - 278
web/apps/photos/src/services/machineLearning/machineLearningService.ts

@@ -1,107 +1,44 @@
 import { haveWindow } from "@/next/env";
 import log from "@/next/log";
 import { ComlinkWorker } from "@/next/worker/comlink-worker";
-import { APPS } from "@ente/shared/apps/constants";
 import ComlinkCryptoWorker, {
     getDedicatedCryptoWorker,
 } from "@ente/shared/crypto";
 import { DedicatedCryptoWorker } from "@ente/shared/crypto/internal/crypto.worker";
 import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
 import PQueue from "p-queue";
-import downloadManager from "services/download";
 import { putEmbedding } from "services/embeddingService";
-import { getLocalFiles } from "services/fileService";
-import mlIDbStorage, {
-    ML_SEARCH_CONFIG_NAME,
-    ML_SYNC_CONFIG_NAME,
-} from "services/ml/db";
+import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
 import {
-    BlurDetectionMethod,
-    BlurDetectionService,
-    ClusteringMethod,
-    ClusteringService,
     Face,
-    FaceAlignmentMethod,
-    FaceAlignmentService,
-    FaceCropMethod,
-    FaceCropService,
     FaceDetection,
-    FaceDetectionMethod,
-    FaceDetectionService,
-    FaceEmbeddingMethod,
-    FaceEmbeddingService,
     Landmark,
     MLLibraryData,
     MLSearchConfig,
-    MLSyncConfig,
-    MLSyncContext,
     MLSyncFileContext,
     MLSyncResult,
     MlFileData,
-} from "services/ml/types";
+} from "services/face/types";
+import { getLocalFiles } from "services/fileService";
 import { EnteFile } from "types/file";
 import { isInternalUserForML } from "utils/user";
-import arcfaceAlignmentService from "./arcfaceAlignmentService";
-import arcfaceCropService from "./arcfaceCropService";
-import dbscanClusteringService from "./dbscanClusteringService";
-import FaceService from "./faceService";
-import hdbscanClusteringService from "./hdbscanClusteringService";
-import laplacianBlurDetectionService from "./laplacianBlurDetectionService";
-import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";
-import PeopleService from "./peopleService";
-import ReaderService from "./readerService";
-import yoloFaceDetectionService from "./yoloFaceDetectionService";
-
-export const DEFAULT_ML_SYNC_CONFIG: MLSyncConfig = {
-    batchSize: 200,
-    imageSource: "Original",
-    faceDetection: {
-        method: "YoloFace",
-    },
-    faceCrop: {
-        enabled: true,
-        method: "ArcFace",
-        padding: 0.25,
-        maxSize: 256,
-        blobOptions: {
-            type: "image/jpeg",
-            quality: 0.8,
-        },
-    },
-    faceAlignment: {
-        method: "ArcFace",
-    },
-    blurDetection: {
-        method: "Laplacian",
-        threshold: 15,
-    },
-    faceEmbedding: {
-        method: "MobileFaceNet",
-        faceSize: 112,
-        generateTsne: true,
-    },
-    faceClustering: {
-        method: "Hdbscan",
-        minClusterSize: 3,
-        minSamples: 5,
-        clusterSelectionEpsilon: 0.6,
-        clusterSelectionMethod: "leaf",
-        minInputSize: 50,
-        // maxDistanceInsideCluster: 0.4,
-        generateDebugInfo: true,
-    },
-    mlVersion: 3,
-};
+import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
+import { fetchImageBitmapForContext } from "../face/image";
+import { syncPeopleIndex } from "../face/people";
 
-export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = {
-    enabled: false,
-};
+/**
+ * TODO-ML(MR): What and why.
+ * Also, needs to be 1 (in sync with mobile) when we move out of beta.
+ */
+export const defaultMLVersion = 3;
+
+const batchSize = 200;
 
 export const MAX_ML_SYNC_ERROR_COUNT = 1;
 
-export async function getMLSyncConfig() {
-    return mlIDbStorage.getConfig(ML_SYNC_CONFIG_NAME, DEFAULT_ML_SYNC_CONFIG);
-}
+export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = {
+    enabled: false,
+};
 
 export async function getMLSearchConfig() {
     if (isInternalUserForML()) {
@@ -119,95 +56,30 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
     return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
 }
 
-export class MLFactory {
-    public static getFaceDetectionService(
-        method: FaceDetectionMethod,
-    ): FaceDetectionService {
-        if (method === "YoloFace") {
-            return yoloFaceDetectionService;
-        }
-
-        throw Error("Unknon face detection method: " + method);
-    }
-
-    public static getFaceCropService(method: FaceCropMethod) {
-        if (method === "ArcFace") {
-            return arcfaceCropService;
-        }
-
-        throw Error("Unknon face crop method: " + method);
-    }
-
-    public static getFaceAlignmentService(
-        method: FaceAlignmentMethod,
-    ): FaceAlignmentService {
-        if (method === "ArcFace") {
-            return arcfaceAlignmentService;
-        }
-
-        throw Error("Unknon face alignment method: " + method);
-    }
-
-    public static getBlurDetectionService(
-        method: BlurDetectionMethod,
-    ): BlurDetectionService {
-        if (method === "Laplacian") {
-            return laplacianBlurDetectionService;
-        }
-
-        throw Error("Unknon blur detection method: " + method);
-    }
+export interface MLSyncContext {
+    token: string;
+    userID: number;
 
-    public static getFaceEmbeddingService(
-        method: FaceEmbeddingMethod,
-    ): FaceEmbeddingService {
-        if (method === "MobileFaceNet") {
-            return mobileFaceNetEmbeddingService;
-        }
+    localFilesMap: Map<number, EnteFile>;
+    outOfSyncFiles: EnteFile[];
+    nSyncedFiles: number;
+    nSyncedFaces: number;
+    allSyncedFacesMap?: Map<number, Array<Face>>;
 
-        throw Error("Unknon face embedding method: " + method);
-    }
+    error?: Error;
 
-    public static getClusteringService(
-        method: ClusteringMethod,
-    ): ClusteringService {
-        if (method === "Hdbscan") {
-            return hdbscanClusteringService;
-        }
-        if (method === "Dbscan") {
-            return dbscanClusteringService;
-        }
+    // oldMLLibraryData: MLLibraryData;
+    mlLibraryData: MLLibraryData;
 
-        throw Error("Unknon clustering method: " + method);
-    }
+    syncQueue: PQueue;
 
-    public static getMLSyncContext(
-        token: string,
-        userID: number,
-        config: MLSyncConfig,
-        shouldUpdateMLVersion: boolean = true,
-    ) {
-        return new LocalMLSyncContext(
-            token,
-            userID,
-            config,
-            shouldUpdateMLVersion,
-        );
-    }
+    getEnteWorker(id: number): Promise<any>;
+    dispose(): Promise<void>;
 }
 
 export class LocalMLSyncContext implements MLSyncContext {
     public token: string;
     public userID: number;
-    public config: MLSyncConfig;
-    public shouldUpdateMLVersion: boolean;
-
-    public faceDetectionService: FaceDetectionService;
-    public faceCropService: FaceCropService;
-    public faceAlignmentService: FaceAlignmentService;
-    public blurDetectionService: BlurDetectionService;
-    public faceEmbeddingService: FaceEmbeddingService;
-    public faceClusteringService: ClusteringService;
 
     public localFilesMap: Map<number, EnteFile>;
     public outOfSyncFiles: EnteFile[];
@@ -229,36 +101,9 @@ export class LocalMLSyncContext implements MLSyncContext {
     >;
     private enteWorkers: Array<any>;
 
-    constructor(
-        token: string,
-        userID: number,
-        config: MLSyncConfig,
-        shouldUpdateMLVersion: boolean = true,
-        concurrency?: number,
-    ) {
+    constructor(token: string, userID: number, concurrency?: number) {
         this.token = token;
         this.userID = userID;
-        this.config = config;
-        this.shouldUpdateMLVersion = shouldUpdateMLVersion;
-
-        this.faceDetectionService = MLFactory.getFaceDetectionService(
-            this.config.faceDetection.method,
-        );
-        this.faceCropService = MLFactory.getFaceCropService(
-            this.config.faceCrop.method,
-        );
-        this.faceAlignmentService = MLFactory.getFaceAlignmentService(
-            this.config.faceAlignment.method,
-        );
-        this.blurDetectionService = MLFactory.getBlurDetectionService(
-            this.config.blurDetection.method,
-        );
-        this.faceEmbeddingService = MLFactory.getFaceEmbeddingService(
-            this.config.faceEmbedding.method,
-        );
-        this.faceClusteringService = MLFactory.getClusteringService(
-            this.config.faceClustering.method,
-        );
 
         this.outOfSyncFiles = [];
         this.nSyncedFiles = 0;
@@ -311,8 +156,6 @@ class MachineLearningService {
             throw Error("Token needed by ml service to sync file");
         }
 
-        await downloadManager.init(APPS.PHOTOS, { token });
-
         const syncContext = await this.getSyncContext(token, userID);
 
         await this.syncLocalFiles(syncContext);
@@ -323,12 +166,10 @@ class MachineLearningService {
             await this.syncFiles(syncContext);
         }
 
-        // TODO: the index can run before all files are on the latest ml version;
-        // we may need to index only files already synced to the latest ml version
         if (
             syncContext.outOfSyncFiles.length <= 0 ||
-            (syncContext.nSyncedFiles === syncContext.config.batchSize &&
-                Math.random() < 0.2)
+            // TODO-ML(MR): Forced disable.
+            (syncContext.nSyncedFiles === batchSize && Math.random() < 0)
         ) {
             await this.syncIndex(syncContext);
         }
@@ -349,14 +190,8 @@ class MachineLearningService {
         return mlSyncResult;
     }
 
-    public async regenerateFaceCrop(
-        token: string,
-        userID: number,
-        faceID: string,
-    ) {
-        await downloadManager.init(APPS.PHOTOS, { token });
-        const syncContext = await this.getSyncContext(token, userID);
-        return FaceService.regenerateFaceCrop(syncContext, faceID);
+    public async regenerateFaceCrop(faceID: string) {
+        return regenerateFaceCrop(faceID);
     }
 
     private newMlData(fileId: number) {
@@ -434,8 +269,8 @@ class MachineLearningService {
     private async getOutOfSyncFiles(syncContext: MLSyncContext) {
         const startTime = Date.now();
         const fileIds = await mlIDbStorage.getFileIds(
-            syncContext.config.batchSize,
-            syncContext.config.mlVersion,
+            batchSize,
+            defaultMLVersion,
             MAX_ML_SYNC_ERROR_COUNT,
         );
 
@@ -481,9 +316,10 @@ class MachineLearningService {
         if (!this.syncContext) {
             log.info("Creating syncContext");
 
-            this.syncContext = getMLSyncConfig().then((mlSyncConfig) =>
-                MLFactory.getMLSyncContext(token, userID, mlSyncConfig, true),
-            );
+            // TODO-ML(MR): Keep as promise for now.
+            this.syncContext = new Promise((resolve) => {
+                resolve(new LocalMLSyncContext(token, userID));
+            });
         } else {
             log.info("reusing existing syncContext");
         }
@@ -491,11 +327,13 @@ class MachineLearningService {
     }
 
     private async getLocalSyncContext(token: string, userID: number) {
+        // TODO-ML(MR): This is updating the file ML version. verify.
         if (!this.localSyncContext) {
             log.info("Creating localSyncContext");
-            this.localSyncContext = getMLSyncConfig().then((mlSyncConfig) =>
-                MLFactory.getMLSyncContext(token, userID, mlSyncConfig, false),
-            );
+            // TODO-ML(MR):
+            this.localSyncContext = new Promise((resolve) => {
+                resolve(new LocalMLSyncContext(token, userID));
+            });
         } else {
             log.info("reusing existing localSyncContext");
         }
@@ -516,24 +354,22 @@ class MachineLearningService {
         userID: number,
         enteFile: EnteFile,
         localFile?: globalThis.File,
-    ): Promise<MlFileData | Error> {
+    ) {
         const syncContext = await this.getLocalSyncContext(token, userID);
 
         try {
-            const mlFileData = await this.syncFileWithErrorHandler(
+            await this.syncFileWithErrorHandler(
                 syncContext,
                 enteFile,
                 localFile,
             );
 
-            if (syncContext.nSyncedFiles >= syncContext.config.batchSize) {
+            if (syncContext.nSyncedFiles >= batchSize) {
                 await this.closeLocalSyncContext();
             }
             // await syncContext.dispose();
-            return mlFileData;
         } catch (e) {
             console.error("Error while syncing local file: ", enteFile.id, e);
-            return e;
         }
     }
 
@@ -541,16 +377,12 @@ class MachineLearningService {
         syncContext: MLSyncContext,
         enteFile: EnteFile,
         localFile?: globalThis.File,
-    ): Promise<MlFileData> {
+    ) {
         try {
             console.log(
                 `Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
             );
-            const mlFileData = await this.syncFile(
-                syncContext,
-                enteFile,
-                localFile,
-            );
+            const mlFileData = await this.syncFile(enteFile, localFile);
             syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
             syncContext.nSyncedFiles += 1;
             return mlFileData;
@@ -583,35 +415,20 @@ class MachineLearningService {
         }
     }
 
-    private async syncFile(
-        syncContext: MLSyncContext,
-        enteFile: EnteFile,
-        localFile?: globalThis.File,
-    ) {
-        console.log("Syncing for file" + enteFile.title);
+    private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
+        log.debug(() => ({ a: "Syncing file", enteFile }));
         const fileContext: MLSyncFileContext = { enteFile, localFile };
-        const oldMlFile =
-            (fileContext.oldMlFile = await this.getMLFileData(enteFile.id)) ??
-            this.newMlData(enteFile.id);
-        if (
-            fileContext.oldMlFile?.mlVersion === syncContext.config.mlVersion
-            // TODO: reset mlversion of all files when user changes image source
-        ) {
-            return fileContext.oldMlFile;
+        const oldMlFile = await this.getMLFileData(enteFile.id);
+        if (oldMlFile && oldMlFile.mlVersion) {
+            return oldMlFile;
         }
-        const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
 
-        if (syncContext.shouldUpdateMLVersion) {
-            newMlFile.mlVersion = syncContext.config.mlVersion;
-        } else if (fileContext.oldMlFile?.mlVersion) {
-            newMlFile.mlVersion = fileContext.oldMlFile.mlVersion;
-        }
+        const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
+        newMlFile.mlVersion = defaultMLVersion;
 
         try {
-            await ReaderService.getImageBitmap(syncContext, fileContext);
-            await Promise.all([
-                this.syncFileAnalyzeFaces(syncContext, fileContext),
-            ]);
+            await fetchImageBitmapForContext(fileContext);
+            await syncFileAnalyzeFaces(fileContext);
             newMlFile.errorCount = 0;
             newMlFile.lastErrorMessage = undefined;
             await this.persistOnServer(newMlFile, enteFile);
@@ -685,44 +502,11 @@ class MachineLearningService {
     public async syncIndex(syncContext: MLSyncContext) {
         await this.getMLLibraryData(syncContext);
 
-        await PeopleService.syncPeopleIndex(syncContext);
+        // TODO-ML(MR): Ensure this doesn't run until fixed.
+        await syncPeopleIndex(syncContext);
 
         await this.persistMLLibraryData(syncContext);
     }
-
-    private async syncFileAnalyzeFaces(
-        syncContext: MLSyncContext,
-        fileContext: MLSyncFileContext,
-    ) {
-        const { newMlFile } = fileContext;
-        const startTime = Date.now();
-        await FaceService.syncFileFaceDetections(syncContext, fileContext);
-
-        if (newMlFile.faces && newMlFile.faces.length > 0) {
-            await FaceService.syncFileFaceCrops(syncContext, fileContext);
-
-            const alignedFacesData = await FaceService.syncFileFaceAlignments(
-                syncContext,
-                fileContext,
-            );
-
-            await FaceService.syncFileFaceEmbeddings(
-                syncContext,
-                fileContext,
-                alignedFacesData,
-            );
-
-            await FaceService.syncFileFaceMakeRelativeDetections(
-                syncContext,
-                fileContext,
-            );
-        }
-        log.info(
-            `face detection time taken ${fileContext.enteFile.id}`,
-            Date.now() - startTime,
-            "ms",
-        );
-    }
 }
 
 export default new MachineLearningService();

+ 10 - 25
web/apps/photos/src/services/machineLearning/mlWorkManager.ts

@@ -5,24 +5,15 @@ import { eventBus, Events } from "@ente/shared/events";
 import { getToken, getUserID } from "@ente/shared/storage/localStorage/helpers";
 import debounce from "debounce";
 import PQueue from "p-queue";
-import mlIDbStorage from "services/ml/db";
-import { createFaceComlinkWorker } from "services/ml/face";
-import type { DedicatedMLWorker } from "services/ml/face.worker";
-import { MLSyncResult } from "services/ml/types";
+import { createFaceComlinkWorker } from "services/face";
+import mlIDbStorage from "services/face/db";
+import type { DedicatedMLWorker } from "services/face/face.worker";
+import { MLSyncResult } from "services/face/types";
 import { EnteFile } from "types/file";
 import { logQueueStats } from "./machineLearningService";
 
-const LIVE_SYNC_IDLE_DEBOUNCE_SEC = 30;
-const LIVE_SYNC_QUEUE_TIMEOUT_SEC = 300;
-const LOCAL_FILES_UPDATED_DEBOUNCE_SEC = 30;
-
 export type JobState = "Scheduled" | "Running" | "NotScheduled";
 
-export interface JobConfig {
-    intervalSec: number;
-    backoffMultiplier: number;
-}
-
 export interface MLSyncJobResult {
     shouldBackoff: boolean;
     mlSyncResult: MLSyncResult;
@@ -118,18 +109,18 @@ class MLWorkManager {
         this.liveSyncQueue = new PQueue({
             concurrency: 1,
             // TODO: temp, remove
-            timeout: LIVE_SYNC_QUEUE_TIMEOUT_SEC * 1000,
+            timeout: 300 * 1000,
             throwOnTimeout: true,
         });
         this.mlSearchEnabled = false;
 
         this.debouncedLiveSyncIdle = debounce(
             () => this.onLiveSyncIdle(),
-            LIVE_SYNC_IDLE_DEBOUNCE_SEC * 1000,
+            30 * 1000,
         );
         this.debouncedFilesUpdated = debounce(
             () => this.mlSearchEnabled && this.localFilesUpdatedHandler(),
-            LOCAL_FILES_UPDATED_DEBOUNCE_SEC * 1000,
+            30 * 1000,
         );
     }
 
@@ -241,19 +232,13 @@ class MLWorkManager {
     }
 
     public async syncLocalFile(enteFile: EnteFile, localFile: globalThis.File) {
-        const result = await this.liveSyncQueue.add(async () => {
+        await this.liveSyncQueue.add(async () => {
             this.stopSyncJob();
             const token = getToken();
             const userID = getUserID();
             const mlWorker = await this.getLiveSyncWorker();
             return mlWorker.syncLocalFile(token, userID, enteFile, localFile);
         });
-
-        if (result instanceof Error) {
-            // TODO: redirect/refresh to gallery in case of session_expired
-            // may not be required as uploader should anyways take care of this
-            console.error("Error while syncing local file: ", result);
-        }
     }
 
     // Sync Job
@@ -326,11 +311,11 @@ class MLWorkManager {
         }
     }
 
-    public stopSyncJob(terminateWorker: boolean = true) {
+    public stopSyncJob() {
         try {
             log.info("MLWorkManager.stopSyncJob");
             this.mlSyncJob?.stop();
-            terminateWorker && this.terminateSyncJobWorker();
+            this.terminateSyncJobWorker();
         } catch (e) {
             log.error("Failed to stop MLSync Job", e);
         }

+ 0 - 41
web/apps/photos/src/services/machineLearning/mobileFaceNetEmbeddingService.ts

@@ -1,41 +0,0 @@
-import { workerBridge } from "@/next/worker/worker-bridge";
-import {
-    FaceEmbedding,
-    FaceEmbeddingMethod,
-    FaceEmbeddingService,
-    Versioned,
-} from "services/ml/types";
-
-export const mobileFaceNetFaceSize = 112;
-
-class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
-    public method: Versioned<FaceEmbeddingMethod>;
-    public faceSize: number;
-
-    public constructor() {
-        this.method = {
-            value: "MobileFaceNet",
-            version: 2,
-        };
-        this.faceSize = mobileFaceNetFaceSize;
-    }
-
-    public async getFaceEmbeddings(
-        faceData: Float32Array,
-    ): Promise<Array<FaceEmbedding>> {
-        const outputData = await workerBridge.faceEmbedding(faceData);
-
-        const embeddingSize = 192;
-        const embeddings = new Array<FaceEmbedding>(
-            outputData.length / embeddingSize,
-        );
-        for (let i = 0; i < embeddings.length; i++) {
-            embeddings[i] = new Float32Array(
-                outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
-            );
-        }
-        return embeddings;
-    }
-}
-
-export default new MobileFaceNetEmbeddingService();
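
Note: the essential step above is reshaping the flat worker output into one embedding per face. A minimal sketch, assuming the same 192-dimensional MobileFaceNet output (the helper name is hypothetical):

// Illustrative sketch: split the flat Float32Array returned by the worker
// bridge into one 192-dimensional embedding per face.
const splitEmbeddings = (
    outputData: Float32Array,
    embeddingSize = 192,
): Float32Array[] => {
    const embeddings: Float32Array[] = [];
    for (let i = 0; i < outputData.length; i += embeddingSize)
        embeddings.push(new Float32Array(outputData.slice(i, i + embeddingSize)));
    return embeddings;
};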

+ 0 - 113
web/apps/photos/src/services/machineLearning/peopleService.ts

@@ -1,113 +0,0 @@
-import log from "@/next/log";
-import mlIDbStorage from "services/ml/db";
-import { Face, MLSyncContext, Person } from "services/ml/types";
-import FaceService, { isDifferentOrOld } from "./faceService";
-import { fetchImageBitmap, getLocalFile } from "./readerService";
-
-class PeopleService {
-    async syncPeopleIndex(syncContext: MLSyncContext) {
-        const filesVersion = await mlIDbStorage.getIndexVersion("files");
-        if (
-            filesVersion <= (await mlIDbStorage.getIndexVersion("people")) &&
-            !isDifferentOrOld(
-                syncContext.mlLibraryData?.faceClusteringMethod,
-                syncContext.faceClusteringService.method,
-            )
-        ) {
-            log.info(
-                "[MLService] Skipping people index as already synced to latest version",
-            );
-            return;
-        }
-
-        // TODO: have faces addressable through fileId + faceId
-        // to avoid index based addressing, which is prone to wrong results
-        // one way could be to match nearest face within threshold in the file
-        const allFacesMap = await FaceService.getAllSyncedFacesMap(syncContext);
-        const allFaces = getAllFacesFromMap(allFacesMap);
-
-        await FaceService.runFaceClustering(syncContext, allFaces);
-        await this.syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
-
-        await mlIDbStorage.setIndexVersion("people", filesVersion);
-    }
-
-    private async syncPeopleFromClusters(
-        syncContext: MLSyncContext,
-        allFacesMap: Map<number, Array<Face>>,
-        allFaces: Array<Face>,
-    ) {
-        const clusters =
-            syncContext.mlLibraryData.faceClusteringResults?.clusters;
-        if (!clusters || clusters.length < 1) {
-            return;
-        }
-
-        for (const face of allFaces) {
-            face.personId = undefined;
-        }
-        await mlIDbStorage.clearAllPeople();
-        for (const [index, cluster] of clusters.entries()) {
-            const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
-
-            // TODO: take default display face from last leaves of hdbscan clusters
-            const personFace = findFirstIfSorted(
-                faces,
-                (a, b) => b.detection.probability - a.detection.probability,
-            );
-
-            if (personFace && !personFace.crop?.cacheKey) {
-                const file = await getLocalFile(personFace.fileId);
-                const imageBitmap = await fetchImageBitmap(file);
-                await FaceService.saveFaceCrop(
-                    imageBitmap,
-                    personFace,
-                    syncContext,
-                );
-            }
-
-            const person: Person = {
-                id: index,
-                files: faces.map((f) => f.fileId),
-                displayFaceId: personFace?.id,
-                faceCropCacheKey: personFace?.crop?.cacheKey,
-            };
-
-            await mlIDbStorage.putPerson(person);
-
-            faces.forEach((face) => {
-                face.personId = person.id;
-            });
-            // log.info("Creating person: ", person, faces);
-        }
-
-        await mlIDbStorage.updateFaces(allFacesMap);
-    }
-}
-
-export default new PeopleService();
-
-function findFirstIfSorted<T>(
-    elements: Array<T>,
-    comparator: (a: T, b: T) => number,
-) {
-    if (!elements || elements.length < 1) {
-        return;
-    }
-    let first = elements[0];
-
-    for (let i = 1; i < elements.length; i++) {
-        const comp = comparator(elements[i], first);
-        if (comp < 0) {
-            first = elements[i];
-        }
-    }
-
-    return first;
-}
-
-function getAllFacesFromMap(allFacesMap: Map<number, Array<Face>>) {
-    const allFaces = [...allFacesMap.values()].flat();
-
-    return allFaces;
-}

+ 0 - 332
web/apps/photos/src/services/machineLearning/yoloFaceDetectionService.ts

@@ -1,332 +0,0 @@
-import { workerBridge } from "@/next/worker/worker-bridge";
-import { euclidean } from "hdbscan";
-import {
-    Box,
-    Dimensions,
-    Point,
-    boxFromBoundingBox,
-    newBox,
-} from "services/ml/geom";
-import {
-    FaceDetection,
-    FaceDetectionMethod,
-    FaceDetectionService,
-    Versioned,
-} from "services/ml/types";
-import {
-    Matrix,
-    applyToPoint,
-    compose,
-    scale,
-    translate,
-} from "transformation-matrix";
-import {
-    clamp,
-    getPixelBilinear,
-    normalizePixelBetween0And1,
-} from "utils/image";
-
-class YoloFaceDetectionService implements FaceDetectionService {
-    public method: Versioned<FaceDetectionMethod>;
-
-    public constructor() {
-        this.method = {
-            value: "YoloFace",
-            version: 1,
-        };
-    }
-
-    public async detectFaces(
-        imageBitmap: ImageBitmap,
-    ): Promise<Array<FaceDetection>> {
-        const maxFaceDistancePercent = Math.sqrt(2) / 100;
-        const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
-        const preprocessResult =
-            this.preprocessImageBitmapToFloat32ChannelsFirst(
-                imageBitmap,
-                640,
-                640,
-            );
-        const data = preprocessResult.data;
-        const resized = preprocessResult.newSize;
-        const outputData = await workerBridge.detectFaces(data);
-        const faces = this.getFacesFromYoloOutput(
-            outputData as Float32Array,
-            0.7,
-        );
-        const inBox = newBox(0, 0, resized.width, resized.height);
-        const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
-        const transform = computeTransformToBox(inBox, toBox);
-        const faceDetections: Array<FaceDetection> = faces?.map((f) => {
-            const box = transformBox(f.box, transform);
-            const normLandmarks = f.landmarks;
-            const landmarks = transformPoints(normLandmarks, transform);
-            return {
-                box,
-                landmarks,
-                probability: f.probability as number,
-            } as FaceDetection;
-        });
-        return removeDuplicateDetections(faceDetections, maxFaceDistance);
-    }
-
-    private preprocessImageBitmapToFloat32ChannelsFirst(
-        imageBitmap: ImageBitmap,
-        requiredWidth: number,
-        requiredHeight: number,
-        maintainAspectRatio: boolean = true,
-        normFunction: (
-            pixelValue: number,
-        ) => number = normalizePixelBetween0And1,
-    ) {
-        // Create an OffscreenCanvas and set its size
-        const offscreenCanvas = new OffscreenCanvas(
-            imageBitmap.width,
-            imageBitmap.height,
-        );
-        const ctx = offscreenCanvas.getContext("2d");
-        ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
-        const imageData = ctx.getImageData(
-            0,
-            0,
-            imageBitmap.width,
-            imageBitmap.height,
-        );
-        const pixelData = imageData.data;
-
-        let scaleW = requiredWidth / imageBitmap.width;
-        let scaleH = requiredHeight / imageBitmap.height;
-        if (maintainAspectRatio) {
-            const scale = Math.min(
-                requiredWidth / imageBitmap.width,
-                requiredHeight / imageBitmap.height,
-            );
-            scaleW = scale;
-            scaleH = scale;
-        }
-        const scaledWidth = clamp(
-            Math.round(imageBitmap.width * scaleW),
-            0,
-            requiredWidth,
-        );
-        const scaledHeight = clamp(
-            Math.round(imageBitmap.height * scaleH),
-            0,
-            requiredHeight,
-        );
-
-        const processedImage = new Float32Array(
-            1 * 3 * requiredWidth * requiredHeight,
-        );
-
-        // Populate the Float32Array with normalized pixel values
-        let pixelIndex = 0;
-        const channelOffsetGreen = requiredHeight * requiredWidth;
-        const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
-        for (let h = 0; h < requiredHeight; h++) {
-            for (let w = 0; w < requiredWidth; w++) {
-                let pixel: {
-                    r: number;
-                    g: number;
-                    b: number;
-                };
-                if (w >= scaledWidth || h >= scaledHeight) {
-                    pixel = { r: 114, g: 114, b: 114 };
-                } else {
-                    pixel = getPixelBilinear(
-                        w / scaleW,
-                        h / scaleH,
-                        pixelData,
-                        imageBitmap.width,
-                        imageBitmap.height,
-                    );
-                }
-                processedImage[pixelIndex] = normFunction(pixel.r);
-                processedImage[pixelIndex + channelOffsetGreen] = normFunction(
-                    pixel.g,
-                );
-                processedImage[pixelIndex + channelOffsetBlue] = normFunction(
-                    pixel.b,
-                );
-                pixelIndex++;
-            }
-        }
-
-        return {
-            data: processedImage,
-            originalSize: {
-                width: imageBitmap.width,
-                height: imageBitmap.height,
-            },
-            newSize: { width: scaledWidth, height: scaledHeight },
-        };
-    }
-
-    // The rowOutput is a Float32Array of shape [25200, 16], where each row represents a bounding box.
-    private getFacesFromYoloOutput(
-        rowOutput: Float32Array,
-        minScore: number,
-    ): Array<FaceDetection> {
-        const faces: Array<FaceDetection> = [];
-        // iterate over each row
-        for (let i = 0; i < rowOutput.length; i += 16) {
-            const score = rowOutput[i + 4];
-            if (score < minScore) {
-                continue;
-            }
-            // The first 4 values are the box's center (x, y) and its width and height
-            const xCenter = rowOutput[i];
-            const yCenter = rowOutput[i + 1];
-            const width = rowOutput[i + 2];
-            const height = rowOutput[i + 3];
-            const xMin = xCenter - width / 2.0; // topLeft
-            const yMin = yCenter - height / 2.0; // topLeft
-
-            const leftEyeX = rowOutput[i + 5];
-            const leftEyeY = rowOutput[i + 6];
-            const rightEyeX = rowOutput[i + 7];
-            const rightEyeY = rowOutput[i + 8];
-            const noseX = rowOutput[i + 9];
-            const noseY = rowOutput[i + 10];
-            const leftMouthX = rowOutput[i + 11];
-            const leftMouthY = rowOutput[i + 12];
-            const rightMouthX = rowOutput[i + 13];
-            const rightMouthY = rowOutput[i + 14];
-
-            const box = new Box({
-                x: xMin,
-                y: yMin,
-                width: width,
-                height: height,
-            });
-            const probability = score as number;
-            const landmarks = [
-                new Point(leftEyeX, leftEyeY),
-                new Point(rightEyeX, rightEyeY),
-                new Point(noseX, noseY),
-                new Point(leftMouthX, leftMouthY),
-                new Point(rightMouthX, rightMouthY),
-            ];
-            const face: FaceDetection = {
-                box,
-                landmarks,
-                probability,
-                // detectionMethod: this.method,
-            };
-            faces.push(face);
-        }
-        return faces;
-    }
-
-    public getRelativeDetection(
-        faceDetection: FaceDetection,
-        dimensions: Dimensions,
-    ): FaceDetection {
-        const oldBox: Box = faceDetection.box;
-        const box = new Box({
-            x: oldBox.x / dimensions.width,
-            y: oldBox.y / dimensions.height,
-            width: oldBox.width / dimensions.width,
-            height: oldBox.height / dimensions.height,
-        });
-        const oldLandmarks: Point[] = faceDetection.landmarks;
-        const landmarks = oldLandmarks.map((l) => {
-            return new Point(l.x / dimensions.width, l.y / dimensions.height);
-        });
-        return {
-            box,
-            landmarks,
-            probability: faceDetection.probability,
-        };
-    }
-}
-
-export default new YoloFaceDetectionService();
-
-/**
- * Removes duplicate face detections from an array of detections.
- *
- * This function sorts the detections by their probability in descending order, then iterates over them.
- * For each detection, it calculates the Euclidean distance to all other detections.
- * If the distance is less than or equal to the specified threshold (`withinDistance`), the other detection is considered a duplicate and is removed.
- *
- * @param detections - An array of face detections to remove duplicates from.
- * @param withinDistance - The maximum Euclidean distance between two detections for them to be considered duplicates.
- *
- * @returns An array of face detections with duplicates removed.
- */
-function removeDuplicateDetections(
-    detections: Array<FaceDetection>,
-    withinDistance: number,
-) {
-    // console.time('removeDuplicates');
-    detections.sort((a, b) => b.probability - a.probability);
-    const isSelected = new Map<number, boolean>();
-    for (let i = 0; i < detections.length; i++) {
-        if (isSelected.get(i) === false) {
-            continue;
-        }
-        isSelected.set(i, true);
-        for (let j = i + 1; j < detections.length; j++) {
-            if (isSelected.get(j) === false) {
-                continue;
-            }
-            const centeri = getDetectionCenter(detections[i]);
-            const centerj = getDetectionCenter(detections[j]);
-            const dist = euclidean(
-                [centeri.x, centeri.y],
-                [centerj.x, centerj.y],
-            );
-            if (dist <= withinDistance) {
-                isSelected.set(j, false);
-            }
-        }
-    }
-
-    const uniques: Array<FaceDetection> = [];
-    for (let i = 0; i < detections.length; i++) {
-        isSelected.get(i) && uniques.push(detections[i]);
-    }
-    // console.timeEnd('removeDuplicates');
-    return uniques;
-}
-
-function getDetectionCenter(detection: FaceDetection) {
-    const center = new Point(0, 0);
-    // TODO: first 4 landmarks is applicable to blazeface only
-    // this needs to consider eyes, nose and mouth landmarks to take center
-    detection.landmarks?.slice(0, 4).forEach((p) => {
-        center.x += p.x;
-        center.y += p.y;
-    });
-
-    return new Point(center.x / 4, center.y / 4);
-}
-
-function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
-    return compose(
-        translate(toBox.x, toBox.y),
-        scale(toBox.width / inBox.width, toBox.height / inBox.height),
-    );
-}
-
-function transformPoint(point: Point, transform: Matrix) {
-    const txdPoint = applyToPoint(transform, point);
-    return new Point(txdPoint.x, txdPoint.y);
-}
-
-function transformPoints(points: Point[], transform: Matrix) {
-    return points?.map((p) => transformPoint(p, transform));
-}
-
-function transformBox(box: Box, transform: Matrix) {
-    const topLeft = transformPoint(box.topLeft, transform);
-    const bottomRight = transformPoint(box.bottomRight, transform);
-
-    return boxFromBoundingBox({
-        left: topLeft.x,
-        top: topLeft.y,
-        right: bottomRight.x,
-        bottom: bottomRight.y,
-    });
-}
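
The doc comment above describes the de-duplication pass: detections are sorted by probability, and any lower-probability detection whose center falls within `withinDistance` of an already-kept detection is dropped. A minimal standalone sketch of that same idea (hypothetical names and types; not the code that replaces this file):

```ts
interface SimpleDetection {
    center: { x: number; y: number };
    probability: number;
}

const euclidean = (a: { x: number; y: number }, b: { x: number; y: number }) =>
    Math.hypot(a.x - b.x, a.y - b.y);

/** Keep only the highest-probability detection within each neighbourhood. */
const dedupeDetections = (
    detections: SimpleDetection[],
    withinDistance: number,
): SimpleDetection[] => {
    const sorted = [...detections].sort((a, b) => b.probability - a.probability);
    const kept: SimpleDetection[] = [];
    for (const d of sorted) {
        // A detection survives only if it is farther than `withinDistance`
        // from every detection we have already decided to keep.
        if (kept.every((k) => euclidean(k.center, d.center) > withinDistance))
            kept.push(d);
    }
    return kept;
};
```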

+ 0 - 331
web/apps/photos/src/services/ml/types.ts

@@ -1,331 +0,0 @@
-import { DebugInfo } from "hdbscan";
-import PQueue from "p-queue";
-import { Dimensions } from "services/ml/geom";
-import { EnteFile } from "types/file";
-import { Box, Point } from "./geom";
-
-export interface MLSyncResult {
-    nOutOfSyncFiles: number;
-    nSyncedFiles: number;
-    nSyncedFaces: number;
-    nFaceClusters: number;
-    nFaceNoise: number;
-    error?: Error;
-}
-
-export declare type FaceDescriptor = Float32Array;
-
-export declare type Cluster = Array<number>;
-
-export interface ClusteringResults {
-    clusters: Array<Cluster>;
-    noise: Cluster;
-}
-
-export interface HdbscanResults extends ClusteringResults {
-    debugInfo?: DebugInfo;
-}
-
-export interface FacesCluster {
-    faces: Cluster;
-    summary?: FaceDescriptor;
-}
-
-export interface FacesClustersWithNoise {
-    clusters: Array<FacesCluster>;
-    noise: Cluster;
-}
-
-export interface NearestCluster {
-    cluster: FacesCluster;
-    distance: number;
-}
-
-export declare type Landmark = Point;
-
-export declare type ImageType = "Original" | "Preview";
-
-export declare type FaceDetectionMethod = "YoloFace";
-
-export declare type FaceCropMethod = "ArcFace";
-
-export declare type FaceAlignmentMethod = "ArcFace";
-
-export declare type FaceEmbeddingMethod = "MobileFaceNet";
-
-export declare type BlurDetectionMethod = "Laplacian";
-
-export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
-
-export class AlignedBox {
-    box: Box;
-    rotation: number;
-}
-
-export interface Versioned<T> {
-    value: T;
-    version: number;
-}
-
-export interface FaceDetection {
-    // box and landmarks is relative to image dimentions stored at mlFileData
-    box: Box;
-    landmarks?: Array<Landmark>;
-    probability?: number;
-}
-
-export interface DetectedFace {
-    fileId: number;
-    detection: FaceDetection;
-}
-
-export interface DetectedFaceWithId extends DetectedFace {
-    id: string;
-}
-
-export interface FaceCrop {
-    image: ImageBitmap;
-    // imageBox is relative to image dimentions stored at mlFileData
-    imageBox: Box;
-}
-
-export interface StoredFaceCrop {
-    cacheKey: string;
-    imageBox: Box;
-}
-
-export interface CroppedFace extends DetectedFaceWithId {
-    crop?: StoredFaceCrop;
-}
-
-export interface FaceAlignment {
-    // TODO: remove affine matrix as rotation, size and center
-    // are simple to store and use, affine matrix adds complexity while getting crop
-    affineMatrix: Array<Array<number>>;
-    rotation: number;
-    // size and center is relative to image dimentions stored at mlFileData
-    size: number;
-    center: Point;
-}
-
-export interface AlignedFace extends CroppedFace {
-    alignment?: FaceAlignment;
-    blurValue?: number;
-}
-
-export declare type FaceEmbedding = Float32Array;
-
-export interface FaceWithEmbedding extends AlignedFace {
-    embedding?: FaceEmbedding;
-}
-
-export interface Face extends FaceWithEmbedding {
-    personId?: number;
-}
-
-export interface Person {
-    id: number;
-    name?: string;
-    files: Array<number>;
-    displayFaceId?: string;
-    faceCropCacheKey?: string;
-}
-
-export interface MlFileData {
-    fileId: number;
-    faces?: Face[];
-    imageSource?: ImageType;
-    imageDimensions?: Dimensions;
-    faceDetectionMethod?: Versioned<FaceDetectionMethod>;
-    faceCropMethod?: Versioned<FaceCropMethod>;
-    faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
-    faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
-    mlVersion: number;
-    errorCount: number;
-    lastErrorMessage?: string;
-}
-
-export interface FaceDetectionConfig {
-    method: FaceDetectionMethod;
-}
-
-export interface FaceCropConfig {
-    enabled: boolean;
-    method: FaceCropMethod;
-    padding: number;
-    maxSize: number;
-    blobOptions: {
-        type: string;
-        quality: number;
-    };
-}
-
-export interface FaceAlignmentConfig {
-    method: FaceAlignmentMethod;
-}
-
-export interface BlurDetectionConfig {
-    method: BlurDetectionMethod;
-    threshold: number;
-}
-
-export interface FaceEmbeddingConfig {
-    method: FaceEmbeddingMethod;
-    faceSize: number;
-    generateTsne?: boolean;
-}
-
-export interface FaceClusteringConfig extends ClusteringConfig {}
-
-export declare type TSNEMetric = "euclidean" | "manhattan";
-
-export interface TSNEConfig {
-    samples: number;
-    dim: number;
-    perplexity?: number;
-    earlyExaggeration?: number;
-    learningRate?: number;
-    nIter?: number;
-    metric?: TSNEMetric;
-}
-
-export interface MLSyncConfig {
-    batchSize: number;
-    imageSource: ImageType;
-    faceDetection: FaceDetectionConfig;
-    faceCrop: FaceCropConfig;
-    faceAlignment: FaceAlignmentConfig;
-    blurDetection: BlurDetectionConfig;
-    faceEmbedding: FaceEmbeddingConfig;
-    faceClustering: FaceClusteringConfig;
-    mlVersion: number;
-}
-
-export interface MLSearchConfig {
-    enabled: boolean;
-}
-
-export interface MLSyncContext {
-    token: string;
-    userID: number;
-    config: MLSyncConfig;
-    shouldUpdateMLVersion: boolean;
-
-    faceDetectionService: FaceDetectionService;
-    faceCropService: FaceCropService;
-    faceAlignmentService: FaceAlignmentService;
-    faceEmbeddingService: FaceEmbeddingService;
-    blurDetectionService: BlurDetectionService;
-    faceClusteringService: ClusteringService;
-
-    localFilesMap: Map<number, EnteFile>;
-    outOfSyncFiles: EnteFile[];
-    nSyncedFiles: number;
-    nSyncedFaces: number;
-    allSyncedFacesMap?: Map<number, Array<Face>>;
-
-    error?: Error;
-
-    // oldMLLibraryData: MLLibraryData;
-    mlLibraryData: MLLibraryData;
-
-    syncQueue: PQueue;
-
-    getEnteWorker(id: number): Promise<any>;
-    dispose(): Promise<void>;
-}
-
-export interface MLSyncFileContext {
-    enteFile: EnteFile;
-    localFile?: globalThis.File;
-
-    oldMlFile?: MlFileData;
-    newMlFile?: MlFileData;
-
-    imageBitmap?: ImageBitmap;
-
-    newDetection?: boolean;
-    newAlignment?: boolean;
-}
-
-export interface MLLibraryData {
-    faceClusteringMethod?: Versioned<ClusteringMethod>;
-    faceClusteringResults?: ClusteringResults;
-    faceClustersWithNoise?: FacesClustersWithNoise;
-}
-
-export declare type MLIndex = "files" | "people";
-
-export interface FaceDetectionService {
-    method: Versioned<FaceDetectionMethod>;
-
-    detectFaces(image: ImageBitmap): Promise<Array<FaceDetection>>;
-    getRelativeDetection(
-        faceDetection: FaceDetection,
-        imageDimensions: Dimensions,
-    ): FaceDetection;
-}
-
-export interface FaceCropService {
-    method: Versioned<FaceCropMethod>;
-
-    getFaceCrop(
-        imageBitmap: ImageBitmap,
-        face: FaceDetection,
-        config: FaceCropConfig,
-    ): Promise<FaceCrop>;
-}
-
-export interface FaceAlignmentService {
-    method: Versioned<FaceAlignmentMethod>;
-    getFaceAlignment(faceDetection: FaceDetection): FaceAlignment;
-}
-
-export interface FaceEmbeddingService {
-    method: Versioned<FaceEmbeddingMethod>;
-    faceSize: number;
-
-    getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
-}
-
-export interface BlurDetectionService {
-    method: Versioned<BlurDetectionMethod>;
-    detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
-}
-
-export interface ClusteringService {
-    method: Versioned<ClusteringMethod>;
-
-    cluster(
-        input: ClusteringInput,
-        config: ClusteringConfig,
-    ): Promise<ClusteringResults>;
-}
-
-export interface ClusteringConfig {
-    method: ClusteringMethod;
-    minClusterSize: number;
-    minSamples?: number;
-    clusterSelectionEpsilon?: number;
-    clusterSelectionMethod?: "eom" | "leaf";
-    maxDistanceInsideCluster?: number;
-    minInputSize?: number;
-    generateDebugInfo?: boolean;
-}
-
-export declare type ClusteringInput = Array<Array<number>>;
-
-export interface MachineLearningWorker {
-    closeLocalSyncContext(): Promise<void>;
-
-    syncLocalFile(
-        token: string,
-        userID: number,
-        enteFile: EnteFile,
-        localFile: globalThis.File,
-    ): Promise<MlFileData | Error>;
-
-    sync(token: string, userID: number): Promise<MLSyncResult>;
-
-    close(): void;
-}
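
The comments in the removed types note that detection boxes, landmarks, and alignment geometry are interpreted against the image dimensions recorded on the file's ML record. A hypothetical illustration of how those (now removed) pieces fit together, with made-up values and only two of the five landmarks shown for brevity:

```ts
import { Box, Point } from "services/ml/geom";
import { MlFileData } from "services/ml/types";

// Hypothetical record: coordinates in `detection` are expressed in the
// pixel space of `imageDimensions`.
const example: MlFileData = {
    fileId: 42,
    imageSource: "Original",
    imageDimensions: { width: 1920, height: 1080 },
    faces: [
        {
            id: "42-0",
            fileId: 42,
            detection: {
                box: new Box({ x: 640, y: 360, width: 256, height: 256 }),
                landmarks: [new Point(700, 420), new Point(830, 420)],
                probability: 0.97,
            },
            blurValue: 312.5, // assumed Laplacian blur score, illustrative only
        },
    ],
    mlVersion: 3,
    errorCount: 0,
};
```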

+ 4 - 5
web/apps/photos/src/services/searchService.ts

@@ -2,9 +2,9 @@ import { FILE_TYPE } from "@/media/file-type";
 import log from "@/next/log";
 import * as chrono from "chrono-node";
 import { t } from "i18next";
-import { getMLSyncConfig } from "services/machineLearning/machineLearningService";
-import mlIDbStorage from "services/ml/db";
-import { Person } from "services/ml/types";
+import mlIDbStorage from "services/face/db";
+import { Person } from "services/face/types";
+import { defaultMLVersion } from "services/machineLearning/machineLearningService";
 import { Collection } from "types/collection";
 import { EntityType, LocationTag, LocationTagData } from "types/entity";
 import { EnteFile } from "types/file";
@@ -175,8 +175,7 @@ export async function getAllPeopleSuggestion(): Promise<Array<Suggestion>> {
 
 export async function getIndexStatusSuggestion(): Promise<Suggestion> {
     try {
-        const config = await getMLSyncConfig();
-        const indexStatus = await mlIDbStorage.getIndexStatus(config.mlVersion);
+        const indexStatus = await mlIDbStorage.getIndexStatus(defaultMLVersion);
 
         let label;
         if (!indexStatus.localFilesSynced) {

+ 2 - 2
web/apps/photos/src/types/search/index.ts

@@ -1,7 +1,7 @@
 import { FILE_TYPE } from "@/media/file-type";
+import { IndexStatus } from "services/face/db";
+import { Person } from "services/face/types";
 import { City } from "services/locationSearchService";
-import { IndexStatus } from "services/ml/db";
-import { Person } from "services/ml/types";
 import { LocationTagData } from "types/entity";
 import { EnteFile } from "types/file";
 

+ 7 - 7
web/apps/photos/src/utils/image/index.ts

@@ -1,8 +1,8 @@
 // these utils only work in env where OffscreenCanvas is available
 
 import { Matrix, inverse } from "ml-matrix";
-import { Box, Dimensions, enlargeBox } from "services/ml/geom";
-import { FaceAlignment } from "services/ml/types";
+import { Box, Dimensions, enlargeBox } from "services/face/geom";
+import { FaceAlignment } from "services/face/types";
 
 export function normalizePixelBetween0And1(pixelValue: number) {
     return pixelValue / 255.0;
@@ -450,17 +450,17 @@ export interface BlobOptions {
     quality?: number;
 }
 
-export async function imageBitmapToBlob(
-    imageBitmap: ImageBitmap,
-    options?: BlobOptions,
-) {
+export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
     const offscreen = new OffscreenCanvas(
         imageBitmap.width,
         imageBitmap.height,
     );
     offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
 
-    return offscreen.convertToBlob(options);
+    return offscreen.convertToBlob({
+        type: "image/jpeg",
+        quality: 0.8,
+    });
 }
 
 export async function imageBitmapFromBlob(blob: Blob) {

+ 12 - 0
web/docs/dependencies.md

@@ -174,3 +174,15 @@ some cases.
 -   [sanitize-filename](https://github.com/parshap/node-sanitize-filename) is
     for converting arbitrary strings into strings that are suitable for being
     used as filenames.
+
+## Face search
+
+-   [matrix](https://github.com/mljs/matrix) and
+    [similarity-transformation](https://github.com/shaileshpandit/similarity-transformation-js)
+    are used during face alignment.
+
+-   [transformation-matrix](https://github.com/chrvadala/transformation-matrix)
+    is used during face detection.
+
+-   [hdbscan](https://github.com/shaileshpandit/hdbscan-js) is used for face
+    clustering.
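
The transformation-matrix usage referred to here is the compose/translate/scale pattern seen in the detection code removed earlier in this diff. A small sketch of that pattern, mapping a point from the model's input space back into original image coordinates (dimensions are hypothetical):

```ts
import { applyToPoint, compose, scale, translate } from "transformation-matrix";

// Map coordinates from a 640×640 model input space back to a 1920×1080
// image (no offset in this example, hence translate(0, 0)).
const toImageSpace = compose(translate(0, 0), scale(1920 / 640, 1080 / 640));

const detectedCenter = applyToPoint(toImageSpace, { x: 320, y: 320 });
// → { x: 960, y: 540 }
```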

+ 2 - 2
web/packages/next/types/ipc.ts

@@ -332,12 +332,12 @@ export interface Electron {
     detectFaces: (input: Float32Array) => Promise<Float32Array>;
 
     /**
-     * Return a MobileFaceNet embedding for the given face data.
+     * Return a MobileFaceNet embeddings for the given faces.
      *
      * Both the input and output are opaque binary data whose internal structure
      * is specific to our implementation and the model (MobileFaceNet) we use.
      */
-    faceEmbedding: (input: Float32Array) => Promise<Float32Array>;
+    faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
 
     /**
      * Return a face crop stored by a previous version of ML.
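
As the comment notes, both sides of `faceEmbeddings` are opaque binary data. A minimal sketch of a caller from the web layer (the preprocessing that produces `faceData`, the per-face reshaping, and the `electron` binding being in scope are all assumptions, not part of the documented contract):

```ts
declare const electron: Electron;

const embedForFaces = async (faceData: Float32Array, faceCount: number) => {
    const flat = await electron.faceEmbeddings(faceData);
    // Assume the output is one embedding per face, laid out contiguously.
    const embeddingSize = flat.length / faceCount;
    return Array.from({ length: faceCount }, (_, i) =>
        flat.slice(i * embeddingSize, (i + 1) * embeddingSize),
    );
};
```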

+ 2 - 2
web/packages/next/worker/comlink-worker.ts

@@ -47,8 +47,8 @@ const workerBridge = {
     convertToJPEG: (imageData: Uint8Array) =>
         ensureElectron().convertToJPEG(imageData),
     detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
-    faceEmbedding: (input: Float32Array) =>
-        ensureElectron().faceEmbedding(input),
+    faceEmbeddings: (input: Float32Array) =>
+        ensureElectron().faceEmbeddings(input),
 };
 
 export type WorkerBridge = typeof workerBridge;
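
For context, `workerBridge` is the object that web workers reach over Comlink to call back into the main thread (and from there into the Electron layer). A rough sketch of the worker side, assuming the main thread has already exposed the bridge on this worker with `expose(workerBridge, worker)`; the import path and wiring are illustrative, not necessarily how the app sets it up:

```ts
import { wrap } from "comlink";
import type { WorkerBridge } from "@/next/worker/comlink-worker";

// Wrap the endpoint this worker shares with the main thread.
const bridge = wrap<WorkerBridge>(self as any);

export const embedFaces = async (faceData: Float32Array) =>
    // Proxied call; resolves with the Float32Array returned by the main thread.
    bridge.faceEmbeddings(faceData);
```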

+ 0 - 5
web/yarn.lock

@@ -1896,11 +1896,6 @@ delayed-stream@~1.0.0:
   resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
   integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
 
-density-clustering@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/density-clustering/-/density-clustering-1.3.0.tgz#dc9f59c8f0ab97e1624ac64930fd3194817dcac5"
-  integrity sha512-icpmBubVTwLnsaor9qH/4tG5+7+f61VcqMN3V3pm9sxxSCt2Jcs0zWOgwZW9ARJYaKD3FumIgHiMOcIMRRAzFQ==
-
 dequal@^2.0.3:
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/dequal/-/dequal-2.0.3.tgz#2644214f1997d39ed0ee0ece72335490a7ac67be"