Inline

2024-05-18 08:50:48 +05:30 · 2024-05-18 08:50:48 +05:30 · 93cdf73a66
commit 93cdf73a66
parent eaadc54184
3 changed files with 274 additions and 191 deletions
--- a/web/apps/photos/src/services/face/align.ts
+++ b/web/apps/photos/src/services/face/align.ts
--- a/web/apps/photos/src/services/face/blur.ts
+++ b/web/apps/photos/src/services/face/blur.ts
@ -1,187 +0,0 @@
-import { Face } from "services/face/types";
-import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
-import { mobileFaceNetFaceSize } from "./embed";
-
-/**
- * Laplacian blur detection.
- */
-export const detectBlur = (
-    alignedFaces: Float32Array,
-    faces: Face[],
-): number[] => {
-    const numFaces = Math.round(
-        alignedFaces.length /
-            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
-    );
-    const blurValues: number[] = [];
-    for (let i = 0; i < numFaces; i++) {
-        const face = faces[i];
-        const direction = faceDirection(face);
-        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
-            alignedFaces,
-            i,
-        );
-        const laplacian = applyLaplacian(faceImage, direction);
-        blurValues.push(matrixVariance(laplacian));
-    }
-    return blurValues;
-};
-
-type FaceDirection = "left" | "right" | "straight";
-
-const faceDirection = (face: Face): FaceDirection => {
-    const landmarks = face.detection.landmarks;
-    const leftEye = landmarks[0];
-    const rightEye = landmarks[1];
-    const nose = landmarks[2];
-    const leftMouth = landmarks[3];
-    const rightMouth = landmarks[4];
-
-    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
-    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
-    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
-
-    const faceIsUpright =
-        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
-        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
-
-    const noseStickingOutLeft =
-        nose.x < Math.min(leftEye.x, rightEye.x) &&
-        nose.x < Math.min(leftMouth.x, rightMouth.x);
-
-    const noseStickingOutRight =
-        nose.x > Math.max(leftEye.x, rightEye.x) &&
-        nose.x > Math.max(leftMouth.x, rightMouth.x);
-
-    const noseCloseToLeftEye =
-        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
-    const noseCloseToRightEye =
-        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
-
-    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
-        return "left";
-    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
-        return "right";
-    }
-
-    return "straight";
-};
-
-/**
- * Return a new image by applying a Laplacian blur kernel to each pixel.
- */
-const applyLaplacian = (
-    image: number[][],
-    direction: FaceDirection,
-): number[][] => {
-    const paddedImage: number[][] = padImage(image, direction);
-    const numRows = paddedImage.length - 2;
-    const numCols = paddedImage[0].length - 2;
-
-    // Create an output image initialized to 0.
-    const outputImage: number[][] = Array.from({ length: numRows }, () =>
-        new Array(numCols).fill(0),
-    );
-
-    // Define the Laplacian kernel.
-    const kernel: number[][] = [
-        [0, 1, 0],
-        [1, -4, 1],
-        [0, 1, 0],
-    ];
-
-    // Apply the kernel to each pixel
-    for (let i = 0; i < numRows; i++) {
-        for (let j = 0; j < numCols; j++) {
-            let sum = 0;
-            for (let ki = 0; ki < 3; ki++) {
-                for (let kj = 0; kj < 3; kj++) {
-                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
-                }
-            }
-            // Adjust the output value if necessary (e.g., clipping).
-            outputImage[i][j] = sum;
-        }
-    }
-
-    return outputImage;
-};
-
-const padImage = (image: number[][], direction: FaceDirection): number[][] => {
-    const removeSideColumns = 56; /* must be even */
-
-    const numRows = image.length;
-    const numCols = image[0].length;
-    const paddedNumCols = numCols + 2 - removeSideColumns;
-    const paddedNumRows = numRows + 2;
-
-    // Create a new matrix with extra padding.
-    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
-        new Array(paddedNumCols).fill(0),
-    );
-
-    if (direction === "straight") {
-        // Copy original image into the center of the padded image.
-        for (let i = 0; i < numRows; i++) {
-            for (let j = 0; j < paddedNumCols - 2; j++) {
-                paddedImage[i + 1][j + 1] =
-                    image[i][j + Math.round(removeSideColumns / 2)];
-            }
-        }
-    } else if (direction === "left") {
-        // If the face is facing left, we only take the right side of the face image.
-        for (let i = 0; i < numRows; i++) {
-            for (let j = 0; j < paddedNumCols - 2; j++) {
-                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
-            }
-        }
-    } else if (direction === "right") {
-        // If the face is facing right, we only take the left side of the face image.
-        for (let i = 0; i < numRows; i++) {
-            for (let j = 0; j < paddedNumCols - 2; j++) {
-                paddedImage[i + 1][j + 1] = image[i][j];
-            }
-        }
-    }
-
-    // Reflect padding
-    // Top and bottom rows
-    for (let j = 1; j <= paddedNumCols - 2; j++) {
-        paddedImage[0][j] = paddedImage[2][j]; // Top row
-        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
-    }
-    // Left and right columns
-    for (let i = 0; i < numRows + 2; i++) {
-        paddedImage[i][0] = paddedImage[i][2]; // Left column
-        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
-    }
-
-    return paddedImage;
-};
-
-const matrixVariance = (matrix: number[][]): number => {
-    const numRows = matrix.length;
-    const numCols = matrix[0].length;
-    const totalElements = numRows * numCols;
-
-    // Calculate the mean.
-    let mean: number = 0;
-    matrix.forEach((row) => {
-        row.forEach((value) => {
-            mean += value;
-        });
-    });
-    mean /= totalElements;
-
-    // Calculate the variance.
-    let variance: number = 0;
-    matrix.forEach((row) => {
-        row.forEach((value) => {
-            const diff: number = value - mean;
-            variance += diff * diff;
-        });
-    });
-    variance /= totalElements;
-
-    return variance;
-};
--- a/web/apps/photos/src/services/face/f-index.ts
+++ b/web/apps/photos/src/services/face/f-index.ts
@ -1,27 +1,28 @@
 import { openCache } from "@/next/blob-cache";
 import log from "@/next/log";
-import { faceAlignment } from "services/face/align";
+import { Matrix } from "ml-matrix";
 import mlIDbStorage from "services/face/db";
 import { detectFaces, getRelativeDetection } from "services/face/detect";
 import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
-import { Box, enlargeBox } from "services/face/geom";
+import { Box, Point, enlargeBox } from "services/face/geom";
 import {
    DetectedFace,
    Face,
+    FaceAlignment,
    FaceCrop,
    FaceDetection,
    MLSyncFileContext,
-    type FaceAlignment,
    type MlFileData,
 } from "services/face/types";
 import { defaultMLVersion } from "services/machineLearning/machineLearningService";
+import { getSimilarityTransformation } from "similarity-transformation";
 import type { EnteFile } from "types/file";
 import {
+    createGrayscaleIntMatrixFromNormalized2List,
    cropWithRotation,
    imageBitmapToBlob,
    warpAffineFloat32List,
 } from "utils/image";
-import { detectBlur } from "./blur";
 import {
    fetchImageBitmap,
    fetchImageBitmapForContext,
@ -149,6 +150,275 @@ const syncFileFaceAlignments = async (
    return faceImages;
 };

+// TODO-ML(MR): When is this used or is it as Blazeface leftover?
+const ARCFACE_LANDMARKS = [
+    [38.2946, 51.6963],
+    [73.5318, 51.5014],
+    [56.0252, 71.7366],
+    [56.1396, 92.2848],
+] as Array<[number, number]>;
+
+const ARCFACE_LANDMARKS_FACE_SIZE = 112;
+
+const ARC_FACE_5_LANDMARKS = [
+    [38.2946, 51.6963],
+    [73.5318, 51.5014],
+    [56.0252, 71.7366],
+    [41.5493, 92.3655],
+    [70.7299, 92.2041],
+] as Array<[number, number]>;
+
+/**
+ * Compute and return an {@link FaceAlignment} for the given face detection.
+ *
+ * @param faceDetection A geometry indicating a face detected in an image.
+ */
+export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
+    const landmarkCount = faceDetection.landmarks.length;
+    return getFaceAlignmentUsingSimilarityTransform(
+        faceDetection,
+        normalizeLandmarks(
+            landmarkCount === 5 ? ARC_FACE_5_LANDMARKS : ARCFACE_LANDMARKS,
+            ARCFACE_LANDMARKS_FACE_SIZE,
+        ),
+    );
+};
+
+function getFaceAlignmentUsingSimilarityTransform(
+    faceDetection: FaceDetection,
+    alignedLandmarks: Array<[number, number]>,
+): FaceAlignment {
+    const landmarksMat = new Matrix(
+        faceDetection.landmarks
+            .map((p) => [p.x, p.y])
+            .slice(0, alignedLandmarks.length),
+    ).transpose();
+    const alignedLandmarksMat = new Matrix(alignedLandmarks).transpose();
+
+    const simTransform = getSimilarityTransformation(
+        landmarksMat,
+        alignedLandmarksMat,
+    );
+
+    const RS = Matrix.mul(simTransform.rotation, simTransform.scale);
+    const TR = simTransform.translation;
+
+    const affineMatrix = [
+        [RS.get(0, 0), RS.get(0, 1), TR.get(0, 0)],
+        [RS.get(1, 0), RS.get(1, 1), TR.get(1, 0)],
+        [0, 0, 1],
+    ];
+
+    const size = 1 / simTransform.scale;
+    const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
+    const centerMat = simTransform.fromMean.sub(meanTranslation);
+    const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
+    const rotation = -Math.atan2(
+        simTransform.rotation.get(0, 1),
+        simTransform.rotation.get(0, 0),
+    );
+
+    return {
+        affineMatrix,
+        center,
+        size,
+        rotation,
+    };
+}
+
+function normalizeLandmarks(
+    landmarks: Array<[number, number]>,
+    faceSize: number,
+): Array<[number, number]> {
+    return landmarks.map((landmark) =>
+        landmark.map((p) => p / faceSize),
+    ) as Array<[number, number]>;
+}
+
+/**
+ * Laplacian blur detection.
+ */
+export const detectBlur = (
+    alignedFaces: Float32Array,
+    faces: Face[],
+): number[] => {
+    const numFaces = Math.round(
+        alignedFaces.length /
+            (mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
+    );
+    const blurValues: number[] = [];
+    for (let i = 0; i < numFaces; i++) {
+        const face = faces[i];
+        const direction = faceDirection(face);
+        const faceImage = createGrayscaleIntMatrixFromNormalized2List(
+            alignedFaces,
+            i,
+        );
+        const laplacian = applyLaplacian(faceImage, direction);
+        blurValues.push(matrixVariance(laplacian));
+    }
+    return blurValues;
+};
+
+type FaceDirection = "left" | "right" | "straight";
+
+const faceDirection = (face: Face): FaceDirection => {
+    const landmarks = face.detection.landmarks;
+    const leftEye = landmarks[0];
+    const rightEye = landmarks[1];
+    const nose = landmarks[2];
+    const leftMouth = landmarks[3];
+    const rightMouth = landmarks[4];
+
+    const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
+    const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
+    const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
+
+    const faceIsUpright =
+        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
+        nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
+
+    const noseStickingOutLeft =
+        nose.x < Math.min(leftEye.x, rightEye.x) &&
+        nose.x < Math.min(leftMouth.x, rightMouth.x);
+
+    const noseStickingOutRight =
+        nose.x > Math.max(leftEye.x, rightEye.x) &&
+        nose.x > Math.max(leftMouth.x, rightMouth.x);
+
+    const noseCloseToLeftEye =
+        Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
+    const noseCloseToRightEye =
+        Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
+
+    if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
+        return "left";
+    } else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
+        return "right";
+    }
+
+    return "straight";
+};
+
+/**
+ * Return a new image by applying a Laplacian blur kernel to each pixel.
+ */
+const applyLaplacian = (
+    image: number[][],
+    direction: FaceDirection,
+): number[][] => {
+    const paddedImage: number[][] = padImage(image, direction);
+    const numRows = paddedImage.length - 2;
+    const numCols = paddedImage[0].length - 2;
+
+    // Create an output image initialized to 0.
+    const outputImage: number[][] = Array.from({ length: numRows }, () =>
+        new Array(numCols).fill(0),
+    );
+
+    // Define the Laplacian kernel.
+    const kernel: number[][] = [
+        [0, 1, 0],
+        [1, -4, 1],
+        [0, 1, 0],
+    ];
+
+    // Apply the kernel to each pixel
+    for (let i = 0; i < numRows; i++) {
+        for (let j = 0; j < numCols; j++) {
+            let sum = 0;
+            for (let ki = 0; ki < 3; ki++) {
+                for (let kj = 0; kj < 3; kj++) {
+                    sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
+                }
+            }
+            // Adjust the output value if necessary (e.g., clipping).
+            outputImage[i][j] = sum;
+        }
+    }
+
+    return outputImage;
+};
+
+const padImage = (image: number[][], direction: FaceDirection): number[][] => {
+    const removeSideColumns = 56; /* must be even */
+
+    const numRows = image.length;
+    const numCols = image[0].length;
+    const paddedNumCols = numCols + 2 - removeSideColumns;
+    const paddedNumRows = numRows + 2;
+
+    // Create a new matrix with extra padding.
+    const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
+        new Array(paddedNumCols).fill(0),
+    );
+
+    if (direction === "straight") {
+        // Copy original image into the center of the padded image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] =
+                    image[i][j + Math.round(removeSideColumns / 2)];
+            }
+        }
+    } else if (direction === "left") {
+        // If the face is facing left, we only take the right side of the face image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
+            }
+        }
+    } else if (direction === "right") {
+        // If the face is facing right, we only take the left side of the face image.
+        for (let i = 0; i < numRows; i++) {
+            for (let j = 0; j < paddedNumCols - 2; j++) {
+                paddedImage[i + 1][j + 1] = image[i][j];
+            }
+        }
+    }
+
+    // Reflect padding
+    // Top and bottom rows
+    for (let j = 1; j <= paddedNumCols - 2; j++) {
+        paddedImage[0][j] = paddedImage[2][j]; // Top row
+        paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
+    }
+    // Left and right columns
+    for (let i = 0; i < numRows + 2; i++) {
+        paddedImage[i][0] = paddedImage[i][2]; // Left column
+        paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
+    }
+
+    return paddedImage;
+};
+
+const matrixVariance = (matrix: number[][]): number => {
+    const numRows = matrix.length;
+    const numCols = matrix[0].length;
+    const totalElements = numRows * numCols;
+
+    // Calculate the mean.
+    let mean: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            mean += value;
+        });
+    });
+    mean /= totalElements;
+
+    // Calculate the variance.
+    let variance: number = 0;
+    matrix.forEach((row) => {
+        row.forEach((value) => {
+            const diff: number = value - mean;
+            variance += diff * diff;
+        });
+    });
+    variance /= totalElements;
+
+    return variance;
+};
+
 const syncFileFaceEmbeddings = async (
    fileContext: MLSyncFileContext,
    alignedFacesInput: Float32Array,