Convert point and box to regular objects
This commit is contained in:
parent
4dbc8ab31e
commit
2d5894c5d6
5 changed files with 146 additions and 167 deletions
94
web/apps/photos/src/services/face/crop.ts
Normal file
94
web/apps/photos/src/services/face/crop.ts
Normal file
|
@ -0,0 +1,94 @@
|
|||
import { blobCache } from "@/next/blob-cache";
|
||||
import type { Box, Face, FaceAlignment } from "./types";
|
||||
|
||||
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
|
||||
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
|
||||
const blob = await imageBitmapToBlob(faceCrop);
|
||||
faceCrop.close();
|
||||
|
||||
const cache = await blobCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
return blob;
|
||||
};
|
||||
|
||||
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
|
||||
const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
|
||||
canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
|
||||
return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
|
||||
};
|
||||
|
||||
/**
 * Crop the region around the face described by {@link alignment} out of
 * {@link imageBitmap}, returning the crop as a new {@link ImageBitmap}.
 *
 * The crop is the face's bounding box padded by 1.5x, downscaled (if needed)
 * so that neither dimension exceeds 256 px.
 */
const extractFaceCrop = (
    imageBitmap: ImageBitmap,
    alignment: FaceAlignment,
): ImageBitmap => {
    // TODO-ML: This algorithm is different from what is used by the mobile app.
    // Also, it needs to be something that can work fully using the embedding we
    // receive from remote - the `alignment.boundingBox` will not be available
    // to us in such cases.
    //
    // Pad the detection box by 1.5x and snap to integer pixel coordinates.
    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
    const outputSize = { width: paddedBox.width, height: paddedBox.height };

    // Cap the larger dimension of the output at 256 px, preserving the
    // aspect ratio. Only downscale; never upscale small crops.
    const maxDimension = 256;
    const scale = Math.min(
        maxDimension / paddedBox.width,
        maxDimension / paddedBox.height,
    );

    if (scale < 1) {
        outputSize.width = Math.round(scale * paddedBox.width);
        outputSize.height = Math.round(scale * paddedBox.height);
    }

    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    // NOTE(review): `getContext("2d")` is typed as nullable; this dereferences
    // it unchecked — presumably relying on it never failing for a fresh
    // canvas. Confirm this compiles under the project's strictness settings.
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";

    // Move the origin to the canvas center so the destination box below can
    // be expressed symmetrically around (0, 0).
    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);

    // The destination rectangle, centered on the (translated) origin.
    const outputBox = {
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    };

    // Enlarge both the source and destination rectangles by the same factor
    // about their centers. Since both grow identically, the central
    // `paddedBox` still maps onto `outputBox`; the extra margin drawn is
    // clipped by the canvas bounds.
    const enlargedBox = enlargeBox(paddedBox, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);

    // 9-argument drawImage: source rect from `imageBitmap`, destination rect
    // on the canvas.
    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );

    return offscreen.transferToImageBitmap();
};
|
||||
|
||||
/** Round all the components of the box. */
|
||||
const roundBox = (box: Box): Box => {
|
||||
const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
|
||||
(val) => Math.round(val),
|
||||
);
|
||||
return { x, y, width, height };
|
||||
};
|
||||
|
||||
/** Increase the size of the given {@link box} by {@link factor}. */
|
||||
const enlargeBox = (box: Box, factor: number): Box => {
|
||||
const center = { x: box.x + box.width / 2, y: box.y + box.height / 2 };
|
||||
const newWidth = factor * box.width;
|
||||
const newHeight = factor * box.height;
|
||||
|
||||
return {
|
||||
x: center.x - newWidth / 2,
|
||||
y: center.y - newHeight / 2,
|
||||
width: newWidth,
|
||||
height: newHeight,
|
||||
};
|
||||
};
|
|
@ -1,16 +1,10 @@
|
|||
import { FILE_TYPE } from "@/media/file-type";
|
||||
import { blobCache } from "@/next/blob-cache";
|
||||
import log from "@/next/log";
|
||||
import { workerBridge } from "@/next/worker/worker-bridge";
|
||||
import { Matrix } from "ml-matrix";
|
||||
import {
|
||||
import type {
|
||||
Box,
|
||||
Dimensions,
|
||||
Point,
|
||||
enlargeBox,
|
||||
roundBox,
|
||||
} from "services/face/geom";
|
||||
import type {
|
||||
Face,
|
||||
FaceAlignment,
|
||||
FaceDetection,
|
||||
|
@ -26,6 +20,7 @@ import {
|
|||
translate,
|
||||
} from "transformation-matrix";
|
||||
import type { EnteFile } from "types/file";
|
||||
import { saveFaceCrop } from "./crop";
|
||||
import { fetchImageBitmap, getLocalFileImageBitmap } from "./file";
|
||||
import {
|
||||
clamp,
|
||||
|
@ -148,8 +143,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => {
|
|||
const detectFaces = async (
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<FaceDetection[]> => {
|
||||
const rect = ({ width, height }: Dimensions) =>
|
||||
new Box({ x: 0, y: 0, width, height });
|
||||
const rect = ({ width, height }) => ({ x: 0, y: 0, width, height });
|
||||
|
||||
const { yoloInput, yoloSize } =
|
||||
convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
|
||||
|
@ -259,19 +253,19 @@ const filterExtractDetectionsFromYOLOOutput = (
|
|||
const rightMouthX = rows[i + 13];
|
||||
const rightMouthY = rows[i + 14];
|
||||
|
||||
const box = new Box({
|
||||
const box = {
|
||||
x: xMin,
|
||||
y: yMin,
|
||||
width: width,
|
||||
height: height,
|
||||
});
|
||||
};
|
||||
const probability = score as number;
|
||||
const landmarks = [
|
||||
new Point(leftEyeX, leftEyeY),
|
||||
new Point(rightEyeX, rightEyeY),
|
||||
new Point(noseX, noseY),
|
||||
new Point(leftMouthX, leftMouthY),
|
||||
new Point(rightMouthX, rightMouthY),
|
||||
{ x: leftEyeX, y: leftEyeY },
|
||||
{ x: rightEyeX, y: rightEyeY },
|
||||
{ x: noseX, y: noseY },
|
||||
{ x: leftMouthX, y: leftMouthY },
|
||||
{ x: rightMouthX, y: rightMouthY },
|
||||
];
|
||||
faces.push({ box, landmarks, probability });
|
||||
}
|
||||
|
@ -291,7 +285,7 @@ const transformFaceDetections = (
|
|||
const transform = boxTransformationMatrix(inBox, toBox);
|
||||
return faceDetections.map((f) => ({
|
||||
box: transformBox(f.box, transform),
|
||||
landmarks: f.landmarks.map((p) => transformPoint(p, transform)),
|
||||
landmarks: f.landmarks.map((p) => applyToPoint(transform, p)),
|
||||
probability: f.probability,
|
||||
}));
|
||||
};
|
||||
|
@ -305,24 +299,19 @@ const boxTransformationMatrix = (
|
|||
scale(toBox.width / inBox.width, toBox.height / inBox.height),
|
||||
);
|
||||
|
||||
const transformPoint = (point: Point, transform: TransformationMatrix) => {
|
||||
const txdPoint = applyToPoint(transform, point);
|
||||
return new Point(txdPoint.x, txdPoint.y);
|
||||
};
|
||||
const transformBox = (box: Box, transform: TransformationMatrix): Box => {
|
||||
const topLeft = applyToPoint(transform, { x: box.x, y: box.y });
|
||||
const bottomRight = applyToPoint(transform, {
|
||||
x: box.x + box.width,
|
||||
y: box.y + box.height,
|
||||
});
|
||||
|
||||
const transformBox = (box: Box, transform: TransformationMatrix) => {
|
||||
const topLeft = transformPoint(new Point(box.x, box.y), transform);
|
||||
const bottomRight = transformPoint(
|
||||
new Point(box.x + box.width, box.y + box.height),
|
||||
transform,
|
||||
);
|
||||
|
||||
return new Box({
|
||||
return {
|
||||
x: topLeft.x,
|
||||
y: topLeft.y,
|
||||
width: bottomRight.x - topLeft.x,
|
||||
height: bottomRight.y - topLeft.y,
|
||||
});
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -470,14 +459,14 @@ const faceAlignmentUsingSimilarityTransform = (
|
|||
const size = 1 / simTransform.scale;
|
||||
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
|
||||
const centerMat = simTransform.fromMean.sub(meanTranslation);
|
||||
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
|
||||
const center = { x: centerMat.get(0, 0), y: centerMat.get(1, 0) };
|
||||
|
||||
const boundingBox = new Box({
|
||||
const boundingBox = {
|
||||
x: center.x - size / 2,
|
||||
y: center.y - size / 2,
|
||||
width: size,
|
||||
height: size,
|
||||
});
|
||||
};
|
||||
|
||||
return { affineMatrix, boundingBox };
|
||||
};
|
||||
|
@ -720,85 +709,16 @@ const relativeDetection = (
|
|||
{ width, height }: Dimensions,
|
||||
): FaceDetection => {
|
||||
const oldBox: Box = faceDetection.box;
|
||||
const box = new Box({
|
||||
const box = {
|
||||
x: oldBox.x / width,
|
||||
y: oldBox.y / height,
|
||||
width: oldBox.width / width,
|
||||
height: oldBox.height / height,
|
||||
});
|
||||
const landmarks = faceDetection.landmarks.map((l) => {
|
||||
return new Point(l.x / width, l.y / height);
|
||||
});
|
||||
};
|
||||
const landmarks = faceDetection.landmarks.map((l) => ({
|
||||
x: l.x / width,
|
||||
y: l.y / height,
|
||||
}));
|
||||
const probability = faceDetection.probability;
|
||||
return { box, landmarks, probability };
|
||||
};
|
||||
|
||||
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
|
||||
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
|
||||
const blob = await imageBitmapToBlob(faceCrop);
|
||||
faceCrop.close();
|
||||
|
||||
const cache = await blobCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
return blob;
|
||||
};
|
||||
|
||||
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
|
||||
const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
|
||||
canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
|
||||
return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
|
||||
};
|
||||
|
||||
const extractFaceCrop = (
|
||||
imageBitmap: ImageBitmap,
|
||||
alignment: FaceAlignment,
|
||||
): ImageBitmap => {
|
||||
// TODO-ML: This algorithm is different from what is used by the mobile app.
|
||||
// Also, it needs to be something that can work fully using the embedding we
|
||||
// receive from remote - the `alignment.boundingBox` will not be available
|
||||
// to us in such cases.
|
||||
const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
|
||||
const outputSize = { width: paddedBox.width, height: paddedBox.height };
|
||||
|
||||
const maxDimension = 256;
|
||||
const scale = Math.min(
|
||||
maxDimension / paddedBox.width,
|
||||
maxDimension / paddedBox.height,
|
||||
);
|
||||
|
||||
if (scale < 1) {
|
||||
outputSize.width = Math.round(scale * paddedBox.width);
|
||||
outputSize.height = Math.round(scale * paddedBox.height);
|
||||
}
|
||||
|
||||
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
|
||||
const offscreenCtx = offscreen.getContext("2d");
|
||||
offscreenCtx.imageSmoothingQuality = "high";
|
||||
|
||||
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
|
||||
|
||||
const outputBox = new Box({
|
||||
x: -outputSize.width / 2,
|
||||
y: -outputSize.height / 2,
|
||||
width: outputSize.width,
|
||||
height: outputSize.height,
|
||||
});
|
||||
|
||||
const enlargedBox = enlargeBox(paddedBox, 1.5);
|
||||
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
|
||||
|
||||
offscreenCtx.drawImage(
|
||||
imageBitmap,
|
||||
enlargedBox.x,
|
||||
enlargedBox.y,
|
||||
enlargedBox.width,
|
||||
enlargedBox.height,
|
||||
enlargedOutputBox.x,
|
||||
enlargedOutputBox.y,
|
||||
enlargedOutputBox.width,
|
||||
enlargedOutputBox.height,
|
||||
);
|
||||
|
||||
return offscreen.transferToImageBitmap();
|
||||
};
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
export class Point {
|
||||
public x: number;
|
||||
public y: number;
|
||||
|
||||
constructor(x: number, y: number) {
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
}
|
||||
}
|
||||
|
||||
export interface Dimensions {
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface IRect {
|
||||
x: number;
|
||||
y: number;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export class Box implements IRect {
|
||||
public x: number;
|
||||
public y: number;
|
||||
public width: number;
|
||||
public height: number;
|
||||
|
||||
constructor({ x, y, width, height }: IRect) {
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
this.width = width;
|
||||
this.height = height;
|
||||
}
|
||||
}
|
||||
|
||||
/** Round all the components of the box. */
|
||||
export const roundBox = (box: Box): Box => {
|
||||
const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
|
||||
(val) => Math.round(val),
|
||||
);
|
||||
return new Box({ x, y, width, height });
|
||||
};
|
||||
|
||||
/** Increase the size of the given {@link box} by {@link factor}. */
|
||||
export const enlargeBox = (box: Box, factor: number) => {
|
||||
const center = new Point(box.x + box.width / 2, box.y + box.height / 2);
|
||||
const newWidth = factor * box.width;
|
||||
const newHeight = factor * box.height;
|
||||
|
||||
return new Box({
|
||||
x: center.x - newWidth / 2,
|
||||
y: center.y - newHeight / 2,
|
||||
width: newWidth,
|
||||
height: newHeight,
|
||||
});
|
||||
};
|
|
@ -2,7 +2,7 @@ import log from "@/next/log";
|
|||
import ComlinkCryptoWorker from "@ente/shared/crypto";
|
||||
import { putEmbedding } from "services/embeddingService";
|
||||
import type { EnteFile } from "types/file";
|
||||
import type { Point } from "./geom";
|
||||
import type { Point } from "./crop";
|
||||
import type { Face, FaceDetection, MlFileData } from "./types";
|
||||
|
||||
export const putFaceEmbedding = async (
|
||||
|
|
|
@ -1,4 +1,26 @@
|
|||
import { Box, Dimensions, Point } from "services/face/geom";
|
||||
/** The x and y coordinates of a point. */
|
||||
export interface Point {
|
||||
x: number;
|
||||
y: number;
|
||||
}
|
||||
|
||||
/** The dimensions of something, say an image. */
|
||||
export interface Dimensions {
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
/** A rectangle given by its top left coordinates and dimensions. */
|
||||
export interface Box {
|
||||
/** The x coordinate of the top left (xMin). */
|
||||
x: number;
|
||||
/** The y coordinate of the top left (yMin). */
|
||||
y: number;
|
||||
/** The width of the box. */
|
||||
width: number;
|
||||
/** The height of the box. */
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface FaceDetection {
|
||||
// box and landmarks are relative to the image dimensions stored at mlFileData
|
||||
|
|
Loading…
Add table
Reference in a new issue