From e224ad19d3f837ed00c3c71f15cfa3ea7f504d69 Mon Sep 17 00:00:00 2001
From: Manav Rathi
Date: Sat, 18 May 2024 09:12:25 +0530
Subject: [PATCH] Separate

---
 web/apps/photos/src/services/face/detect.ts   | 57 +----------------
 .../photos/src/services/face/transform-box.ts | 64 +++++++++++++++++++
 2 files changed, 67 insertions(+), 54 deletions(-)
 create mode 100644 web/apps/photos/src/services/face/transform-box.ts

diff --git a/web/apps/photos/src/services/face/detect.ts b/web/apps/photos/src/services/face/detect.ts
index c4829c10b..af3bda680 100644
--- a/web/apps/photos/src/services/face/detect.ts
+++ b/web/apps/photos/src/services/face/detect.ts
@@ -1,22 +1,9 @@
 import { workerBridge } from "@/next/worker/worker-bridge";
 import { euclidean } from "hdbscan";
-import {
-    Box,
-    Dimensions,
-    Point,
-    boxFromBoundingBox,
-    newBox,
-} from "services/face/geom";
+import { Box, Dimensions, Point, newBox } from "services/face/geom";
 import { FaceDetection } from "services/face/types";
-// TODO-ML(MR): Do we need two separate Matrix libraries?
-import {
-    Matrix,
-    applyToPoint,
-    compose,
-    scale,
-    translate,
-} from "transformation-matrix";
 import { clamp, getPixelBilinear, normalizePixelBetween0And1 } from "./image";
+import { transformFaceDetections } from "./transform-box";
 
 /**
  * Detect faces in the given {@link imageBitmap}.
@@ -39,17 +26,7 @@ export const detectFaces = async (
     const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
     const inBox = newBox(0, 0, resized.width, resized.height);
     const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
-    const transform = computeTransformToBox(inBox, toBox);
-    const faceDetections: Array<FaceDetection> = faces?.map((f) => {
-        const box = transformBox(f.box, transform);
-        const normLandmarks = f.landmarks;
-        const landmarks = transformPoints(normLandmarks, transform);
-        return {
-            box,
-            landmarks,
-            probability: f.probability as number,
-        } as FaceDetection;
-    });
+    const faceDetections = transformFaceDetections(faces, inBox, toBox);
     return removeDuplicateDetections(faceDetections, maxFaceDistance);
 };
 
@@ -283,31 +260,3 @@ function getDetectionCenter(detection: FaceDetection) {
 
     return new Point(center.x / 4, center.y / 4);
 }
-
-function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
-    return compose(
-        translate(toBox.x, toBox.y),
-        scale(toBox.width / inBox.width, toBox.height / inBox.height),
-    );
-}
-
-function transformPoint(point: Point, transform: Matrix) {
-    const txdPoint = applyToPoint(transform, point);
-    return new Point(txdPoint.x, txdPoint.y);
-}
-
-function transformPoints(points: Point[], transform: Matrix) {
-    return points?.map((p) => transformPoint(p, transform));
-}
-
-function transformBox(box: Box, transform: Matrix) {
-    const topLeft = transformPoint(box.topLeft, transform);
-    const bottomRight = transformPoint(box.bottomRight, transform);
-
-    return boxFromBoundingBox({
-        left: topLeft.x,
-        top: topLeft.y,
-        right: bottomRight.x,
-        bottom: bottomRight.y,
-    });
-}
diff --git a/web/apps/photos/src/services/face/transform-box.ts b/web/apps/photos/src/services/face/transform-box.ts
new file mode 100644
index 000000000..8234b8739
--- /dev/null
+++ b/web/apps/photos/src/services/face/transform-box.ts
@@ -0,0 +1,64 @@
+import { Box, Point, boxFromBoundingBox } from "services/face/geom";
+import { FaceDetection } from "services/face/types";
+// TODO-ML(MR): Do we need two separate Matrix libraries?
+//
+// Keeping this in a separate file so that we can audit this. If these can be
+// expressed using ml-matrix, then we can move the code to f-index.
+import {
+    Matrix,
+    applyToPoint,
+    compose,
+    scale,
+    translate,
+} from "transformation-matrix";
+
+/**
+ * Transform the given {@link faces}, mapping the box and landmarks of each
+ * detection from the coordinate system of {@link inBox} into the
+ * coordinate system of {@link toBox}.
+ */
+export const transformFaceDetections = (
+    faces: FaceDetection[],
+    inBox: Box,
+    toBox: Box,
+): FaceDetection[] => {
+    const transform = computeTransformToBox(inBox, toBox);
+    return faces.map((f) => {
+        const box = transformBox(f.box, transform);
+        const normLandmarks = f.landmarks;
+        const landmarks = transformPoints(normLandmarks, transform);
+        return {
+            box,
+            landmarks,
+            probability: f.probability as number,
+        } as FaceDetection;
+    });
+};
+
+function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
+    return compose(
+        translate(toBox.x, toBox.y),
+        scale(toBox.width / inBox.width, toBox.height / inBox.height),
+    );
+}
+
+function transformPoint(point: Point, transform: Matrix) {
+    const txdPoint = applyToPoint(transform, point);
+    return new Point(txdPoint.x, txdPoint.y);
+}
+
+function transformPoints(points: Point[], transform: Matrix) {
+    return points?.map((p) => transformPoint(p, transform));
+}
+
+function transformBox(box: Box, transform: Matrix) {
+    const topLeft = transformPoint(box.topLeft, transform);
+    const bottomRight = transformPoint(box.bottomRight, transform);
+
+    return boxFromBoundingBox({
+        left: topLeft.x,
+        top: topLeft.y,
+        right: bottomRight.x,
+        bottom: bottomRight.y,
+    });
+}
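
Note (not part of the patch): a minimal usage sketch showing the intent of the inBox/toBox pair that the extracted helper takes. The wrapper name mapDetectionsToImage, the import path "services/face/transform-box", and the 640x640 detector input size are assumptions made for illustration; newBox, transformFaceDetections, and FaceDetection come from the files changed above.

    import { newBox } from "services/face/geom";
    import { FaceDetection } from "services/face/types";
    import { transformFaceDetections } from "services/face/transform-box";

    // Map detections from the detector's input coordinate system back to the
    // coordinates of the original, full-resolution image.
    const mapDetectionsToImage = (
        faces: FaceDetection[],
        imageWidth: number,
        imageHeight: number,
    ): FaceDetection[] => {
        // Assumed here: the detector ran on a 640x640 resized input.
        const inBox = newBox(0, 0, 640, 640);
        // Target: the original image's coordinate system.
        const toBox = newBox(0, 0, imageWidth, imageHeight);
        return transformFaceDetections(faces, inBox, toBox);
    };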