diff --git a/web/apps/photos/src/constants/mlConfig.ts b/web/apps/photos/src/constants/mlConfig.ts index 35a21d390..ac40cd5fb 100644 --- a/web/apps/photos/src/constants/mlConfig.ts +++ b/web/apps/photos/src/constants/mlConfig.ts @@ -77,12 +77,6 @@ export const MAX_ML_SYNC_ERROR_COUNT = 1; export const TEXT_DETECTION_TIMEOUT_MS = [10000, 30000, 60000, 120000, 240000]; -export const BLAZEFACE_MAX_FACES = 50; -export const BLAZEFACE_INPUT_SIZE = 256; -export const BLAZEFACE_IOU_THRESHOLD = 0.3; -export const BLAZEFACE_SCORE_THRESHOLD = 0.75; -export const BLAZEFACE_PASS1_SCORE_THRESHOLD = 0.4; -export const BLAZEFACE_FACE_SIZE = 112; export const MOBILEFACENET_FACE_SIZE = 112; export const MOBILEFACENET_EMBEDDING_SIZE = 192; diff --git a/web/apps/photos/src/services/machineLearning/blazeFaceDetectionService.ts b/web/apps/photos/src/services/machineLearning/blazeFaceDetectionService.ts deleted file mode 100644 index d557df78e..000000000 --- a/web/apps/photos/src/services/machineLearning/blazeFaceDetectionService.ts +++ /dev/null @@ -1,257 +0,0 @@ -import log from "@/next/log"; -import { GraphModel } from "@tensorflow/tfjs-converter"; -import * as tf from "@tensorflow/tfjs-core"; -import { - load as blazeFaceLoad, - BlazeFaceModel, - NormalizedFace, -} from "blazeface-back"; -import { - BLAZEFACE_FACE_SIZE, - BLAZEFACE_INPUT_SIZE, - BLAZEFACE_IOU_THRESHOLD, - BLAZEFACE_MAX_FACES, - BLAZEFACE_PASS1_SCORE_THRESHOLD, - BLAZEFACE_SCORE_THRESHOLD, - MAX_FACE_DISTANCE_PERCENT, -} from "constants/mlConfig"; -import { - FaceDetection, - FaceDetectionMethod, - FaceDetectionService, - Versioned, -} from "types/machineLearning"; -import { addPadding, crop, resizeToSquare } from "utils/image"; -import { enlargeBox, newBox, normFaceBox } from "utils/machineLearning"; -import { - getNearestDetection, - removeDuplicateDetections, - transformPaddedToImage, -} from "utils/machineLearning/faceDetection"; -import { - computeTransformToBox, - transformBox, - transformPoints, -} from "utils/machineLearning/transform"; -import { Box, Point } from "../../../thirdparty/face-api/classes"; - -class BlazeFaceDetectionService implements FaceDetectionService { - private blazeFaceModel: Promise; - private blazeFaceBackModel: GraphModel; - public method: Versioned; - - private desiredLeftEye = [0.36, 0.45]; - private desiredFaceSize; - - public constructor(desiredFaceSize: number = BLAZEFACE_FACE_SIZE) { - this.method = { - value: "BlazeFace", - version: 1, - }; - this.desiredFaceSize = desiredFaceSize; - } - - public getRelativeDetection(): FaceDetection { - // TODO(MR): onnx-yolo - throw new Error(); - } - - private async init() { - this.blazeFaceModel = blazeFaceLoad({ - maxFaces: BLAZEFACE_MAX_FACES, - scoreThreshold: BLAZEFACE_PASS1_SCORE_THRESHOLD, - iouThreshold: BLAZEFACE_IOU_THRESHOLD, - modelUrl: "/models/blazeface/back/model.json", - inputHeight: BLAZEFACE_INPUT_SIZE, - inputWidth: BLAZEFACE_INPUT_SIZE, - }); - log.info( - "loaded blazeFaceModel: ", - // await this.blazeFaceModel, - // eslint-disable-next-line @typescript-eslint/await-thenable - await tf.getBackend(), - ); - } - - private getDlibAlignedFace(normFace: NormalizedFace): Box { - const relX = 0.5; - const relY = 0.43; - const relScale = 0.45; - - const leftEyeCenter = normFace.landmarks[0]; - const rightEyeCenter = normFace.landmarks[1]; - const mountCenter = normFace.landmarks[3]; - - const distToMouth = (pt) => { - const dy = mountCenter[1] - pt[1]; - const dx = mountCenter[0] - pt[0]; - return Math.sqrt(dx * dx + dy * dy); - }; - const eyeToMouthDist = - (distToMouth(leftEyeCenter) + distToMouth(rightEyeCenter)) / 2; - - const size = Math.floor(eyeToMouthDist / relScale); - - const center = [ - (leftEyeCenter[0] + rightEyeCenter[0] + mountCenter[0]) / 3, - (leftEyeCenter[1] + rightEyeCenter[1] + mountCenter[1]) / 3, - ]; - - const left = center[0] - relX * size; - const top = center[1] - relY * size; - const right = center[0] + relX * size; - const bottom = center[1] + relY * size; - - return new Box({ - left: left, - top: top, - right: right, - bottom: bottom, - }); - } - - private getAlignedFace(normFace: NormalizedFace): Box { - const leftEye = normFace.landmarks[0]; - const rightEye = normFace.landmarks[1]; - // const noseTip = normFace.landmarks[2]; - - const dy = rightEye[1] - leftEye[1]; - const dx = rightEye[0] - leftEye[0]; - - const desiredRightEyeX = 1.0 - this.desiredLeftEye[0]; - - // const eyesCenterX = (leftEye[0] + rightEye[0]) / 2; - // const yaw = Math.abs(noseTip[0] - eyesCenterX) - const dist = Math.sqrt(dx * dx + dy * dy); - let desiredDist = desiredRightEyeX - this.desiredLeftEye[0]; - desiredDist *= this.desiredFaceSize; - const scale = desiredDist / dist; - // log.info("scale: ", scale); - - const eyesCenter = []; - eyesCenter[0] = Math.floor((leftEye[0] + rightEye[0]) / 2); - eyesCenter[1] = Math.floor((leftEye[1] + rightEye[1]) / 2); - // log.info("eyesCenter: ", eyesCenter); - - const faceWidth = this.desiredFaceSize / scale; - const faceHeight = this.desiredFaceSize / scale; - // log.info("faceWidth: ", faceWidth, "faceHeight: ", faceHeight) - - const tx = eyesCenter[0] - faceWidth * 0.5; - const ty = eyesCenter[1] - faceHeight * this.desiredLeftEye[1]; - // log.info("tx: ", tx, "ty: ", ty); - - return new Box({ - left: tx, - top: ty, - right: tx + faceWidth, - bottom: ty + faceHeight, - }); - } - - public async detectFacesUsingModel(image: tf.Tensor3D) { - const resizedImage = tf.image.resizeBilinear(image, [256, 256]); - const reshapedImage = tf.reshape(resizedImage, [ - 1, - resizedImage.shape[0], - resizedImage.shape[1], - 3, - ]); - const normalizedImage = tf.sub(tf.div(reshapedImage, 127.5), 1.0); - // eslint-disable-next-line @typescript-eslint/await-thenable - const results = await this.blazeFaceBackModel.predict(normalizedImage); - // log.info('onFacesDetected: ', results); - return results; - } - - private async getBlazefaceModel() { - if (!this.blazeFaceModel) { - await this.init(); - } - - return this.blazeFaceModel; - } - - private async estimateFaces( - imageBitmap: ImageBitmap, - ): Promise> { - const resized = resizeToSquare(imageBitmap, BLAZEFACE_INPUT_SIZE); - const tfImage = tf.browser.fromPixels(resized.image); - const blazeFaceModel = await this.getBlazefaceModel(); - // TODO: check if this works concurrently, else use serialqueue - const faces = await blazeFaceModel.estimateFaces(tfImage); - tf.dispose(tfImage); - - const inBox = newBox(0, 0, resized.width, resized.height); - const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height); - const transform = computeTransformToBox(inBox, toBox); - // log.info("1st pass: ", { transform }); - - const faceDetections: Array = faces?.map((f) => { - const box = transformBox(normFaceBox(f), transform); - const normLandmarks = (f.landmarks as number[][])?.map( - (l) => new Point(l[0], l[1]), - ); - const landmarks = transformPoints(normLandmarks, transform); - return { - box, - landmarks, - probability: f.probability as number, - // detectionMethod: this.method, - } as FaceDetection; - }); - - return faceDetections; - } - - public async detectFaces( - imageBitmap: ImageBitmap, - ): Promise> { - const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT; - const pass1Detections = await this.estimateFaces(imageBitmap); - - // run 2nd pass for accuracy - const detections: Array = []; - for (const pass1Detection of pass1Detections) { - const imageBox = enlargeBox(pass1Detection.box, 2); - const faceImage = crop( - imageBitmap, - imageBox, - BLAZEFACE_INPUT_SIZE / 2, - ); - const paddedImage = addPadding(faceImage, 0.5); - const paddedBox = enlargeBox(imageBox, 2); - const pass2Detections = await this.estimateFaces(paddedImage); - - pass2Detections?.forEach((d) => - transformPaddedToImage(d, faceImage, imageBox, paddedBox), - ); - let selected = pass2Detections?.[0]; - if (pass2Detections?.length > 1) { - // log.info('2nd pass >1 face', pass2Detections.length); - selected = getNearestDetection( - pass1Detection, - pass2Detections, - // maxFaceDistance - ); - } - - // we might miss 1st pass face actually having score within threshold - // it is ok as results will be consistent with 2nd pass only detections - if (selected && selected.probability >= BLAZEFACE_SCORE_THRESHOLD) { - // log.info("pass2: ", { imageBox, paddedBox, transform, selected }); - detections.push(selected); - } - } - - return removeDuplicateDetections(detections, maxFaceDistance); - } - - public async dispose() { - const blazeFaceModel = await this.getBlazefaceModel(); - blazeFaceModel?.dispose(); - this.blazeFaceModel = undefined; - } -} - -export default new BlazeFaceDetectionService(); diff --git a/web/apps/photos/src/types/machineLearning/index.ts b/web/apps/photos/src/types/machineLearning/index.ts index ddceb188b..f1895cd41 100644 --- a/web/apps/photos/src/types/machineLearning/index.ts +++ b/web/apps/photos/src/types/machineLearning/index.ts @@ -59,10 +59,7 @@ export declare type Landmark = Point; export declare type ImageType = "Original" | "Preview"; -export declare type FaceDetectionMethod = - | "BlazeFace" - | "FaceApiSSD" - | "YoloFace"; +export declare type FaceDetectionMethod = "FaceApiSSD" | "YoloFace"; export declare type ObjectDetectionMethod = "SSDMobileNetV2"; diff --git a/web/apps/photos/src/utils/machineLearning/faceDetection.ts b/web/apps/photos/src/utils/machineLearning/faceDetection.ts index 6b9aca1d0..6a0402dcf 100644 --- a/web/apps/photos/src/utils/machineLearning/faceDetection.ts +++ b/web/apps/photos/src/utils/machineLearning/faceDetection.ts @@ -1,27 +1,6 @@ import { euclidean } from "hdbscan"; import { FaceDetection } from "types/machineLearning"; -import { getNearestPointIndex, newBox } from "."; -import { Box, Point } from "../../../thirdparty/face-api/classes"; -import { - computeTransformToBox, - transformBox, - transformPoints, -} from "./transform"; - -export function transformPaddedToImage( - detection: FaceDetection, - faceImage: ImageBitmap, - imageBox: Box, - paddedBox: Box, -) { - const inBox = newBox(0, 0, faceImage.width, faceImage.height); - imageBox.x = paddedBox.x; - imageBox.y = paddedBox.y; - const transform = computeTransformToBox(inBox, imageBox); - - detection.box = transformBox(detection.box, transform); - detection.landmarks = transformPoints(detection.landmarks, transform); -} +import { Point } from "../../../thirdparty/face-api/classes"; export function getDetectionCenter(detection: FaceDetection) { const center = new Point(0, 0); @@ -35,30 +14,6 @@ export function getDetectionCenter(detection: FaceDetection) { return center.div({ x: 4, y: 4 }); } -/** - * Finds the nearest face detection from a list of detections to a specified detection. - * - * This function calculates the center of each detection and then finds the detection whose center is nearest to the center of the specified detection. - * If a maximum distance is specified, only detections within that distance are considered. - * - * @param toDetection - The face detection to find the nearest detection to. - * @param fromDetections - An array of face detections to search in. - * @param maxDistance - The maximum distance between the centers of the two detections for a detection to be considered. If not specified, all detections are considered. - * - * @returns The nearest face detection from the list, or `undefined` if no detection is within the maximum distance. - */ -export function getNearestDetection( - toDetection: FaceDetection, - fromDetections: Array, - maxDistance?: number, -) { - const toCenter = getDetectionCenter(toDetection); - const centers = fromDetections.map((d) => getDetectionCenter(d)); - const nearestIndex = getNearestPointIndex(toCenter, centers, maxDistance); - - return nearestIndex >= 0 && fromDetections[nearestIndex]; -} - /** * Removes duplicate face detections from an array of detections. * diff --git a/web/apps/photos/src/utils/machineLearning/index.ts b/web/apps/photos/src/utils/machineLearning/index.ts index 9b47f1a7b..afe4ba83e 100644 --- a/web/apps/photos/src/utils/machineLearning/index.ts +++ b/web/apps/photos/src/utils/machineLearning/index.ts @@ -1,7 +1,6 @@ import log from "@/next/log"; import { CACHES } from "@ente/shared/storage/cacheStorage/constants"; import { cached } from "@ente/shared/storage/cacheStorage/helpers"; -import { NormalizedFace } from "blazeface-back"; import { FILE_TYPE } from "constants/file"; import { BLAZEFACE_FACE_SIZE } from "constants/mlConfig"; import { euclidean } from "hdbscan"; @@ -64,15 +63,6 @@ export function newBoxFromPoints( return new Box({ left, top, right, bottom }); } -export function normFaceBox(face: NormalizedFace) { - return newBoxFromPoints( - face.topLeft[0], - face.topLeft[1], - face.bottomRight[0], - face.bottomRight[1], - ); -} - export function getBoxCenterPt(topLeft: Point, bottomRight: Point): Point { return topLeft.add(bottomRight.sub(topLeft).div(new Point(2, 2))); }