Remove blazeface
parent e6905577c6
commit da3b58661a
5 changed files with 2 additions and 323 deletions
@@ -77,12 +77,6 @@ export const MAX_ML_SYNC_ERROR_COUNT = 1;
 
 export const TEXT_DETECTION_TIMEOUT_MS = [10000, 30000, 60000, 120000, 240000];
 
-export const BLAZEFACE_MAX_FACES = 50;
-export const BLAZEFACE_INPUT_SIZE = 256;
-export const BLAZEFACE_IOU_THRESHOLD = 0.3;
-export const BLAZEFACE_SCORE_THRESHOLD = 0.75;
-export const BLAZEFACE_PASS1_SCORE_THRESHOLD = 0.4;
-export const BLAZEFACE_FACE_SIZE = 112;
 export const MOBILEFACENET_FACE_SIZE = 112;
 export const MOBILEFACENET_EMBEDDING_SIZE = 192;
 
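A note on the deleted constants: BLAZEFACE_PASS1_SCORE_THRESHOLD (0.4) was the loose confidence cutoff for the first detection pass, BLAZEFACE_SCORE_THRESHOLD (0.75) the strict cutoff applied to re-scored second-pass results, and BLAZEFACE_IOU_THRESHOLD (0.3) the overlap bound for suppressing duplicate boxes. A minimal sketch of how the two score cutoffs related (hypothetical helper names, not part of the codebase):

// Pass 1 keeps low-confidence candidates so they can be re-checked on a
// tight crop; only the re-scored pass-2 detection must clear the strict bar.
const keepForPass2 = (score: number): boolean => score >= 0.4; // BLAZEFACE_PASS1_SCORE_THRESHOLD
const keepInResults = (score: number): boolean => score >= 0.75; // BLAZEFACE_SCORE_THRESHOLD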
@@ -1,257 +0,0 @@
-import log from "@/next/log";
-import { GraphModel } from "@tensorflow/tfjs-converter";
-import * as tf from "@tensorflow/tfjs-core";
-import {
-    load as blazeFaceLoad,
-    BlazeFaceModel,
-    NormalizedFace,
-} from "blazeface-back";
-import {
-    BLAZEFACE_FACE_SIZE,
-    BLAZEFACE_INPUT_SIZE,
-    BLAZEFACE_IOU_THRESHOLD,
-    BLAZEFACE_MAX_FACES,
-    BLAZEFACE_PASS1_SCORE_THRESHOLD,
-    BLAZEFACE_SCORE_THRESHOLD,
-    MAX_FACE_DISTANCE_PERCENT,
-} from "constants/mlConfig";
-import {
-    FaceDetection,
-    FaceDetectionMethod,
-    FaceDetectionService,
-    Versioned,
-} from "types/machineLearning";
-import { addPadding, crop, resizeToSquare } from "utils/image";
-import { enlargeBox, newBox, normFaceBox } from "utils/machineLearning";
-import {
-    getNearestDetection,
-    removeDuplicateDetections,
-    transformPaddedToImage,
-} from "utils/machineLearning/faceDetection";
-import {
-    computeTransformToBox,
-    transformBox,
-    transformPoints,
-} from "utils/machineLearning/transform";
-import { Box, Point } from "../../../thirdparty/face-api/classes";
-
-class BlazeFaceDetectionService implements FaceDetectionService {
-    private blazeFaceModel: Promise<BlazeFaceModel>;
-    private blazeFaceBackModel: GraphModel;
-    public method: Versioned<FaceDetectionMethod>;
-
-    private desiredLeftEye = [0.36, 0.45];
-    private desiredFaceSize;
-
-    public constructor(desiredFaceSize: number = BLAZEFACE_FACE_SIZE) {
-        this.method = {
-            value: "BlazeFace",
-            version: 1,
-        };
-        this.desiredFaceSize = desiredFaceSize;
-    }
-
-    public getRelativeDetection(): FaceDetection {
-        // TODO(MR): onnx-yolo
-        throw new Error();
-    }
-
-    private async init() {
-        this.blazeFaceModel = blazeFaceLoad({
-            maxFaces: BLAZEFACE_MAX_FACES,
-            scoreThreshold: BLAZEFACE_PASS1_SCORE_THRESHOLD,
-            iouThreshold: BLAZEFACE_IOU_THRESHOLD,
-            modelUrl: "/models/blazeface/back/model.json",
-            inputHeight: BLAZEFACE_INPUT_SIZE,
-            inputWidth: BLAZEFACE_INPUT_SIZE,
-        });
-        log.info(
-            "loaded blazeFaceModel: ",
-            // await this.blazeFaceModel,
-            // eslint-disable-next-line @typescript-eslint/await-thenable
-            await tf.getBackend(),
-        );
-    }
-
-    private getDlibAlignedFace(normFace: NormalizedFace): Box {
-        const relX = 0.5;
-        const relY = 0.43;
-        const relScale = 0.45;
-
-        const leftEyeCenter = normFace.landmarks[0];
-        const rightEyeCenter = normFace.landmarks[1];
-        const mountCenter = normFace.landmarks[3];
-
-        const distToMouth = (pt) => {
-            const dy = mountCenter[1] - pt[1];
-            const dx = mountCenter[0] - pt[0];
-            return Math.sqrt(dx * dx + dy * dy);
-        };
-        const eyeToMouthDist =
-            (distToMouth(leftEyeCenter) + distToMouth(rightEyeCenter)) / 2;
-
-        const size = Math.floor(eyeToMouthDist / relScale);
-
-        const center = [
-            (leftEyeCenter[0] + rightEyeCenter[0] + mountCenter[0]) / 3,
-            (leftEyeCenter[1] + rightEyeCenter[1] + mountCenter[1]) / 3,
-        ];
-
-        const left = center[0] - relX * size;
-        const top = center[1] - relY * size;
-        const right = center[0] + relX * size;
-        const bottom = center[1] + relY * size;
-
-        return new Box({
-            left: left,
-            top: top,
-            right: right,
-            bottom: bottom,
-        });
-    }
-
-    private getAlignedFace(normFace: NormalizedFace): Box {
-        const leftEye = normFace.landmarks[0];
-        const rightEye = normFace.landmarks[1];
-        // const noseTip = normFace.landmarks[2];
-
-        const dy = rightEye[1] - leftEye[1];
-        const dx = rightEye[0] - leftEye[0];
-
-        const desiredRightEyeX = 1.0 - this.desiredLeftEye[0];
-
-        // const eyesCenterX = (leftEye[0] + rightEye[0]) / 2;
-        // const yaw = Math.abs(noseTip[0] - eyesCenterX)
-        const dist = Math.sqrt(dx * dx + dy * dy);
-        let desiredDist = desiredRightEyeX - this.desiredLeftEye[0];
-        desiredDist *= this.desiredFaceSize;
-        const scale = desiredDist / dist;
-        // log.info("scale: ", scale);
-
-        const eyesCenter = [];
-        eyesCenter[0] = Math.floor((leftEye[0] + rightEye[0]) / 2);
-        eyesCenter[1] = Math.floor((leftEye[1] + rightEye[1]) / 2);
-        // log.info("eyesCenter: ", eyesCenter);
-
-        const faceWidth = this.desiredFaceSize / scale;
-        const faceHeight = this.desiredFaceSize / scale;
-        // log.info("faceWidth: ", faceWidth, "faceHeight: ", faceHeight)
-
-        const tx = eyesCenter[0] - faceWidth * 0.5;
-        const ty = eyesCenter[1] - faceHeight * this.desiredLeftEye[1];
-        // log.info("tx: ", tx, "ty: ", ty);
-
-        return new Box({
-            left: tx,
-            top: ty,
-            right: tx + faceWidth,
-            bottom: ty + faceHeight,
-        });
-    }
-
-    public async detectFacesUsingModel(image: tf.Tensor3D) {
-        const resizedImage = tf.image.resizeBilinear(image, [256, 256]);
-        const reshapedImage = tf.reshape(resizedImage, [
-            1,
-            resizedImage.shape[0],
-            resizedImage.shape[1],
-            3,
-        ]);
-        const normalizedImage = tf.sub(tf.div(reshapedImage, 127.5), 1.0);
-        // eslint-disable-next-line @typescript-eslint/await-thenable
-        const results = await this.blazeFaceBackModel.predict(normalizedImage);
-        // log.info('onFacesDetected: ', results);
-        return results;
-    }
-
-    private async getBlazefaceModel() {
-        if (!this.blazeFaceModel) {
-            await this.init();
-        }
-
-        return this.blazeFaceModel;
-    }
-
-    private async estimateFaces(
-        imageBitmap: ImageBitmap,
-    ): Promise<Array<FaceDetection>> {
-        const resized = resizeToSquare(imageBitmap, BLAZEFACE_INPUT_SIZE);
-        const tfImage = tf.browser.fromPixels(resized.image);
-        const blazeFaceModel = await this.getBlazefaceModel();
-        // TODO: check if this works concurrently, else use serialqueue
-        const faces = await blazeFaceModel.estimateFaces(tfImage);
-        tf.dispose(tfImage);
-
-        const inBox = newBox(0, 0, resized.width, resized.height);
-        const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
-        const transform = computeTransformToBox(inBox, toBox);
-        // log.info("1st pass: ", { transform });
-
-        const faceDetections: Array<FaceDetection> = faces?.map((f) => {
-            const box = transformBox(normFaceBox(f), transform);
-            const normLandmarks = (f.landmarks as number[][])?.map(
-                (l) => new Point(l[0], l[1]),
-            );
-            const landmarks = transformPoints(normLandmarks, transform);
-            return {
-                box,
-                landmarks,
-                probability: f.probability as number,
-                // detectionMethod: this.method,
-            } as FaceDetection;
-        });
-
-        return faceDetections;
-    }
-
-    public async detectFaces(
-        imageBitmap: ImageBitmap,
-    ): Promise<Array<FaceDetection>> {
-        const maxFaceDistance = imageBitmap.width * MAX_FACE_DISTANCE_PERCENT;
-        const pass1Detections = await this.estimateFaces(imageBitmap);
-
-        // run 2nd pass for accuracy
-        const detections: Array<FaceDetection> = [];
-        for (const pass1Detection of pass1Detections) {
-            const imageBox = enlargeBox(pass1Detection.box, 2);
-            const faceImage = crop(
-                imageBitmap,
-                imageBox,
-                BLAZEFACE_INPUT_SIZE / 2,
-            );
-            const paddedImage = addPadding(faceImage, 0.5);
-            const paddedBox = enlargeBox(imageBox, 2);
-            const pass2Detections = await this.estimateFaces(paddedImage);
-
-            pass2Detections?.forEach((d) =>
-                transformPaddedToImage(d, faceImage, imageBox, paddedBox),
-            );
-            let selected = pass2Detections?.[0];
-            if (pass2Detections?.length > 1) {
-                // log.info('2nd pass >1 face', pass2Detections.length);
-                selected = getNearestDetection(
-                    pass1Detection,
-                    pass2Detections,
-                    // maxFaceDistance
-                );
-            }
-
-            // we might miss 1st pass face actually having score within threshold
-            // it is ok as results will be consistent with 2nd pass only detections
-            if (selected && selected.probability >= BLAZEFACE_SCORE_THRESHOLD) {
-                // log.info("pass2: ", { imageBox, paddedBox, transform, selected });
-                detections.push(selected);
-            }
-        }
-
-        return removeDuplicateDetections(detections, maxFaceDistance);
-    }
-
-    public async dispose() {
-        const blazeFaceModel = await this.getBlazefaceModel();
-        blazeFaceModel?.dispose();
-        this.blazeFaceModel = undefined;
-    }
-}
-
-export default new BlazeFaceDetectionService();
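For context, the service deleted above ran BlazeFace in two passes: a first pass over the full image, then, per candidate, a second pass over a 2x-enlarged crop padded by 0.5 on each side, keeping the nearest second-pass hit only when its score reached BLAZEFACE_SCORE_THRESHOLD. A minimal self-contained sketch of that crop geometry, with a hypothetical Box type and a reconstructed enlargeBox standing in for the deleted helpers:

// Grow a box around its center by `factor`; mirrors the assumed behavior
// of the deleted enlargeBox helper (reconstructed for illustration).
interface Box { x: number; y: number; width: number; height: number; }

const enlargeBox = (b: Box, factor: number): Box => ({
    x: b.x - (b.width * (factor - 1)) / 2,
    y: b.y - (b.height * (factor - 1)) / 2,
    width: b.width * factor,
    height: b.height * factor,
});

// For one first-pass detection: the crop handed to pass 2 (imageBox) and
// the padded region its results come back in (paddedBox); addPadding with
// 0.5 adds half the crop on every side, i.e. doubles each dimension.
function secondPassRegions(pass1Box: Box) {
    const imageBox = enlargeBox(pass1Box, 2); // 2x context around the face
    const paddedBox = enlargeBox(imageBox, 2); // matches addPadding(faceImage, 0.5)
    return { imageBox, paddedBox };
}

console.log(secondPassRegions({ x: 100, y: 80, width: 50, height: 50 }));
// -> imageBox 100x100 at (75, 55); paddedBox 200x200 at (25, 5)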
@@ -59,10 +59,7 @@ export declare type Landmark = Point;
 
 export declare type ImageType = "Original" | "Preview";
 
-export declare type FaceDetectionMethod =
-    | "BlazeFace"
-    | "FaceApiSSD"
-    | "YoloFace";
+export declare type FaceDetectionMethod = "FaceApiSSD" | "YoloFace";
 
 export declare type ObjectDetectionMethod = "SSDMobileNetV2";
 
@@ -1,27 +1,6 @@
 import { euclidean } from "hdbscan";
 import { FaceDetection } from "types/machineLearning";
-import { getNearestPointIndex, newBox } from ".";
-import { Box, Point } from "../../../thirdparty/face-api/classes";
-import {
-    computeTransformToBox,
-    transformBox,
-    transformPoints,
-} from "./transform";
-
-export function transformPaddedToImage(
-    detection: FaceDetection,
-    faceImage: ImageBitmap,
-    imageBox: Box,
-    paddedBox: Box,
-) {
-    const inBox = newBox(0, 0, faceImage.width, faceImage.height);
-    imageBox.x = paddedBox.x;
-    imageBox.y = paddedBox.y;
-    const transform = computeTransformToBox(inBox, imageBox);
-
-    detection.box = transformBox(detection.box, transform);
-    detection.landmarks = transformPoints(detection.landmarks, transform);
-}
+import { Point } from "../../../thirdparty/face-api/classes";
 
 export function getDetectionCenter(detection: FaceDetection) {
     const center = new Point(0, 0);
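The removed transformPaddedToImage mapped second-pass detections from padded-crop coordinates back into original-image coordinates via computeTransformToBox. A rough sketch of that kind of box-to-box mapping (a plain scale-plus-translate, with hypothetical types; the real helpers lived in utils/machineLearning/transform):

// Generic box-to-box affine map, in the spirit of the deleted
// computeTransformToBox helper: scale plus translate, no rotation.
interface Box { x: number; y: number; width: number; height: number; }
interface Point { x: number; y: number; }

function boxToBoxTransform(from: Box, to: Box) {
    const scaleX = to.width / from.width;
    const scaleY = to.height / from.height;
    return (p: Point): Point => ({
        x: to.x + (p.x - from.x) * scaleX,
        y: to.y + (p.y - from.y) * scaleY,
    });
}

// A detection made in a 128x128 crop maps back into the original image.
const toImage = boxToBoxTransform(
    { x: 0, y: 0, width: 128, height: 128 },
    { x: 200, y: 160, width: 100, height: 100 },
);
console.log(toImage({ x: 64, y: 64 })); // -> { x: 250, y: 210 }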
@@ -35,30 +14,6 @@ export function getDetectionCenter(detection: FaceDetection) {
     return center.div({ x: 4, y: 4 });
 }
 
-/**
- * Finds the nearest face detection from a list of detections to a specified detection.
- *
- * This function calculates the center of each detection and then finds the detection whose center is nearest to the center of the specified detection.
- * If a maximum distance is specified, only detections within that distance are considered.
- *
- * @param toDetection - The face detection to find the nearest detection to.
- * @param fromDetections - An array of face detections to search in.
- * @param maxDistance - The maximum distance between the centers of the two detections for a detection to be considered. If not specified, all detections are considered.
- *
- * @returns The nearest face detection from the list, or `undefined` if no detection is within the maximum distance.
- */
-export function getNearestDetection(
-    toDetection: FaceDetection,
-    fromDetections: Array<FaceDetection>,
-    maxDistance?: number,
-) {
-    const toCenter = getDetectionCenter(toDetection);
-    const centers = fromDetections.map((d) => getDetectionCenter(d));
-    const nearestIndex = getNearestPointIndex(toCenter, centers, maxDistance);
-
-    return nearestIndex >= 0 && fromDetections[nearestIndex];
-}
-
 /**
  * Removes duplicate face detections from an array of detections.
  *
@@ -1,7 +1,6 @@
 import log from "@/next/log";
 import { CACHES } from "@ente/shared/storage/cacheStorage/constants";
 import { cached } from "@ente/shared/storage/cacheStorage/helpers";
-import { NormalizedFace } from "blazeface-back";
 import { FILE_TYPE } from "constants/file";
 import { BLAZEFACE_FACE_SIZE } from "constants/mlConfig";
 import { euclidean } from "hdbscan";
@@ -64,15 +63,6 @@ export function newBoxFromPoints(
     return new Box({ left, top, right, bottom });
 }
 
-export function normFaceBox(face: NormalizedFace) {
-    return newBoxFromPoints(
-        face.topLeft[0],
-        face.topLeft[1],
-        face.bottomRight[0],
-        face.bottomRight[1],
-    );
-}
-
 export function getBoxCenterPt(topLeft: Point, bottomRight: Point): Point {
     return topLeft.add(bottomRight.sub(topLeft).div(new Point(2, 2)));
 }