Inline
This commit is contained in:
parent
eaadc54184
commit
93cdf73a66
3 changed files with 274 additions and 191 deletions
|
@ -1,187 +0,0 @@
|
|||
import { Face } from "services/face/types";
|
||||
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
|
||||
import { mobileFaceNetFaceSize } from "./embed";
|
||||
|
||||
/**
 * Compute a sharpness score for every aligned face using the variance of its
 * Laplacian.
 *
 * @param alignedFaces Flat buffer holding the aligned face images one after
 * another. The number of faces is derived from its length, assuming each face
 * occupies `mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3` floats.
 *
 * @param faces The faces corresponding, index by index, to the images in
 * {@link alignedFaces}. Their landmarks decide which part of each face image
 * is examined.
 *
 * @returns One value per face: the variance of the Laplacian of that face's
 * grayscale image.
 */
export const detectBlur = (
    alignedFaces: Float32Array,
    faces: Face[],
): number[] => {
    const floatsPerFace = mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3;
    const faceCount = Math.round(alignedFaces.length / floatsPerFace);

    return Array.from({ length: faceCount }, (_, index) => {
        const direction = faceDirection(faces[index]);
        const grayscale = createGrayscaleIntMatrixFromNormalized2List(
            alignedFaces,
            index,
        );
        return matrixVariance(applyLaplacian(grayscale, direction));
    });
};
|
||||
|
||||
/** Which way a face is turned, as judged from its landmarks. */
type FaceDirection = "left" | "right" | "straight";

/**
 * Classify the direction a face is turned towards using its first five
 * detection landmarks, read in the order: left eye, right eye, nose, left
 * mouth corner, right mouth corner.
 *
 * The face counts as turned to a side when the nose lies horizontally outside
 * both the eyes and the mouth corners on that side, or when the face is
 * upright and the nose is horizontally within 20% of the eye distance from
 * that side's eye. The "left" test takes precedence over the "right" one.
 */
const faceDirection = (face: Face): FaceDirection => {
    const [leftEye, rightEye, nose, leftMouth, rightMouth] =
        face.detection.landmarks;

    const eyeGapX = Math.abs(rightEye.x - leftEye.x);
    const eyeGapY = Math.abs(rightEye.y - leftEye.y);
    const mouthGapY = Math.abs(rightMouth.y - leftMouth.y);

    // Upright: eyes clearly above the nose, and nose clearly above the mouth.
    const eyesAboveNose =
        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeGapY < nose.y;
    const noseAboveMouth =
        nose.y + 0.5 * mouthGapY < Math.min(leftMouth.y, rightMouth.y);
    const upright = eyesAboveNose && noseAboveMouth;

    // True when the nose is horizontally almost on top of the given eye.
    const noseNear = (eye: { x: number }) =>
        Math.abs(nose.x - eye.x) < 0.2 * eyeGapX;

    const noseBeyondLeft =
        nose.x < Math.min(leftEye.x, rightEye.x) &&
        nose.x < Math.min(leftMouth.x, rightMouth.x);
    if (noseBeyondLeft || (upright && noseNear(leftEye))) return "left";

    const noseBeyondRight =
        nose.x > Math.max(leftEye.x, rightEye.x) &&
        nose.x > Math.max(leftMouth.x, rightMouth.x);
    if (noseBeyondRight || (upright && noseNear(rightEye))) return "right";

    return "straight";
};
|
||||
|
||||
/**
 * Return a new image holding the response of the 3x3 Laplacian kernel at each
 * pixel.
 *
 * The input is first cropped and border-padded by {@link padImage} according
 * to the face direction. The kernel is then evaluated at every position that
 * has a full 3x3 neighbourhood in the padded image, so the result is 2 rows
 * and 2 columns smaller than the padded image. Values are not clipped.
 *
 * @param image Grayscale image as rows of pixel values.
 * @param direction Which way the face is turned; forwarded to
 * {@link padImage}.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection,
): number[][] => {
    const padded = padImage(image, direction);
    const height = padded.length - 2;
    const width = padded[0].length - 2;

    // The 4-neighbour Laplacian kernel.
    const kernel: number[][] = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    return Array.from({ length: height }, (_, row) =>
        Array.from({ length: width }, (_, col) => {
            let response = 0;
            kernel.forEach((weights, dy) => {
                weights.forEach((weight, dx) => {
                    response += padded[row + dy][col + dx] * weight;
                });
            });
            return response;
        }),
    );
};
|
||||
|
||||
/**
 * Crop the image horizontally and surround the crop with a one pixel border
 * of reflected values.
 *
 * 56 columns are dropped from the input: half from each side for a "straight"
 * face, all from the left for a "left" facing face (keeping the right part of
 * the image), and all from the right for a "right" facing face (keeping the
 * left part). All rows are kept. The border mirrors the pixel one step inside
 * the edge, without repeating the edge pixel itself.
 *
 * @returns A matrix with `image.length + 2` rows and
 * `image[0].length + 2 - 56` columns.
 */
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
    const removeSideColumns = 56; /* must be even */

    const height = image.length;
    const width = image[0].length;
    const outCols = width + 2 - removeSideColumns;
    const outRows = height + 2;
    const keptCols = outCols - 2;

    // Start from an all-zero canvas that already includes the border.
    const padded: number[][] = [];
    for (let r = 0; r < outRows; r++) {
        padded.push(new Array(outCols).fill(0));
    }

    // First source column to copy, depending on which side we keep.
    let firstCol: number | undefined;
    switch (direction) {
        case "straight":
            // Keep the centre of the image.
            firstCol = Math.round(removeSideColumns / 2);
            break;
        case "left":
            // Face turned left: keep only the right side of the image.
            firstCol = removeSideColumns;
            break;
        case "right":
            // Face turned right: keep only the left side of the image.
            firstCol = 0;
            break;
    }

    if (firstCol !== undefined) {
        for (let r = 0; r < height; r++) {
            const source = image[r];
            const target = padded[r + 1];
            for (let c = 0; c < keptCols; c++) {
                target[c + 1] = source[firstCol + c];
            }
        }
    }

    // Reflect padding: top and bottom rows first...
    for (let c = 1; c <= keptCols; c++) {
        padded[0][c] = padded[2][c];
        padded[height + 1][c] = padded[height - 1][c];
    }
    // ...then the left and right columns, which also fills the corners.
    for (let r = 0; r < outRows; r++) {
        const row = padded[r];
        row[0] = row[2];
        row[outCols - 1] = row[outCols - 3];
    }

    return padded;
};
|
||||
|
||||
/**
 * Return the population variance (the mean squared deviation from the mean)
 * of all entries of a rectangular matrix.
 *
 * @param matrix Rows of numbers. Every row is expected to have the same
 * length as the first row; the element count is taken as
 * `rows * length of first row`.
 *
 * @returns The variance, or 0 if the matrix has no entries (no rows, or rows
 * of zero length).
 */
const matrixVariance = (matrix: number[][]): number => {
    const numRows = matrix.length;
    const numCols = matrix[0]?.length ?? 0;
    const totalElements = numRows * numCols;

    // Nothing to measure: return 0 rather than throwing on a missing first
    // row or producing NaN from a 0 / 0 division.
    if (totalElements === 0) return 0;

    // Calculate the mean.
    let sum = 0;
    for (const row of matrix) {
        for (const value of row) sum += value;
    }
    const mean = sum / totalElements;

    // Calculate the variance.
    let squaredDeviations = 0;
    for (const row of matrix) {
        for (const value of row) {
            const diff = value - mean;
            squaredDeviations += diff * diff;
        }
    }

    return squaredDeviations / totalElements;
};
|
|
@ -1,27 +1,28 @@
|
|||
import { openCache } from "@/next/blob-cache";
|
||||
import log from "@/next/log";
|
||||
import { faceAlignment } from "services/face/align";
|
||||
import { Matrix } from "ml-matrix";
|
||||
import mlIDbStorage from "services/face/db";
|
||||
import { detectFaces, getRelativeDetection } from "services/face/detect";
|
||||
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
|
||||
import { Box, enlargeBox } from "services/face/geom";
|
||||
import { Box, Point, enlargeBox } from "services/face/geom";
|
||||
import {
|
||||
DetectedFace,
|
||||
Face,
|
||||
FaceAlignment,
|
||||
FaceCrop,
|
||||
FaceDetection,
|
||||
MLSyncFileContext,
|
||||
type FaceAlignment,
|
||||
type MlFileData,
|
||||
} from "services/face/types";
|
||||
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
|
||||
import { getSimilarityTransformation } from "similarity-transformation";
|
||||
import type { EnteFile } from "types/file";
|
||||
import {
|
||||
createGrayscaleIntMatrixFromNormalized2List,
|
||||
cropWithRotation,
|
||||
imageBitmapToBlob,
|
||||
warpAffineFloat32List,
|
||||
} from "utils/image";
|
||||
import { detectBlur } from "./blur";
|
||||
import {
|
||||
fetchImageBitmap,
|
||||
fetchImageBitmapForContext,
|
||||
|
@ -149,6 +150,275 @@ const syncFileFaceAlignments = async (
|
|||
return faceImages;
|
||||
};
|
||||
|
||||
// TODO-ML(MR): When is this used, or is it a Blazeface leftover?
/**
 * Reference positions used when a detection does not have exactly 5
 * landmarks. Coordinates are relative to a face image of side
 * {@link ARCFACE_LANDMARKS_FACE_SIZE}.
 */
const ARCFACE_LANDMARKS: Array<[number, number]> = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [56.1396, 92.2848],
];

/** Side length of the face image that the reference landmarks refer to. */
const ARCFACE_LANDMARKS_FACE_SIZE = 112;

/**
 * Reference positions used when a detection has exactly 5 landmarks.
 * Coordinates are relative to a face image of side
 * {@link ARCFACE_LANDMARKS_FACE_SIZE}.
 */
const ARC_FACE_5_LANDMARKS: Array<[number, number]> = [
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041],
];
|
||||
|
||||
/**
 * Compute and return a {@link FaceAlignment} for the given face detection.
 *
 * The detection's landmarks are matched against a reference landmark layout
 * (the 5 point layout when the detection has exactly 5 landmarks, the 4 point
 * layout otherwise), after normalizing the reference by the face size it was
 * defined for.
 *
 * @param faceDetection A geometry indicating a face detected in an image.
 */
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
    const hasFiveLandmarks = faceDetection.landmarks.length === 5;
    const referenceLandmarks = hasFiveLandmarks
        ? ARC_FACE_5_LANDMARKS
        : ARCFACE_LANDMARKS;
    const normalizedReference = normalizeLandmarks(
        referenceLandmarks,
        ARCFACE_LANDMARKS_FACE_SIZE,
    );
    return getFaceAlignmentUsingSimilarityTransform(
        faceDetection,
        normalizedReference,
    );
};
|
||||
|
||||
/**
 * Estimate the similarity transform that maps the detected landmarks onto
 * the given reference landmarks, and describe it as a {@link FaceAlignment}.
 *
 * @param faceDetection The detection whose landmarks are to be aligned. Only
 * as many landmarks as there are reference points are used.
 *
 * @param alignedLandmarks Reference landmark positions to align to.
 *
 * @returns The 3x3 affine matrix of the transform, together with the center,
 * size (inverse of the transform's scale) and rotation derived from it.
 */
function getFaceAlignmentUsingSimilarityTransform(
    faceDetection: FaceDetection,
    alignedLandmarks: Array<[number, number]>,
): FaceAlignment {
    // One landmark per column, for both the detected and reference points.
    const detectedPoints = faceDetection.landmarks
        .map((p) => [p.x, p.y])
        .slice(0, alignedLandmarks.length);
    const detectedMat = new Matrix(detectedPoints).transpose();
    const referenceMat = new Matrix(alignedLandmarks).transpose();

    const transform = getSimilarityTransformation(detectedMat, referenceMat);

    const rotationScale = Matrix.mul(transform.rotation, transform.scale);
    const shift = transform.translation;

    const affineMatrix = [
        [rotationScale.get(0, 0), rotationScale.get(0, 1), shift.get(0, 0)],
        [rotationScale.get(1, 0), rotationScale.get(1, 1), shift.get(1, 0)],
        [0, 0, 1],
    ];

    const size = 1 / transform.scale;
    // NOTE(review): `sub` / `mul` on the means may modify them in place, so
    // keep these statements in this order - confirm against ml-matrix.
    const meanOffset = transform.toMean.sub(0.5).mul(size);
    const centerMat = transform.fromMean.sub(meanOffset);
    const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));

    const rotation = -Math.atan2(
        transform.rotation.get(0, 1),
        transform.rotation.get(0, 0),
    );

    return { affineMatrix, center, size, rotation };
}
|
||||
|
||||
/**
 * Return a copy of the given landmarks with both coordinates of every point
 * divided by {@link faceSize}.
 *
 * @param landmarks Points as `[x, y]` pairs.
 * @param faceSize The divisor applied to each coordinate.
 */
function normalizeLandmarks(
    landmarks: Array<[number, number]>,
    faceSize: number,
): Array<[number, number]> {
    const normalized: Array<[number, number]> = [];
    for (const [x, y] of landmarks) {
        normalized.push([x / faceSize, y / faceSize]);
    }
    return normalized;
}
|
||||
|
||||
/**
 * Compute a sharpness score for every aligned face using the variance of its
 * Laplacian.
 *
 * @param alignedFaces Flat buffer holding the aligned face images one after
 * another. The number of faces is derived from its length, assuming each face
 * occupies `mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3` floats.
 *
 * @param faces The faces corresponding, index by index, to the images in
 * {@link alignedFaces}. Their landmarks decide which part of each face image
 * is examined.
 *
 * @returns One value per face: the variance of the Laplacian of that face's
 * grayscale image.
 */
export const detectBlur = (
    alignedFaces: Float32Array,
    faces: Face[],
): number[] => {
    const floatsPerFace = mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3;
    const faceCount = Math.round(alignedFaces.length / floatsPerFace);

    return Array.from({ length: faceCount }, (_, index) => {
        const direction = faceDirection(faces[index]);
        const grayscale = createGrayscaleIntMatrixFromNormalized2List(
            alignedFaces,
            index,
        );
        return matrixVariance(applyLaplacian(grayscale, direction));
    });
};
|
||||
|
||||
/** Which way a face is turned, as judged from its landmarks. */
type FaceDirection = "left" | "right" | "straight";

/**
 * Classify the direction a face is turned towards using its first five
 * detection landmarks, read in the order: left eye, right eye, nose, left
 * mouth corner, right mouth corner.
 *
 * The face counts as turned to a side when the nose lies horizontally outside
 * both the eyes and the mouth corners on that side, or when the face is
 * upright and the nose is horizontally within 20% of the eye distance from
 * that side's eye. The "left" test takes precedence over the "right" one.
 */
const faceDirection = (face: Face): FaceDirection => {
    const [leftEye, rightEye, nose, leftMouth, rightMouth] =
        face.detection.landmarks;

    const eyeGapX = Math.abs(rightEye.x - leftEye.x);
    const eyeGapY = Math.abs(rightEye.y - leftEye.y);
    const mouthGapY = Math.abs(rightMouth.y - leftMouth.y);

    // Upright: eyes clearly above the nose, and nose clearly above the mouth.
    const eyesAboveNose =
        Math.max(leftEye.y, rightEye.y) + 0.5 * eyeGapY < nose.y;
    const noseAboveMouth =
        nose.y + 0.5 * mouthGapY < Math.min(leftMouth.y, rightMouth.y);
    const upright = eyesAboveNose && noseAboveMouth;

    // True when the nose is horizontally almost on top of the given eye.
    const noseNear = (eye: { x: number }) =>
        Math.abs(nose.x - eye.x) < 0.2 * eyeGapX;

    const noseBeyondLeft =
        nose.x < Math.min(leftEye.x, rightEye.x) &&
        nose.x < Math.min(leftMouth.x, rightMouth.x);
    if (noseBeyondLeft || (upright && noseNear(leftEye))) return "left";

    const noseBeyondRight =
        nose.x > Math.max(leftEye.x, rightEye.x) &&
        nose.x > Math.max(leftMouth.x, rightMouth.x);
    if (noseBeyondRight || (upright && noseNear(rightEye))) return "right";

    return "straight";
};
|
||||
|
||||
/**
 * Return a new image holding the response of the 3x3 Laplacian kernel at each
 * pixel.
 *
 * The input is first cropped and border-padded by {@link padImage} according
 * to the face direction. The kernel is then evaluated at every position that
 * has a full 3x3 neighbourhood in the padded image, so the result is 2 rows
 * and 2 columns smaller than the padded image. Values are not clipped.
 *
 * @param image Grayscale image as rows of pixel values.
 * @param direction Which way the face is turned; forwarded to
 * {@link padImage}.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection,
): number[][] => {
    const padded = padImage(image, direction);
    const height = padded.length - 2;
    const width = padded[0].length - 2;

    // The 4-neighbour Laplacian kernel.
    const kernel: number[][] = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    return Array.from({ length: height }, (_, row) =>
        Array.from({ length: width }, (_, col) => {
            let response = 0;
            kernel.forEach((weights, dy) => {
                weights.forEach((weight, dx) => {
                    response += padded[row + dy][col + dx] * weight;
                });
            });
            return response;
        }),
    );
};
|
||||
|
||||
/**
 * Crop the image horizontally and surround the crop with a one pixel border
 * of reflected values.
 *
 * 56 columns are dropped from the input: half from each side for a "straight"
 * face, all from the left for a "left" facing face (keeping the right part of
 * the image), and all from the right for a "right" facing face (keeping the
 * left part). All rows are kept. The border mirrors the pixel one step inside
 * the edge, without repeating the edge pixel itself.
 *
 * @returns A matrix with `image.length + 2` rows and
 * `image[0].length + 2 - 56` columns.
 */
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
    const removeSideColumns = 56; /* must be even */

    const height = image.length;
    const width = image[0].length;
    const outCols = width + 2 - removeSideColumns;
    const outRows = height + 2;
    const keptCols = outCols - 2;

    // Start from an all-zero canvas that already includes the border.
    const padded: number[][] = [];
    for (let r = 0; r < outRows; r++) {
        padded.push(new Array(outCols).fill(0));
    }

    // First source column to copy, depending on which side we keep.
    let firstCol: number | undefined;
    switch (direction) {
        case "straight":
            // Keep the centre of the image.
            firstCol = Math.round(removeSideColumns / 2);
            break;
        case "left":
            // Face turned left: keep only the right side of the image.
            firstCol = removeSideColumns;
            break;
        case "right":
            // Face turned right: keep only the left side of the image.
            firstCol = 0;
            break;
    }

    if (firstCol !== undefined) {
        for (let r = 0; r < height; r++) {
            const source = image[r];
            const target = padded[r + 1];
            for (let c = 0; c < keptCols; c++) {
                target[c + 1] = source[firstCol + c];
            }
        }
    }

    // Reflect padding: top and bottom rows first...
    for (let c = 1; c <= keptCols; c++) {
        padded[0][c] = padded[2][c];
        padded[height + 1][c] = padded[height - 1][c];
    }
    // ...then the left and right columns, which also fills the corners.
    for (let r = 0; r < outRows; r++) {
        const row = padded[r];
        row[0] = row[2];
        row[outCols - 1] = row[outCols - 3];
    }

    return padded;
};
|
||||
|
||||
/**
 * Return the population variance (the mean squared deviation from the mean)
 * of all entries of a rectangular matrix.
 *
 * @param matrix Rows of numbers. Every row is expected to have the same
 * length as the first row; the element count is taken as
 * `rows * length of first row`.
 *
 * @returns The variance, or 0 if the matrix has no entries (no rows, or rows
 * of zero length).
 */
const matrixVariance = (matrix: number[][]): number => {
    const numRows = matrix.length;
    const numCols = matrix[0]?.length ?? 0;
    const totalElements = numRows * numCols;

    // Nothing to measure: return 0 rather than throwing on a missing first
    // row or producing NaN from a 0 / 0 division.
    if (totalElements === 0) return 0;

    // Calculate the mean.
    let sum = 0;
    for (const row of matrix) {
        for (const value of row) sum += value;
    }
    const mean = sum / totalElements;

    // Calculate the variance.
    let squaredDeviations = 0;
    for (const row of matrix) {
        for (const value of row) {
            const diff = value - mean;
            squaredDeviations += diff * diff;
        }
    }

    return squaredDeviations / totalElements;
};
|
||||
|
||||
const syncFileFaceEmbeddings = async (
|
||||
fileContext: MLSyncFileContext,
|
||||
alignedFacesInput: Float32Array,
|
||||
|
|
Loading…
Add table
Reference in a new issue