Inline
This commit is contained in:
parent
db05afb9ff
commit
7160ae700f
4 changed files with 135 additions and 177 deletions
|
@ -201,7 +201,6 @@ export interface MLSyncContext {
|
|||
faceDetectionService: FaceDetectionService;
|
||||
faceCropService: FaceCropService;
|
||||
faceEmbeddingService: FaceEmbeddingService;
|
||||
blurDetectionService: BlurDetectionService;
|
||||
|
||||
localFilesMap: Map<number, EnteFile>;
|
||||
outOfSyncFiles: EnteFile[];
|
||||
|
@ -267,11 +266,6 @@ export interface FaceEmbeddingService {
|
|||
getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
|
||||
}
|
||||
|
||||
export interface BlurDetectionService {
|
||||
method: Versioned<BlurDetectionMethod>;
|
||||
detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
|
||||
}
|
||||
|
||||
export interface MachineLearningWorker {
|
||||
closeLocalSyncContext(): Promise<void>;
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ import {
|
|||
getFaceId,
|
||||
getLocalFile,
|
||||
} from "../face/image";
|
||||
import { detectBlur } from "./laplacianBlurDetectionService";
|
||||
|
||||
class FaceService {
|
||||
async syncFileFaceDetections(
|
||||
|
@ -85,10 +86,7 @@ class FaceService {
|
|||
syncContext.faceEmbeddingService.faceSize,
|
||||
imageBitmap,
|
||||
);
|
||||
const blurValues = syncContext.blurDetectionService.detectBlur(
|
||||
faceImages,
|
||||
newMlFile.faces,
|
||||
);
|
||||
const blurValues = detectBlur(faceImages, newMlFile.faces);
|
||||
newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
|
||||
|
||||
imageBitmap.close();
|
||||
|
|
|
@ -1,176 +1,158 @@
|
|||
import {
|
||||
BlurDetectionMethod,
|
||||
BlurDetectionService,
|
||||
Face,
|
||||
Versioned,
|
||||
} from "services/face/types";
|
||||
import { Face } from "services/face/types";
|
||||
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
|
||||
import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
|
||||
|
||||
class LaplacianBlurDetectionService implements BlurDetectionService {
|
||||
public method: Versioned<BlurDetectionMethod>;
|
||||
|
||||
public constructor() {
|
||||
this.method = {
|
||||
value: "Laplacian",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public detectBlur(alignedFaces: Float32Array, faces: Face[]): number[] {
|
||||
const numFaces = Math.round(
|
||||
alignedFaces.length /
|
||||
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
|
||||
/**
|
||||
* Laplacian blur detection.
|
||||
*/
|
||||
export const detectBlur = (
|
||||
alignedFaces: Float32Array,
|
||||
faces: Face[],
|
||||
): number[] => {
|
||||
const numFaces = Math.round(
|
||||
alignedFaces.length /
|
||||
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
|
||||
);
|
||||
const blurValues: number[] = [];
|
||||
for (let i = 0; i < numFaces; i++) {
|
||||
const face = faces[i];
|
||||
const direction = faceDirection(face);
|
||||
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
|
||||
alignedFaces,
|
||||
i,
|
||||
);
|
||||
const blurValues: number[] = [];
|
||||
for (let i = 0; i < numFaces; i++) {
|
||||
const face = faces[i];
|
||||
const direction = getFaceDirection(face);
|
||||
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
|
||||
alignedFaces,
|
||||
i,
|
||||
);
|
||||
const laplacian = this.applyLaplacian(faceImage, direction);
|
||||
const variance = this.calculateVariance(laplacian);
|
||||
blurValues.push(variance);
|
||||
}
|
||||
return blurValues;
|
||||
const laplacian = applyLaplacian(faceImage, direction);
|
||||
const variance = calculateVariance(laplacian);
|
||||
blurValues.push(variance);
|
||||
}
|
||||
return blurValues;
|
||||
};
|
||||
|
||||
private calculateVariance(matrix: number[][]): number {
|
||||
const numRows = matrix.length;
|
||||
const numCols = matrix[0].length;
|
||||
const totalElements = numRows * numCols;
|
||||
const calculateVariance = (matrix: number[][]): number => {
|
||||
const numRows = matrix.length;
|
||||
const numCols = matrix[0].length;
|
||||
const totalElements = numRows * numCols;
|
||||
|
||||
// Calculate the mean
|
||||
let mean: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
mean += value;
|
||||
});
|
||||
// Calculate the mean
|
||||
let mean: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
mean += value;
|
||||
});
|
||||
mean /= totalElements;
|
||||
});
|
||||
mean /= totalElements;
|
||||
|
||||
// Calculate the variance
|
||||
let variance: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
const diff: number = value - mean;
|
||||
variance += diff * diff;
|
||||
});
|
||||
// Calculate the variance
|
||||
let variance: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
const diff: number = value - mean;
|
||||
variance += diff * diff;
|
||||
});
|
||||
variance /= totalElements;
|
||||
});
|
||||
variance /= totalElements;
|
||||
|
||||
return variance;
|
||||
return variance;
|
||||
};
|
||||
|
||||
const padImage = (
|
||||
image: number[][],
|
||||
removeSideColumns: number = 56,
|
||||
direction: FaceDirection = "straight",
|
||||
): number[][] => {
|
||||
// Throw if removeSideColumns is not even
|
||||
if (removeSideColumns % 2 != 0) {
|
||||
throw new Error("removeSideColumns must be even");
|
||||
}
|
||||
const numRows = image.length;
|
||||
const numCols = image[0].length;
|
||||
const paddedNumCols = numCols + 2 - removeSideColumns;
|
||||
const paddedNumRows = numRows + 2;
|
||||
|
||||
private padImage(
|
||||
image: number[][],
|
||||
removeSideColumns: number = 56,
|
||||
direction: FaceDirection = "straight",
|
||||
): number[][] {
|
||||
// Throw if removeSideColumns is not even
|
||||
if (removeSideColumns % 2 != 0) {
|
||||
throw new Error("removeSideColumns must be even");
|
||||
}
|
||||
const numRows = image.length;
|
||||
const numCols = image[0].length;
|
||||
const paddedNumCols = numCols + 2 - removeSideColumns;
|
||||
const paddedNumRows = numRows + 2;
|
||||
// Create a new matrix with extra padding
|
||||
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
|
||||
new Array(paddedNumCols).fill(0),
|
||||
);
|
||||
|
||||
// Create a new matrix with extra padding
|
||||
const paddedImage: number[][] = Array.from(
|
||||
{ length: paddedNumRows },
|
||||
() => new Array(paddedNumCols).fill(0),
|
||||
);
|
||||
|
||||
// Copy original image into the center of the padded image
|
||||
if (direction === "straight") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] =
|
||||
image[i][j + Math.round(removeSideColumns / 2)];
|
||||
}
|
||||
}
|
||||
} // If the face is facing left, we only take the right side of the face image
|
||||
else if (direction === "left") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
|
||||
}
|
||||
}
|
||||
} // If the face is facing right, we only take the left side of the face image
|
||||
else if (direction === "right") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reflect padding
|
||||
// Top and bottom rows
|
||||
for (let j = 1; j <= paddedNumCols - 2; j++) {
|
||||
paddedImage[0][j] = paddedImage[2][j]; // Top row
|
||||
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
|
||||
}
|
||||
// Left and right columns
|
||||
for (let i = 0; i < numRows + 2; i++) {
|
||||
paddedImage[i][0] = paddedImage[i][2]; // Left column
|
||||
paddedImage[i][paddedNumCols - 1] =
|
||||
paddedImage[i][paddedNumCols - 3]; // Right column
|
||||
}
|
||||
|
||||
return paddedImage;
|
||||
}
|
||||
|
||||
private applyLaplacian(
|
||||
image: number[][],
|
||||
direction: FaceDirection = "straight",
|
||||
): number[][] {
|
||||
const paddedImage: number[][] = this.padImage(
|
||||
image,
|
||||
undefined,
|
||||
direction,
|
||||
);
|
||||
const numRows = paddedImage.length - 2;
|
||||
const numCols = paddedImage[0].length - 2;
|
||||
|
||||
// Create an output image initialized to 0
|
||||
const outputImage: number[][] = Array.from({ length: numRows }, () =>
|
||||
new Array(numCols).fill(0),
|
||||
);
|
||||
|
||||
// Define the Laplacian kernel
|
||||
const kernel: number[][] = [
|
||||
[0, 1, 0],
|
||||
[1, -4, 1],
|
||||
[0, 1, 0],
|
||||
];
|
||||
|
||||
// Apply the kernel to each pixel
|
||||
// Copy original image into the center of the padded image
|
||||
if (direction === "straight") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < numCols; j++) {
|
||||
let sum = 0;
|
||||
for (let ki = 0; ki < 3; ki++) {
|
||||
for (let kj = 0; kj < 3; kj++) {
|
||||
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
|
||||
}
|
||||
}
|
||||
// Adjust the output value if necessary (e.g., clipping)
|
||||
outputImage[i][j] = sum;
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] =
|
||||
image[i][j + Math.round(removeSideColumns / 2)];
|
||||
}
|
||||
}
|
||||
} // If the face is facing left, we only take the right side of the face image
|
||||
else if (direction === "left") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
|
||||
}
|
||||
}
|
||||
} // If the face is facing right, we only take the left side of the face image
|
||||
else if (direction === "right") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
return outputImage;
|
||||
}
|
||||
}
|
||||
|
||||
export default new LaplacianBlurDetectionService();
|
||||
// Reflect padding
|
||||
// Top and bottom rows
|
||||
for (let j = 1; j <= paddedNumCols - 2; j++) {
|
||||
paddedImage[0][j] = paddedImage[2][j]; // Top row
|
||||
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
|
||||
}
|
||||
// Left and right columns
|
||||
for (let i = 0; i < numRows + 2; i++) {
|
||||
paddedImage[i][0] = paddedImage[i][2]; // Left column
|
||||
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
|
||||
}
|
||||
|
||||
return paddedImage;
|
||||
};
|
||||
|
||||
/**
 * Run a 3x3 Laplacian filter over a grayscale face image.
 *
 * The image is first passed through `padImage` (using that function's default
 * for `removeSideColumns`), and the kernel is then evaluated at every position
 * of the padded image that has a full 3x3 neighbourhood.
 *
 * @param image Pixel matrix indexed as `image[row][column]`.
 * @param direction Which way the face is turned; forwarded unchanged to
 * `padImage`. Defaults to "straight".
 * @returns A matrix with two fewer rows and two fewer columns than the padded
 * image, holding the raw (unclipped) filter response for each position.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection = "straight",
): number[][] => {
    // 4-neighbour Laplacian kernel.
    const laplacianKernel: number[][] = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    const padded: number[][] = padImage(image, undefined, direction);
    const height = padded.length - 2;
    const width = padded[0].length - 2;

    // Zero-filled result, one cell per interior position of the padded image.
    const response: number[][] = Array.from({ length: height }, () =>
        new Array(width).fill(0),
    );

    response.forEach((responseRow, row) => {
        for (let col = 0; col < width; col++) {
            // Weighted sum of the 3x3 window whose top-left corner is
            // (row, col) in the padded image.
            let acc = 0;
            laplacianKernel.forEach((weights, dy) => {
                weights.forEach((weight, dx) => {
                    acc += padded[row + dy][col + dx] * weight;
                });
            });
            // The value is stored as is; no clipping is applied.
            responseRow[col] = acc;
        }
    });

    return response;
};
|
||||
|
||||
type FaceDirection = "left" | "right" | "straight";
|
||||
|
||||
const getFaceDirection = (face: Face): FaceDirection => {
|
||||
const faceDirection = (face: Face): FaceDirection => {
|
||||
const landmarks = face.detection.landmarks;
|
||||
const leftEye = landmarks[0];
|
||||
const rightEye = landmarks[1];
|
||||
|
|
|
@ -12,8 +12,6 @@ import downloadManager from "services/download";
|
|||
import { putEmbedding } from "services/embeddingService";
|
||||
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
|
||||
import {
|
||||
BlurDetectionMethod,
|
||||
BlurDetectionService,
|
||||
Face,
|
||||
FaceCropService,
|
||||
FaceDetection,
|
||||
|
@ -34,7 +32,6 @@ import { getLocalFiles } from "services/fileService";
|
|||
import { EnteFile } from "types/file";
|
||||
import { isInternalUserForML } from "utils/user";
|
||||
import FaceService from "./faceService";
|
||||
import laplacianBlurDetectionService from "./laplacianBlurDetectionService";
|
||||
import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";
|
||||
|
||||
import { fetchImageBitmapForContext } from "../face/image";
|
||||
|
@ -123,16 +120,6 @@ export class MLFactory {
|
|||
throw Error("Unknon face detection method: " + method);
|
||||
}
|
||||
|
||||
public static getBlurDetectionService(
|
||||
method: BlurDetectionMethod,
|
||||
): BlurDetectionService {
|
||||
if (method === "Laplacian") {
|
||||
return laplacianBlurDetectionService;
|
||||
}
|
||||
|
||||
throw Error("Unknon blur detection method: " + method);
|
||||
}
|
||||
|
||||
public static getFaceEmbeddingService(
|
||||
method: FaceEmbeddingMethod,
|
||||
): FaceEmbeddingService {
|
||||
|
@ -150,7 +137,6 @@ export class LocalMLSyncContext implements MLSyncContext {
|
|||
|
||||
public faceDetectionService: FaceDetectionService;
|
||||
public faceCropService: FaceCropService;
|
||||
public blurDetectionService: BlurDetectionService;
|
||||
public faceEmbeddingService: FaceEmbeddingService;
|
||||
|
||||
public localFilesMap: Map<number, EnteFile>;
|
||||
|
@ -179,8 +165,6 @@ export class LocalMLSyncContext implements MLSyncContext {
|
|||
|
||||
this.faceDetectionService =
|
||||
MLFactory.getFaceDetectionService("YoloFace");
|
||||
this.blurDetectionService =
|
||||
MLFactory.getBlurDetectionService("Laplacian");
|
||||
this.faceEmbeddingService =
|
||||
MLFactory.getFaceEmbeddingService("MobileFaceNet");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue