[web] ML cleanup - Part 3/x (#1737)
Commit a0cb8b850e
41 changed files with 1236 additions and 2063 deletions
@@ -46,7 +46,7 @@ import {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
} from "./services/ml-clip";
import { detectFaces, faceEmbedding } from "./services/ml-face";
import { detectFaces, faceEmbeddings } from "./services/ml-face";
import { encryptionKey, saveEncryptionKey } from "./services/store";
import {
clearPendingUploads,
@@ -182,8 +182,8 @@ export const attachIPCHandlers = () => {
detectFaces(input),
);

ipcMain.handle("faceEmbedding", (_, input: Float32Array) =>
faceEmbedding(input),
ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
faceEmbeddings(input),
);

ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>
@@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
5286998 /* 5 MB */,
);

export const faceEmbedding = async (input: Float32Array) => {
export const faceEmbeddings = async (input: Float32Array) => {
// Dimension of each face (alias)
const mobileFaceNetFaceSize = 112;
// Smaller alias
@@ -162,8 +162,8 @@ const clipTextEmbeddingIfAvailable = (text: string) =>
const detectFaces = (input: Float32Array) =>
ipcRenderer.invoke("detectFaces", input);

const faceEmbedding = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbedding", input);
const faceEmbeddings = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbeddings", input);

const legacyFaceCrop = (faceID: string) =>
ipcRenderer.invoke("legacyFaceCrop", faceID);
@@ -343,7 +343,7 @@ contextBridge.exposeInMainWorld("electron", {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
detectFaces,
faceEmbedding,
faceEmbeddings,
legacyFaceCrop,

// - Watch
@@ -16,7 +16,6 @@
"chrono-node": "^2.2.6",
"date-fns": "^2",
"debounce": "^2.0.0",
"density-clustering": "^1.3.0",
"eventemitter3": "^4.0.7",
"exifr": "^7.1.3",
"fast-srp-hap": "^2.0.4",
@@ -5,7 +5,7 @@ import { t } from "i18next";
import { AppContext } from "pages/_app";
import { useContext } from "react";
import { components } from "react-select";
import { IndexStatus } from "services/ml/db";
import { IndexStatus } from "services/face/db";
import { Suggestion, SuggestionType } from "types/search";

const { Menu } = components;
@@ -9,8 +9,8 @@ import { useCallback, useContext, useEffect, useRef, useState } from "react";
import { components } from "react-select";
import AsyncSelect from "react-select/async";
import { InputActionMeta } from "react-select/src/types";
import { Person } from "services/face/types";
import { City } from "services/locationSearchService";
import { Person } from "services/ml/types";
import {
getAutoCompleteSuggestions,
getDefaultOptions,
@@ -3,8 +3,8 @@ import { Skeleton, styled } from "@mui/material";
import { Legend } from "components/PhotoViewer/styledComponents/Legend";
import { t } from "i18next";
import React, { useEffect, useState } from "react";
import mlIDbStorage from "services/ml/db";
import { Face, Person, type MlFileData } from "services/ml/types";
import mlIDbStorage from "services/face/db";
import { Face, Person, type MlFileData } from "services/face/types";
import { EnteFile } from "types/file";

const FaceChipContainer = styled("div")`
@@ -167,10 +167,7 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
.legacyFaceCrop(faceID)
/*
cachedOrNew("face-crops", cacheKey, async () => {
const user = await ensureLocalUser();
return machineLearningService.regenerateFaceCrop(
user.token,
user.id,
faceId,
);
})*/
@@ -102,6 +102,10 @@ export const syncCLIPEmbeddings = async () => {
if (!response.diff?.length) {
return;
}
// Note: in rare cases we might get a diff entry for an embedding
// corresponding to a file which has been deleted (but whose
// embedding is enqueued for deletion). Client should expect such a
// scenario (all it has to do is just ignore them).
const newEmbeddings = await Promise.all(
response.diff.map(async (embedding) => {
try {
@ -1,31 +1,8 @@
|
|||
import { Matrix } from "ml-matrix";
|
||||
import { Point } from "services/ml/geom";
|
||||
import {
|
||||
FaceAlignment,
|
||||
FaceAlignmentMethod,
|
||||
FaceAlignmentService,
|
||||
FaceDetection,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
import { Point } from "services/face/geom";
|
||||
import { FaceAlignment, FaceDetection } from "services/face/types";
|
||||
import { getSimilarityTransformation } from "similarity-transformation";
|
||||
|
||||
class ArcfaceAlignmentService implements FaceAlignmentService {
|
||||
public method: Versioned<FaceAlignmentMethod>;
|
||||
|
||||
constructor() {
|
||||
this.method = {
|
||||
value: "ArcFace",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public getFaceAlignment(faceDetection: FaceDetection): FaceAlignment {
|
||||
return getArcfaceAlignment(faceDetection);
|
||||
}
|
||||
}
|
||||
|
||||
export default new ArcfaceAlignmentService();
|
||||
|
||||
const ARCFACE_LANDMARKS = [
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
|
@ -43,9 +20,12 @@ const ARC_FACE_5_LANDMARKS = [
|
|||
[70.7299, 92.2041],
|
||||
] as Array<[number, number]>;
|
||||
|
||||
export function getArcfaceAlignment(
|
||||
faceDetection: FaceDetection,
|
||||
): FaceAlignment {
|
||||
/**
|
||||
* Compute and return an {@link FaceAlignment} for the given face detection.
|
||||
*
|
||||
* @param faceDetection A geometry indicating a face detected in an image.
|
||||
*/
|
||||
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
|
||||
const landmarkCount = faceDetection.landmarks.length;
|
||||
return getFaceAlignmentUsingSimilarityTransform(
|
||||
faceDetection,
|
||||
|
@ -54,12 +34,11 @@ export function getArcfaceAlignment(
|
|||
ARCFACE_LANDMARKS_FACE_SIZE,
|
||||
),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
function getFaceAlignmentUsingSimilarityTransform(
|
||||
faceDetection: FaceDetection,
|
||||
alignedLandmarks: Array<[number, number]>,
|
||||
// alignmentMethod: Versioned<FaceAlignmentMethod>
|
||||
): FaceAlignment {
|
||||
const landmarksMat = new Matrix(
|
||||
faceDetection.landmarks
|
||||
|
@ -90,7 +69,6 @@ function getFaceAlignmentUsingSimilarityTransform(
|
|||
simTransform.rotation.get(0, 1),
|
||||
simTransform.rotation.get(0, 0),
|
||||
);
|
||||
// log.info({ affineMatrix, meanTranslation, centerMat, center, toMean: simTransform.toMean, fromMean: simTransform.fromMean, size });
|
||||
|
||||
return {
|
||||
affineMatrix,
|
web/apps/photos/src/services/face/blur.ts (new file, 187 lines)
|
@ -0,0 +1,187 @@
|
|||
import { Face } from "services/face/types";
|
||||
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
|
||||
import { mobileFaceNetFaceSize } from "./embed";
|
||||
|
||||
/**
|
||||
* Laplacian blur detection.
|
||||
*/
|
||||
export const detectBlur = (
|
||||
alignedFaces: Float32Array,
|
||||
faces: Face[],
|
||||
): number[] => {
|
||||
const numFaces = Math.round(
|
||||
alignedFaces.length /
|
||||
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
|
||||
);
|
||||
const blurValues: number[] = [];
|
||||
for (let i = 0; i < numFaces; i++) {
|
||||
const face = faces[i];
|
||||
const direction = faceDirection(face);
|
||||
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
|
||||
alignedFaces,
|
||||
i,
|
||||
);
|
||||
const laplacian = applyLaplacian(faceImage, direction);
|
||||
blurValues.push(matrixVariance(laplacian));
|
||||
}
|
||||
return blurValues;
|
||||
};
|
||||
|
||||
type FaceDirection = "left" | "right" | "straight";
|
||||
|
||||
const faceDirection = (face: Face): FaceDirection => {
|
||||
const landmarks = face.detection.landmarks;
|
||||
const leftEye = landmarks[0];
|
||||
const rightEye = landmarks[1];
|
||||
const nose = landmarks[2];
|
||||
const leftMouth = landmarks[3];
|
||||
const rightMouth = landmarks[4];
|
||||
|
||||
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
|
||||
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
|
||||
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
|
||||
|
||||
const faceIsUpright =
|
||||
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
|
||||
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
|
||||
|
||||
const noseStickingOutLeft =
|
||||
nose.x < Math.min(leftEye.x, rightEye.x) &&
|
||||
nose.x < Math.min(leftMouth.x, rightMouth.x);
|
||||
|
||||
const noseStickingOutRight =
|
||||
nose.x > Math.max(leftEye.x, rightEye.x) &&
|
||||
nose.x > Math.max(leftMouth.x, rightMouth.x);
|
||||
|
||||
const noseCloseToLeftEye =
|
||||
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
|
||||
const noseCloseToRightEye =
|
||||
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
|
||||
|
||||
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
|
||||
return "left";
|
||||
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
|
||||
return "right";
|
||||
}
|
||||
|
||||
return "straight";
|
||||
};
|
||||
|
||||
/**
|
||||
* Return a new image by applying a Laplacian blur kernel to each pixel.
|
||||
*/
|
||||
const applyLaplacian = (
|
||||
image: number[][],
|
||||
direction: FaceDirection,
|
||||
): number[][] => {
|
||||
const paddedImage: number[][] = padImage(image, direction);
|
||||
const numRows = paddedImage.length - 2;
|
||||
const numCols = paddedImage[0].length - 2;
|
||||
|
||||
// Create an output image initialized to 0.
|
||||
const outputImage: number[][] = Array.from({ length: numRows }, () =>
|
||||
new Array(numCols).fill(0),
|
||||
);
|
||||
|
||||
// Define the Laplacian kernel.
|
||||
const kernel: number[][] = [
|
||||
[0, 1, 0],
|
||||
[1, -4, 1],
|
||||
[0, 1, 0],
|
||||
];
|
||||
|
||||
// Apply the kernel to each pixel
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < numCols; j++) {
|
||||
let sum = 0;
|
||||
for (let ki = 0; ki < 3; ki++) {
|
||||
for (let kj = 0; kj < 3; kj++) {
|
||||
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
|
||||
}
|
||||
}
|
||||
// Adjust the output value if necessary (e.g., clipping).
|
||||
outputImage[i][j] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
return outputImage;
|
||||
};
|
||||
|
||||
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
|
||||
const removeSideColumns = 56; /* must be even */
|
||||
|
||||
const numRows = image.length;
|
||||
const numCols = image[0].length;
|
||||
const paddedNumCols = numCols + 2 - removeSideColumns;
|
||||
const paddedNumRows = numRows + 2;
|
||||
|
||||
// Create a new matrix with extra padding.
|
||||
const paddedImage: number[][] = Array.from({ length: paddedNumRows }, () =>
|
||||
new Array(paddedNumCols).fill(0),
|
||||
);
|
||||
|
||||
if (direction === "straight") {
|
||||
// Copy original image into the center of the padded image.
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] =
|
||||
image[i][j + Math.round(removeSideColumns / 2)];
|
||||
}
|
||||
}
|
||||
} else if (direction === "left") {
|
||||
// If the face is facing left, we only take the right side of the face image.
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
|
||||
}
|
||||
}
|
||||
} else if (direction === "right") {
|
||||
// If the face is facing right, we only take the left side of the face image.
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reflect padding
|
||||
// Top and bottom rows
|
||||
for (let j = 1; j <= paddedNumCols - 2; j++) {
|
||||
paddedImage[0][j] = paddedImage[2][j]; // Top row
|
||||
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
|
||||
}
|
||||
// Left and right columns
|
||||
for (let i = 0; i < numRows + 2; i++) {
|
||||
paddedImage[i][0] = paddedImage[i][2]; // Left column
|
||||
paddedImage[i][paddedNumCols - 1] = paddedImage[i][paddedNumCols - 3]; // Right column
|
||||
}
|
||||
|
||||
return paddedImage;
|
||||
};
|
||||
|
||||
const matrixVariance = (matrix: number[][]): number => {
|
||||
const numRows = matrix.length;
|
||||
const numCols = matrix[0].length;
|
||||
const totalElements = numRows * numCols;
|
||||
|
||||
// Calculate the mean.
|
||||
let mean: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
mean += value;
|
||||
});
|
||||
});
|
||||
mean /= totalElements;
|
||||
|
||||
// Calculate the variance.
|
||||
let variance: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
const diff: number = value - mean;
|
||||
variance += diff * diff;
|
||||
});
|
||||
});
|
||||
variance /= totalElements;
|
||||
|
||||
return variance;
|
||||
};
|
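A quick aside on the technique in blur.ts above (this note and sketch are editorial, not part of the commit): detectBlur scores sharpness as the variance of the image's response to a 3x3 Laplacian kernel. The standalone TypeScript sketch below illustrates that idea on a tiny grayscale matrix; the padding and face-direction handling in the real code are omitted, and all names and values here are made up for illustration.

// Editorial sketch: variance of the Laplacian response as a sharpness score,
// using the same 3x3 kernel as detectBlur. Assumes a small, already-padded
// grayscale matrix; higher variance means a sharper (less blurred) crop.
const laplacianSharpness = (image: number[][]): number => {
    const kernel = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];
    const responses: number[] = [];
    for (let i = 0; i + 2 < image.length; i++) {
        for (let j = 0; j + 2 < image[0].length; j++) {
            let sum = 0;
            for (let ki = 0; ki < 3; ki++)
                for (let kj = 0; kj < 3; kj++)
                    sum += image[i + ki][j + kj] * kernel[ki][kj];
            responses.push(sum);
        }
    }
    const mean = responses.reduce((a, b) => a + b, 0) / responses.length;
    return responses.reduce((a, b) => a + (b - mean) ** 2, 0) / responses.length;
};

// A flat patch has zero variance; a high-contrast patch scores much higher.
const flat = [
    [10, 10, 10, 10],
    [10, 10, 10, 10],
    [10, 10, 10, 10],
    [10, 10, 10, 10],
];
const checker = [
    [0, 255, 0, 255],
    [255, 0, 255, 0],
    [0, 255, 0, 255],
    [255, 0, 255, 0],
];
console.log(laplacianSharpness(flat), laplacianSharpness(checker)); // 0, 1040400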
web/apps/photos/src/services/face/cluster.ts (new file, 34 lines)
|
@ -0,0 +1,34 @@
|
|||
import { Hdbscan, type DebugInfo } from "hdbscan";
|
||||
import { type Cluster } from "services/face/types";
|
||||
|
||||
export interface ClusterFacesResult {
|
||||
clusters: Array<Cluster>;
|
||||
noise: Cluster;
|
||||
debugInfo?: DebugInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cluster the given {@link faceEmbeddings}.
|
||||
*
|
||||
* @param faceEmbeddings An array of embeddings produced by our face indexing
|
||||
* pipeline. Each embedding is for a face detected in an image (a single image
|
||||
* may have multiple faces detected within it).
|
||||
*/
|
||||
export const clusterFaces = async (
|
||||
faceEmbeddings: Array<Array<number>>,
|
||||
): Promise<ClusterFacesResult> => {
|
||||
const hdbscan = new Hdbscan({
|
||||
input: faceEmbeddings,
|
||||
minClusterSize: 3,
|
||||
minSamples: 5,
|
||||
clusterSelectionEpsilon: 0.6,
|
||||
clusterSelectionMethod: "leaf",
|
||||
debug: true,
|
||||
});
|
||||
|
||||
return {
|
||||
clusters: hdbscan.getClusters(),
|
||||
noise: hdbscan.getNoise(),
|
||||
debugInfo: hdbscan.getDebugInfo(),
|
||||
};
|
||||
};
|
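For orientation only (not part of the commit), a minimal sketch of how the new clusterFaces helper is meant to be driven. The import path matches the new file above; the toy 3-dimensional vectors are stand-ins for the real 192-dimensional face embeddings, which callers convert from Float32Array via Array.from (as people.ts below does).

import { clusterFaces } from "services/face/cluster";

// Toy embeddings purely for illustration; real inputs are 192-dimensional.
const embeddings: number[][] = [
    [0.1, 0.2, 0.1],
    [0.11, 0.19, 0.12],
    [0.9, 0.8, 0.95],
];

const { clusters, noise } = await clusterFaces(embeddings);
// Each cluster is a list of indices into `embeddings`; `noise` holds the
// indices HDBSCAN could not assign to any cluster (with this few points and
// minClusterSize: 3 / minSamples: 5, everything may well end up as noise).
console.log(clusters, noise);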
web/apps/photos/src/services/face/crop.ts (new file, 32 lines)
|
@ -0,0 +1,32 @@
|
|||
import { Box, enlargeBox } from "services/face/geom";
|
||||
import { FaceCrop, FaceDetection } from "services/face/types";
|
||||
import { cropWithRotation } from "utils/image";
|
||||
import { faceAlignment } from "./align";
|
||||
|
||||
export const getFaceCrop = (
|
||||
imageBitmap: ImageBitmap,
|
||||
faceDetection: FaceDetection,
|
||||
): FaceCrop => {
|
||||
const alignment = faceAlignment(faceDetection);
|
||||
|
||||
const padding = 0.25;
|
||||
const maxSize = 256;
|
||||
|
||||
const alignmentBox = new Box({
|
||||
x: alignment.center.x - alignment.size / 2,
|
||||
y: alignment.center.y - alignment.size / 2,
|
||||
width: alignment.size,
|
||||
height: alignment.size,
|
||||
}).round();
|
||||
const scaleForPadding = 1 + padding * 2;
|
||||
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
|
||||
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
|
||||
width: maxSize,
|
||||
height: maxSize,
|
||||
});
|
||||
|
||||
return {
|
||||
image: faceImageBitmap,
|
||||
imageBox: paddedBox,
|
||||
};
|
||||
};
|
|
@ -9,12 +9,11 @@ import {
|
|||
openDB,
|
||||
} from "idb";
|
||||
import isElectron from "is-electron";
|
||||
import { Face, MLLibraryData, MlFileData, Person } from "services/face/types";
|
||||
import {
|
||||
DEFAULT_ML_SEARCH_CONFIG,
|
||||
DEFAULT_ML_SYNC_CONFIG,
|
||||
MAX_ML_SYNC_ERROR_COUNT,
|
||||
} from "services/machineLearning/machineLearningService";
|
||||
import { Face, MLLibraryData, MlFileData, Person } from "services/ml/types";
|
||||
|
||||
export interface IndexStatus {
|
||||
outOfSyncFilesExists: boolean;
|
||||
|
@ -26,7 +25,6 @@ export interface IndexStatus {
|
|||
|
||||
interface Config {}
|
||||
|
||||
export const ML_SYNC_CONFIG_NAME = "ml-sync";
|
||||
export const ML_SEARCH_CONFIG_NAME = "ml-search";
|
||||
|
||||
const MLDATA_DB_NAME = "mldata";
|
||||
|
@ -141,10 +139,11 @@ class MLIDbStorage {
|
|||
DEFAULT_ML_SYNC_JOB_CONFIG,
|
||||
"ml-sync-job",
|
||||
);
|
||||
*/
|
||||
|
||||
await tx
|
||||
.objectStore("configs")
|
||||
.add(DEFAULT_ML_SYNC_CONFIG, ML_SYNC_CONFIG_NAME);
|
||||
*/
|
||||
}
|
||||
if (oldVersion < 3) {
|
||||
await tx
|
||||
|
@ -163,6 +162,10 @@ class MLIDbStorage {
|
|||
.objectStore("configs")
|
||||
.delete(ML_SEARCH_CONFIG_NAME);
|
||||
|
||||
await tx
|
||||
.objectStore("configs")
|
||||
.delete(""ml-sync"");
|
||||
|
||||
await tx
|
||||
.objectStore("configs")
|
||||
.delete("ml-sync-job");
|
web/apps/photos/src/services/face/detect.ts (new file, 316 lines)
|
@ -0,0 +1,316 @@
|
|||
import { workerBridge } from "@/next/worker/worker-bridge";
|
||||
import { euclidean } from "hdbscan";
|
||||
import {
|
||||
Box,
|
||||
Dimensions,
|
||||
Point,
|
||||
boxFromBoundingBox,
|
||||
newBox,
|
||||
} from "services/face/geom";
|
||||
import { FaceDetection } from "services/face/types";
|
||||
import {
|
||||
Matrix,
|
||||
applyToPoint,
|
||||
compose,
|
||||
scale,
|
||||
translate,
|
||||
} from "transformation-matrix";
|
||||
import {
|
||||
clamp,
|
||||
getPixelBilinear,
|
||||
normalizePixelBetween0And1,
|
||||
} from "utils/image";
|
||||
|
||||
/**
|
||||
* Detect faces in the given {@link imageBitmap}.
|
||||
*
|
||||
* The model used is YOLO, running in an ONNX runtime.
|
||||
*/
|
||||
export const detectFaces = async (
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<Array<FaceDetection>> => {
|
||||
const maxFaceDistancePercent = Math.sqrt(2) / 100;
|
||||
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
|
||||
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
imageBitmap,
|
||||
640,
|
||||
640,
|
||||
);
|
||||
const data = preprocessResult.data;
|
||||
const resized = preprocessResult.newSize;
|
||||
const outputData = await workerBridge.detectFaces(data);
|
||||
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
|
||||
const inBox = newBox(0, 0, resized.width, resized.height);
|
||||
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const transform = computeTransformToBox(inBox, toBox);
|
||||
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
|
||||
const box = transformBox(f.box, transform);
|
||||
const normLandmarks = f.landmarks;
|
||||
const landmarks = transformPoints(normLandmarks, transform);
|
||||
return {
|
||||
box,
|
||||
landmarks,
|
||||
probability: f.probability as number,
|
||||
} as FaceDetection;
|
||||
});
|
||||
return removeDuplicateDetections(faceDetections, maxFaceDistance);
|
||||
};
|
||||
|
||||
const preprocessImageBitmapToFloat32ChannelsFirst = (
|
||||
imageBitmap: ImageBitmap,
|
||||
requiredWidth: number,
|
||||
requiredHeight: number,
|
||||
maintainAspectRatio: boolean = true,
|
||||
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
|
||||
) => {
|
||||
// Create an OffscreenCanvas and set its size.
|
||||
const offscreenCanvas = new OffscreenCanvas(
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
const ctx = offscreenCanvas.getContext("2d");
|
||||
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const imageData = ctx.getImageData(
|
||||
0,
|
||||
0,
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
const pixelData = imageData.data;
|
||||
|
||||
let scaleW = requiredWidth / imageBitmap.width;
|
||||
let scaleH = requiredHeight / imageBitmap.height;
|
||||
if (maintainAspectRatio) {
|
||||
const scale = Math.min(
|
||||
requiredWidth / imageBitmap.width,
|
||||
requiredHeight / imageBitmap.height,
|
||||
);
|
||||
scaleW = scale;
|
||||
scaleH = scale;
|
||||
}
|
||||
const scaledWidth = clamp(
|
||||
Math.round(imageBitmap.width * scaleW),
|
||||
0,
|
||||
requiredWidth,
|
||||
);
|
||||
const scaledHeight = clamp(
|
||||
Math.round(imageBitmap.height * scaleH),
|
||||
0,
|
||||
requiredHeight,
|
||||
);
|
||||
|
||||
const processedImage = new Float32Array(
|
||||
1 * 3 * requiredWidth * requiredHeight,
|
||||
);
|
||||
|
||||
// Populate the Float32Array with normalized pixel values
|
||||
let pixelIndex = 0;
|
||||
const channelOffsetGreen = requiredHeight * requiredWidth;
|
||||
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
|
||||
for (let h = 0; h < requiredHeight; h++) {
|
||||
for (let w = 0; w < requiredWidth; w++) {
|
||||
let pixel: {
|
||||
r: number;
|
||||
g: number;
|
||||
b: number;
|
||||
};
|
||||
if (w >= scaledWidth || h >= scaledHeight) {
|
||||
pixel = { r: 114, g: 114, b: 114 };
|
||||
} else {
|
||||
pixel = getPixelBilinear(
|
||||
w / scaleW,
|
||||
h / scaleH,
|
||||
pixelData,
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
}
|
||||
processedImage[pixelIndex] = normFunction(pixel.r);
|
||||
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
|
||||
pixel.g,
|
||||
);
|
||||
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
|
||||
pixel.b,
|
||||
);
|
||||
pixelIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
data: processedImage,
|
||||
originalSize: {
|
||||
width: imageBitmap.width,
|
||||
height: imageBitmap.height,
|
||||
},
|
||||
newSize: { width: scaledWidth, height: scaledHeight },
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* @param rowOutput A Float32Array of shape [25200, 16], where each row
|
||||
* represents a bounding box.
|
||||
*/
|
||||
const getFacesFromYOLOOutput = (
|
||||
rowOutput: Float32Array,
|
||||
minScore: number,
|
||||
): Array<FaceDetection> => {
|
||||
const faces: Array<FaceDetection> = [];
|
||||
// Iterate over each row.
|
||||
for (let i = 0; i < rowOutput.length; i += 16) {
|
||||
const score = rowOutput[i + 4];
|
||||
if (score < minScore) {
|
||||
continue;
|
||||
}
|
||||
// The first 4 values represent the bounding box's coordinates:
|
||||
//
|
||||
// (x1, y1, x2, y2)
|
||||
//
|
||||
const xCenter = rowOutput[i];
|
||||
const yCenter = rowOutput[i + 1];
|
||||
const width = rowOutput[i + 2];
|
||||
const height = rowOutput[i + 3];
|
||||
const xMin = xCenter - width / 2.0; // topLeft
|
||||
const yMin = yCenter - height / 2.0; // topLeft
|
||||
|
||||
const leftEyeX = rowOutput[i + 5];
|
||||
const leftEyeY = rowOutput[i + 6];
|
||||
const rightEyeX = rowOutput[i + 7];
|
||||
const rightEyeY = rowOutput[i + 8];
|
||||
const noseX = rowOutput[i + 9];
|
||||
const noseY = rowOutput[i + 10];
|
||||
const leftMouthX = rowOutput[i + 11];
|
||||
const leftMouthY = rowOutput[i + 12];
|
||||
const rightMouthX = rowOutput[i + 13];
|
||||
const rightMouthY = rowOutput[i + 14];
|
||||
|
||||
const box = new Box({
|
||||
x: xMin,
|
||||
y: yMin,
|
||||
width: width,
|
||||
height: height,
|
||||
});
|
||||
const probability = score as number;
|
||||
const landmarks = [
|
||||
new Point(leftEyeX, leftEyeY),
|
||||
new Point(rightEyeX, rightEyeY),
|
||||
new Point(noseX, noseY),
|
||||
new Point(leftMouthX, leftMouthY),
|
||||
new Point(rightMouthX, rightMouthY),
|
||||
];
|
||||
faces.push({ box, landmarks, probability });
|
||||
}
|
||||
return faces;
|
||||
};
|
||||
|
||||
export const getRelativeDetection = (
|
||||
faceDetection: FaceDetection,
|
||||
dimensions: Dimensions,
|
||||
): FaceDetection => {
|
||||
const oldBox: Box = faceDetection.box;
|
||||
const box = new Box({
|
||||
x: oldBox.x / dimensions.width,
|
||||
y: oldBox.y / dimensions.height,
|
||||
width: oldBox.width / dimensions.width,
|
||||
height: oldBox.height / dimensions.height,
|
||||
});
|
||||
const oldLandmarks: Point[] = faceDetection.landmarks;
|
||||
const landmarks = oldLandmarks.map((l) => {
|
||||
return new Point(l.x / dimensions.width, l.y / dimensions.height);
|
||||
});
|
||||
const probability = faceDetection.probability;
|
||||
return { box, landmarks, probability };
|
||||
};
|
||||
|
||||
/**
|
||||
* Removes duplicate face detections from an array of detections.
|
||||
*
|
||||
* This function sorts the detections by their probability in descending order,
|
||||
* then iterates over them.
|
||||
*
|
||||
* For each detection, it calculates the Euclidean distance to all other
|
||||
* detections.
|
||||
*
|
||||
* If the distance is less than or equal to the specified threshold
|
||||
* (`withinDistance`), the other detection is considered a duplicate and is
|
||||
* removed.
|
||||
*
|
||||
* @param detections - An array of face detections to remove duplicates from.
|
||||
*
|
||||
* @param withinDistance - The maximum Euclidean distance between two detections
|
||||
* for them to be considered duplicates.
|
||||
*
|
||||
* @returns An array of face detections with duplicates removed.
|
||||
*/
|
||||
const removeDuplicateDetections = (
|
||||
detections: Array<FaceDetection>,
|
||||
withinDistance: number,
|
||||
) => {
|
||||
detections.sort((a, b) => b.probability - a.probability);
|
||||
const isSelected = new Map<number, boolean>();
|
||||
for (let i = 0; i < detections.length; i++) {
|
||||
if (isSelected.get(i) === false) {
|
||||
continue;
|
||||
}
|
||||
isSelected.set(i, true);
|
||||
for (let j = i + 1; j < detections.length; j++) {
|
||||
if (isSelected.get(j) === false) {
|
||||
continue;
|
||||
}
|
||||
const centeri = getDetectionCenter(detections[i]);
|
||||
const centerj = getDetectionCenter(detections[j]);
|
||||
const dist = euclidean(
|
||||
[centeri.x, centeri.y],
|
||||
[centerj.x, centerj.y],
|
||||
);
|
||||
if (dist <= withinDistance) {
|
||||
isSelected.set(j, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uniques: Array<FaceDetection> = [];
|
||||
for (let i = 0; i < detections.length; i++) {
|
||||
isSelected.get(i) && uniques.push(detections[i]);
|
||||
}
|
||||
return uniques;
|
||||
};
|
||||
|
||||
function getDetectionCenter(detection: FaceDetection) {
|
||||
const center = new Point(0, 0);
|
||||
// TODO: the first 4 landmarks are applicable to BlazeFace only;
// this needs to consider the eye, nose and mouth landmarks to compute the center
|
||||
detection.landmarks?.slice(0, 4).forEach((p) => {
|
||||
center.x += p.x;
|
||||
center.y += p.y;
|
||||
});
|
||||
|
||||
return new Point(center.x / 4, center.y / 4);
|
||||
}
|
||||
|
||||
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
|
||||
return compose(
|
||||
translate(toBox.x, toBox.y),
|
||||
scale(toBox.width / inBox.width, toBox.height / inBox.height),
|
||||
);
|
||||
}
|
||||
|
||||
function transformPoint(point: Point, transform: Matrix) {
|
||||
const txdPoint = applyToPoint(transform, point);
|
||||
return new Point(txdPoint.x, txdPoint.y);
|
||||
}
|
||||
|
||||
function transformPoints(points: Point[], transform: Matrix) {
|
||||
return points?.map((p) => transformPoint(p, transform));
|
||||
}
|
||||
|
||||
function transformBox(box: Box, transform: Matrix) {
|
||||
const topLeft = transformPoint(box.topLeft, transform);
|
||||
const bottomRight = transformPoint(box.bottomRight, transform);
|
||||
|
||||
return boxFromBoundingBox({
|
||||
left: topLeft.x,
|
||||
top: topLeft.y,
|
||||
right: bottomRight.x,
|
||||
bottom: bottomRight.y,
|
||||
});
|
||||
}
|
web/apps/photos/src/services/face/embed.ts (new file, 26 lines)
|
@ -0,0 +1,26 @@
|
|||
import { workerBridge } from "@/next/worker/worker-bridge";
|
||||
import { FaceEmbedding } from "services/face/types";
|
||||
|
||||
export const mobileFaceNetFaceSize = 112;
|
||||
|
||||
/**
|
||||
* Compute embeddings for the given {@link faceData}.
|
||||
*
|
||||
* The model used is MobileFaceNet, running in an ONNX runtime.
|
||||
*/
|
||||
export const faceEmbeddings = async (
|
||||
faceData: Float32Array,
|
||||
): Promise<Array<FaceEmbedding>> => {
|
||||
const outputData = await workerBridge.faceEmbeddings(faceData);
|
||||
|
||||
const embeddingSize = 192;
|
||||
const embeddings = new Array<FaceEmbedding>(
|
||||
outputData.length / embeddingSize,
|
||||
);
|
||||
for (let i = 0; i < embeddings.length; i++) {
|
||||
embeddings[i] = new Float32Array(
|
||||
outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
|
||||
);
|
||||
}
|
||||
return embeddings;
|
||||
};
|
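As a shape check on embed.ts above (editorial note, not part of the commit): the input to faceEmbeddings is all aligned face crops flattened into one Float32Array, and the output is the faces' 192-dimensional embeddings laid end to end. The small sketch below just makes the arithmetic concrete; the names are illustrative.

// Shape bookkeeping for faceEmbeddings, assuming 2 faces as an example.
const faceSize = 112; // mobileFaceNetFaceSize
const embeddingSize = 192;
const numFaces = 2;
const inputLength = numFaces * faceSize * faceSize * 3; // 75264 floats in
const outputLength = numFaces * embeddingSize; // 384 floats out
// embeddings[i] corresponds to outputData.slice(i * 192, (i + 1) * 192).
console.log(inputLength, outputLength);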
web/apps/photos/src/services/face/f-index.ts (new file, 194 lines)
|
@ -0,0 +1,194 @@
|
|||
import { openCache } from "@/next/blob-cache";
|
||||
import log from "@/next/log";
|
||||
import { faceAlignment } from "services/face/align";
|
||||
import mlIDbStorage from "services/face/db";
|
||||
import { detectFaces, getRelativeDetection } from "services/face/detect";
|
||||
import { faceEmbeddings, mobileFaceNetFaceSize } from "services/face/embed";
|
||||
import {
|
||||
DetectedFace,
|
||||
Face,
|
||||
MLSyncFileContext,
|
||||
type FaceAlignment,
|
||||
} from "services/face/types";
|
||||
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
|
||||
import { detectBlur } from "./blur";
|
||||
import { getFaceCrop } from "./crop";
|
||||
import {
|
||||
fetchImageBitmap,
|
||||
fetchImageBitmapForContext,
|
||||
getFaceId,
|
||||
getLocalFile,
|
||||
} from "./image";
|
||||
|
||||
export const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
|
||||
const { newMlFile } = fileContext;
|
||||
const startTime = Date.now();
|
||||
|
||||
await syncFileFaceDetections(fileContext);
|
||||
|
||||
if (newMlFile.faces && newMlFile.faces.length > 0) {
|
||||
await syncFileFaceCrops(fileContext);
|
||||
|
||||
const alignedFacesData = await syncFileFaceAlignments(fileContext);
|
||||
|
||||
await syncFileFaceEmbeddings(fileContext, alignedFacesData);
|
||||
|
||||
await syncFileFaceMakeRelativeDetections(fileContext);
|
||||
}
|
||||
log.debug(
|
||||
() =>
|
||||
`Face detection for file ${fileContext.enteFile.id} took ${Math.round(Date.now() - startTime)} ms`,
|
||||
);
|
||||
};
|
||||
|
||||
const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
|
||||
const { newMlFile } = fileContext;
|
||||
newMlFile.faceDetectionMethod = {
|
||||
value: "YoloFace",
|
||||
version: 1,
|
||||
};
|
||||
fileContext.newDetection = true;
|
||||
const imageBitmap = await fetchImageBitmapForContext(fileContext);
|
||||
const faceDetections = await detectFaces(imageBitmap);
|
||||
// TODO: reenable faces filtering based on width
|
||||
const detectedFaces = faceDetections?.map((detection) => {
|
||||
return {
|
||||
fileId: fileContext.enteFile.id,
|
||||
detection,
|
||||
} as DetectedFace;
|
||||
});
|
||||
newMlFile.faces = detectedFaces?.map((detectedFace) => ({
|
||||
...detectedFace,
|
||||
id: getFaceId(detectedFace, newMlFile.imageDimensions),
|
||||
}));
|
||||
// ?.filter((f) =>
|
||||
// f.box.width > syncContext.config.faceDetection.minFaceSize
|
||||
// );
|
||||
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
|
||||
};
|
||||
|
||||
const syncFileFaceCrops = async (fileContext: MLSyncFileContext) => {
|
||||
const { newMlFile } = fileContext;
|
||||
const imageBitmap = await fetchImageBitmapForContext(fileContext);
|
||||
newMlFile.faceCropMethod = {
|
||||
value: "ArcFace",
|
||||
version: 1,
|
||||
};
|
||||
|
||||
for (const face of newMlFile.faces) {
|
||||
await saveFaceCrop(imageBitmap, face);
|
||||
}
|
||||
};
|
||||
|
||||
const syncFileFaceAlignments = async (
|
||||
fileContext: MLSyncFileContext,
|
||||
): Promise<Float32Array> => {
|
||||
const { newMlFile } = fileContext;
|
||||
newMlFile.faceAlignmentMethod = {
|
||||
value: "ArcFace",
|
||||
version: 1,
|
||||
};
|
||||
fileContext.newAlignment = true;
|
||||
const imageBitmap =
|
||||
fileContext.imageBitmap ||
|
||||
(await fetchImageBitmapForContext(fileContext));
|
||||
|
||||
// Execute the face alignment calculations
|
||||
for (const face of newMlFile.faces) {
|
||||
face.alignment = faceAlignment(face.detection);
|
||||
}
|
||||
// Extract face images and convert to Float32Array
|
||||
const faceAlignments = newMlFile.faces.map((f) => f.alignment);
|
||||
const faceImages = await extractFaceImagesToFloat32(
|
||||
faceAlignments,
|
||||
mobileFaceNetFaceSize,
|
||||
imageBitmap,
|
||||
);
|
||||
const blurValues = detectBlur(faceImages, newMlFile.faces);
|
||||
newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
|
||||
|
||||
imageBitmap.close();
|
||||
log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
|
||||
|
||||
return faceImages;
|
||||
};
|
||||
|
||||
const syncFileFaceEmbeddings = async (
|
||||
fileContext: MLSyncFileContext,
|
||||
alignedFacesInput: Float32Array,
|
||||
) => {
|
||||
const { newMlFile } = fileContext;
|
||||
newMlFile.faceEmbeddingMethod = {
|
||||
value: "MobileFaceNet",
|
||||
version: 2,
|
||||
};
|
||||
// TODO: when not storing face crops, image will be needed to extract faces
|
||||
// fileContext.imageBitmap ||
|
||||
// (await this.getImageBitmap(fileContext));
|
||||
|
||||
const embeddings = await faceEmbeddings(alignedFacesInput);
|
||||
newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
|
||||
|
||||
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
|
||||
};
|
||||
|
||||
const syncFileFaceMakeRelativeDetections = async (
|
||||
fileContext: MLSyncFileContext,
|
||||
) => {
|
||||
const { newMlFile } = fileContext;
|
||||
for (let i = 0; i < newMlFile.faces.length; i++) {
|
||||
const face = newMlFile.faces[i];
|
||||
if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
|
||||
face.detection = getRelativeDetection(
|
||||
face.detection,
|
||||
newMlFile.imageDimensions,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
|
||||
const faceCrop = getFaceCrop(imageBitmap, face.detection);
|
||||
|
||||
const blob = await imageBitmapToBlob(faceCrop.image);
|
||||
|
||||
const cache = await openCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
faceCrop.image.close();
|
||||
|
||||
return blob;
|
||||
};
|
||||
|
||||
export const regenerateFaceCrop = async (faceID: string) => {
|
||||
const fileID = Number(faceID.split("-")[0]);
|
||||
const personFace = await mlIDbStorage.getFace(fileID, faceID);
|
||||
if (!personFace) {
|
||||
throw Error("Face not found");
|
||||
}
|
||||
|
||||
const file = await getLocalFile(personFace.fileId);
|
||||
const imageBitmap = await fetchImageBitmap(file);
|
||||
return await saveFaceCrop(imageBitmap, personFace);
|
||||
};
|
||||
|
||||
async function extractFaceImagesToFloat32(
|
||||
faceAlignments: Array<FaceAlignment>,
|
||||
faceSize: number,
|
||||
image: ImageBitmap,
|
||||
): Promise<Float32Array> {
|
||||
const faceData = new Float32Array(
|
||||
faceAlignments.length * faceSize * faceSize * 3,
|
||||
);
|
||||
for (let i = 0; i < faceAlignments.length; i++) {
|
||||
const alignedFace = faceAlignments[i];
|
||||
const faceDataOffset = i * faceSize * faceSize * 3;
|
||||
warpAffineFloat32List(
|
||||
image,
|
||||
alignedFace,
|
||||
faceSize,
|
||||
faceData,
|
||||
faceDataOffset,
|
||||
);
|
||||
}
|
||||
return faceData;
|
||||
}
|
|
@ -1,14 +1,10 @@
|
|||
import log from "@/next/log";
|
||||
import { APPS } from "@ente/shared/apps/constants";
|
||||
import { expose } from "comlink";
|
||||
import downloadManager from "services/download";
|
||||
import mlService from "services/machineLearning/machineLearningService";
|
||||
import { MachineLearningWorker } from "services/ml/types";
|
||||
import { EnteFile } from "types/file";
|
||||
|
||||
export class DedicatedMLWorker implements MachineLearningWorker {
|
||||
constructor() {
|
||||
log.info("DedicatedMLWorker constructor called");
|
||||
}
|
||||
|
||||
export class DedicatedMLWorker {
|
||||
public async closeLocalSyncContext() {
|
||||
return mlService.closeLocalSyncContext();
|
||||
}
|
||||
|
@ -19,23 +15,17 @@ export class DedicatedMLWorker implements MachineLearningWorker {
|
|||
enteFile: EnteFile,
|
||||
localFile: globalThis.File,
|
||||
) {
|
||||
return mlService.syncLocalFile(token, userID, enteFile, localFile);
|
||||
mlService.syncLocalFile(token, userID, enteFile, localFile);
|
||||
}
|
||||
|
||||
public async sync(token: string, userID: number) {
|
||||
await downloadManager.init(APPS.PHOTOS, { token });
|
||||
return mlService.sync(token, userID);
|
||||
}
|
||||
|
||||
public async regenerateFaceCrop(
|
||||
token: string,
|
||||
userID: number,
|
||||
faceID: string,
|
||||
) {
|
||||
return mlService.regenerateFaceCrop(token, userID, faceID);
|
||||
}
|
||||
|
||||
public close() {
|
||||
self.close();
|
||||
public async regenerateFaceCrop(token: string, faceID: string) {
|
||||
await downloadManager.init(APPS.PHOTOS, { token });
|
||||
return mlService.regenerateFaceCrop(faceID);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,65 +2,47 @@ import { FILE_TYPE } from "@/media/file-type";
|
|||
import { decodeLivePhoto } from "@/media/live-photo";
|
||||
import log from "@/next/log";
|
||||
import DownloadManager from "services/download";
|
||||
import { Dimensions } from "services/face/geom";
|
||||
import { DetectedFace, MLSyncFileContext } from "services/face/types";
|
||||
import { getLocalFiles } from "services/fileService";
|
||||
import { Dimensions } from "services/ml/geom";
|
||||
import {
|
||||
DetectedFace,
|
||||
MLSyncContext,
|
||||
MLSyncFileContext,
|
||||
} from "services/ml/types";
|
||||
import { EnteFile } from "types/file";
|
||||
import { getRenderableImage } from "utils/file";
|
||||
import { clamp } from "utils/image";
|
||||
|
||||
class ReaderService {
|
||||
async getImageBitmap(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
) {
|
||||
try {
|
||||
if (fileContext.imageBitmap) {
|
||||
return fileContext.imageBitmap;
|
||||
}
|
||||
if (fileContext.localFile) {
|
||||
if (
|
||||
fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE
|
||||
) {
|
||||
throw new Error(
|
||||
"Local file of only image type is supported",
|
||||
);
|
||||
}
|
||||
fileContext.imageBitmap = await getLocalFileImageBitmap(
|
||||
fileContext.enteFile,
|
||||
fileContext.localFile,
|
||||
);
|
||||
} else if (
|
||||
syncContext.config.imageSource === "Original" &&
|
||||
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
|
||||
fileContext.enteFile.metadata.fileType,
|
||||
)
|
||||
) {
|
||||
fileContext.imageBitmap = await fetchImageBitmap(
|
||||
fileContext.enteFile,
|
||||
);
|
||||
} else {
|
||||
fileContext.imageBitmap = await getThumbnailImageBitmap(
|
||||
fileContext.enteFile,
|
||||
);
|
||||
}
|
||||
|
||||
fileContext.newMlFile.imageSource = syncContext.config.imageSource;
|
||||
const { width, height } = fileContext.imageBitmap;
|
||||
fileContext.newMlFile.imageDimensions = { width, height };
|
||||
|
||||
return fileContext.imageBitmap;
|
||||
} catch (e) {
|
||||
log.error("failed to create image bitmap", e);
|
||||
throw e;
|
||||
}
|
||||
export const fetchImageBitmapForContext = async (
|
||||
fileContext: MLSyncFileContext,
|
||||
) => {
|
||||
if (fileContext.imageBitmap) {
|
||||
return fileContext.imageBitmap;
|
||||
}
|
||||
}
|
||||
export default new ReaderService();
|
||||
if (fileContext.localFile) {
|
||||
if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
|
||||
throw new Error("Local file of only image type is supported");
|
||||
}
|
||||
fileContext.imageBitmap = await getLocalFileImageBitmap(
|
||||
fileContext.enteFile,
|
||||
fileContext.localFile,
|
||||
);
|
||||
} else if (
|
||||
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
|
||||
fileContext.enteFile.metadata.fileType,
|
||||
)
|
||||
) {
|
||||
fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
|
||||
} else {
|
||||
// TODO-ML(MR): We don't do it on videos, when will we ever come
|
||||
// here?
|
||||
fileContext.imageBitmap = await getThumbnailImageBitmap(
|
||||
fileContext.enteFile,
|
||||
);
|
||||
}
|
||||
|
||||
fileContext.newMlFile.imageSource = "Original";
|
||||
const { width, height } = fileContext.imageBitmap;
|
||||
fileContext.newMlFile.imageDimensions = { width, height };
|
||||
|
||||
return fileContext.imageBitmap;
|
||||
};
|
||||
|
||||
export async function getLocalFile(fileId: number) {
|
||||
const localFiles = await getLocalFiles();
|
|
@ -1,5 +1,5 @@
|
|||
import { ComlinkWorker } from "@/next/worker/comlink-worker";
|
||||
import type { DedicatedMLWorker } from "services/ml/face.worker";
|
||||
import type { DedicatedMLWorker } from "services/face/face.worker";
|
||||
|
||||
const createFaceWebWorker = () =>
|
||||
new Worker(new URL("face.worker.ts", import.meta.url));
|
web/apps/photos/src/services/face/people.ts (new file, 111 lines)
|
@ -0,0 +1,111 @@
|
|||
import log from "@/next/log";
|
||||
import mlIDbStorage from "services/face/db";
|
||||
import { Face, Person } from "services/face/types";
|
||||
import { type MLSyncContext } from "services/machineLearning/machineLearningService";
|
||||
import { clusterFaces } from "./cluster";
|
||||
import { saveFaceCrop } from "./f-index";
|
||||
import { fetchImageBitmap, getLocalFile } from "./image";
|
||||
|
||||
export const syncPeopleIndex = async (syncContext: MLSyncContext) => {
|
||||
const filesVersion = await mlIDbStorage.getIndexVersion("files");
|
||||
if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: have faces addressable through fileId + faceId
|
||||
// to avoid index based addressing, which is prone to wrong results
|
||||
// one way could be to match nearest face within threshold in the file
|
||||
const allFacesMap =
|
||||
syncContext.allSyncedFacesMap ??
|
||||
(syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap());
|
||||
const allFaces = [...allFacesMap.values()].flat();
|
||||
|
||||
await runFaceClustering(syncContext, allFaces);
|
||||
await syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
|
||||
|
||||
await mlIDbStorage.setIndexVersion("people", filesVersion);
|
||||
};
|
||||
|
||||
const runFaceClustering = async (
|
||||
syncContext: MLSyncContext,
|
||||
allFaces: Array<Face>,
|
||||
) => {
|
||||
// await this.init();
|
||||
|
||||
if (!allFaces || allFaces.length < 50) {
|
||||
log.info(
|
||||
`Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("Running clustering allFaces: ", allFaces.length);
|
||||
syncContext.mlLibraryData.faceClusteringResults = await clusterFaces(
|
||||
allFaces.map((f) => Array.from(f.embedding)),
|
||||
);
|
||||
syncContext.mlLibraryData.faceClusteringMethod = {
|
||||
value: "Hdbscan",
|
||||
version: 1,
|
||||
};
|
||||
log.info(
|
||||
"[MLService] Got face clustering results: ",
|
||||
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
|
||||
);
|
||||
|
||||
// syncContext.faceClustersWithNoise = {
|
||||
// clusters: syncContext.faceClusteringResults.clusters.map(
|
||||
// (faces) => ({
|
||||
// faces,
|
||||
// })
|
||||
// ),
|
||||
// noise: syncContext.faceClusteringResults.noise,
|
||||
// };
|
||||
};
|
||||
|
||||
const syncPeopleFromClusters = async (
|
||||
syncContext: MLSyncContext,
|
||||
allFacesMap: Map<number, Array<Face>>,
|
||||
allFaces: Array<Face>,
|
||||
) => {
|
||||
const clusters = syncContext.mlLibraryData.faceClusteringResults?.clusters;
|
||||
if (!clusters || clusters.length < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const face of allFaces) {
|
||||
face.personId = undefined;
|
||||
}
|
||||
await mlIDbStorage.clearAllPeople();
|
||||
for (const [index, cluster] of clusters.entries()) {
|
||||
const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
|
||||
|
||||
// TODO: take default display face from last leaves of hdbscan clusters
|
||||
const personFace = faces.reduce((best, face) =>
|
||||
face.detection.probability > best.detection.probability
|
||||
? face
|
||||
: best,
|
||||
);
|
||||
|
||||
if (personFace && !personFace.crop?.cacheKey) {
|
||||
const file = await getLocalFile(personFace.fileId);
|
||||
const imageBitmap = await fetchImageBitmap(file);
|
||||
await saveFaceCrop(imageBitmap, personFace);
|
||||
}
|
||||
|
||||
const person: Person = {
|
||||
id: index,
|
||||
files: faces.map((f) => f.fileId),
|
||||
displayFaceId: personFace?.id,
|
||||
faceCropCacheKey: personFace?.crop?.cacheKey,
|
||||
};
|
||||
|
||||
await mlIDbStorage.putPerson(person);
|
||||
|
||||
faces.forEach((face) => {
|
||||
face.personId = person.id;
|
||||
});
|
||||
// log.info("Creating person: ", person, faces);
|
||||
}
|
||||
|
||||
await mlIDbStorage.updateFaces(allFacesMap);
|
||||
};
|
web/apps/photos/src/services/face/types.ts (new file, 161 lines)
|
@ -0,0 +1,161 @@
|
|||
import type { ClusterFacesResult } from "services/face/cluster";
|
||||
import { Dimensions } from "services/face/geom";
|
||||
import { EnteFile } from "types/file";
|
||||
import { Box, Point } from "./geom";
|
||||
|
||||
export interface MLSyncResult {
|
||||
nOutOfSyncFiles: number;
|
||||
nSyncedFiles: number;
|
||||
nSyncedFaces: number;
|
||||
nFaceClusters: number;
|
||||
nFaceNoise: number;
|
||||
error?: Error;
|
||||
}
|
||||
|
||||
export declare type FaceDescriptor = Float32Array;
|
||||
|
||||
export declare type Cluster = Array<number>;
|
||||
|
||||
export interface FacesCluster {
|
||||
faces: Cluster;
|
||||
summary?: FaceDescriptor;
|
||||
}
|
||||
|
||||
export interface FacesClustersWithNoise {
|
||||
clusters: Array<FacesCluster>;
|
||||
noise: Cluster;
|
||||
}
|
||||
|
||||
export interface NearestCluster {
|
||||
cluster: FacesCluster;
|
||||
distance: number;
|
||||
}
|
||||
|
||||
export declare type Landmark = Point;
|
||||
|
||||
export declare type ImageType = "Original" | "Preview";
|
||||
|
||||
export declare type FaceDetectionMethod = "YoloFace";
|
||||
|
||||
export declare type FaceCropMethod = "ArcFace";
|
||||
|
||||
export declare type FaceAlignmentMethod = "ArcFace";
|
||||
|
||||
export declare type FaceEmbeddingMethod = "MobileFaceNet";
|
||||
|
||||
export declare type BlurDetectionMethod = "Laplacian";
|
||||
|
||||
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
|
||||
|
||||
export class AlignedBox {
|
||||
box: Box;
|
||||
rotation: number;
|
||||
}
|
||||
|
||||
export interface Versioned<T> {
|
||||
value: T;
|
||||
version: number;
|
||||
}
|
||||
|
||||
export interface FaceDetection {
|
||||
// box and landmarks are relative to the image dimensions stored at mlFileData
|
||||
box: Box;
|
||||
landmarks?: Array<Landmark>;
|
||||
probability?: number;
|
||||
}
|
||||
|
||||
export interface DetectedFace {
|
||||
fileId: number;
|
||||
detection: FaceDetection;
|
||||
}
|
||||
|
||||
export interface DetectedFaceWithId extends DetectedFace {
|
||||
id: string;
|
||||
}
|
||||
|
||||
export interface FaceCrop {
|
||||
image: ImageBitmap;
|
||||
// imageBox is relative to the image dimensions stored at mlFileData
|
||||
imageBox: Box;
|
||||
}
|
||||
|
||||
export interface StoredFaceCrop {
|
||||
cacheKey: string;
|
||||
imageBox: Box;
|
||||
}
|
||||
|
||||
export interface CroppedFace extends DetectedFaceWithId {
|
||||
crop?: StoredFaceCrop;
|
||||
}
|
||||
|
||||
export interface FaceAlignment {
|
||||
// TODO: remove affine matrix as rotation, size and center
|
||||
// are simple to store and use, affine matrix adds complexity while getting crop
|
||||
affineMatrix: Array<Array<number>>;
|
||||
rotation: number;
|
||||
// size and center are relative to the image dimensions stored at mlFileData
|
||||
size: number;
|
||||
center: Point;
|
||||
}
|
||||
|
||||
export interface AlignedFace extends CroppedFace {
|
||||
alignment?: FaceAlignment;
|
||||
blurValue?: number;
|
||||
}
|
||||
|
||||
export declare type FaceEmbedding = Float32Array;
|
||||
|
||||
export interface FaceWithEmbedding extends AlignedFace {
|
||||
embedding?: FaceEmbedding;
|
||||
}
|
||||
|
||||
export interface Face extends FaceWithEmbedding {
|
||||
personId?: number;
|
||||
}
|
||||
|
||||
export interface Person {
|
||||
id: number;
|
||||
name?: string;
|
||||
files: Array<number>;
|
||||
displayFaceId?: string;
|
||||
faceCropCacheKey?: string;
|
||||
}
|
||||
|
||||
export interface MlFileData {
|
||||
fileId: number;
|
||||
faces?: Face[];
|
||||
imageSource?: ImageType;
|
||||
imageDimensions?: Dimensions;
|
||||
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
|
||||
faceCropMethod?: Versioned<FaceCropMethod>;
|
||||
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
|
||||
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
|
||||
mlVersion: number;
|
||||
errorCount: number;
|
||||
lastErrorMessage?: string;
|
||||
}
|
||||
|
||||
export interface MLSearchConfig {
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
export interface MLSyncFileContext {
|
||||
enteFile: EnteFile;
|
||||
localFile?: globalThis.File;
|
||||
|
||||
oldMlFile?: MlFileData;
|
||||
newMlFile?: MlFileData;
|
||||
|
||||
imageBitmap?: ImageBitmap;
|
||||
|
||||
newDetection?: boolean;
|
||||
newAlignment?: boolean;
|
||||
}
|
||||
|
||||
export interface MLLibraryData {
|
||||
faceClusteringMethod?: Versioned<ClusteringMethod>;
|
||||
faceClusteringResults?: ClusterFacesResult;
|
||||
faceClustersWithNoise?: FacesClustersWithNoise;
|
||||
}
|
||||
|
||||
export declare type MLIndex = "files" | "people";
|
|
@ -1,60 +0,0 @@
|
|||
import { Box, enlargeBox } from "services/ml/geom";
|
||||
import {
|
||||
FaceAlignment,
|
||||
FaceCrop,
|
||||
FaceCropConfig,
|
||||
FaceCropMethod,
|
||||
FaceCropService,
|
||||
FaceDetection,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
import { cropWithRotation } from "utils/image";
|
||||
import { getArcfaceAlignment } from "./arcfaceAlignmentService";
|
||||
|
||||
class ArcFaceCropService implements FaceCropService {
|
||||
public method: Versioned<FaceCropMethod>;
|
||||
|
||||
constructor() {
|
||||
this.method = {
|
||||
value: "ArcFace",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public async getFaceCrop(
|
||||
imageBitmap: ImageBitmap,
|
||||
faceDetection: FaceDetection,
|
||||
config: FaceCropConfig,
|
||||
): Promise<FaceCrop> {
|
||||
const alignedFace = getArcfaceAlignment(faceDetection);
|
||||
const faceCrop = getFaceCrop(imageBitmap, alignedFace, config);
|
||||
|
||||
return faceCrop;
|
||||
}
|
||||
}
|
||||
|
||||
export default new ArcFaceCropService();
|
||||
|
||||
export function getFaceCrop(
|
||||
imageBitmap: ImageBitmap,
|
||||
alignment: FaceAlignment,
|
||||
config: FaceCropConfig,
|
||||
): FaceCrop {
|
||||
const alignmentBox = new Box({
|
||||
x: alignment.center.x - alignment.size / 2,
|
||||
y: alignment.center.y - alignment.size / 2,
|
||||
width: alignment.size,
|
||||
height: alignment.size,
|
||||
}).round();
|
||||
const scaleForPadding = 1 + config.padding * 2;
|
||||
const paddedBox = enlargeBox(alignmentBox, scaleForPadding).round();
|
||||
const faceImageBitmap = cropWithRotation(imageBitmap, paddedBox, 0, {
|
||||
width: config.maxSize,
|
||||
height: config.maxSize,
|
||||
});
|
||||
|
||||
return {
|
||||
image: faceImageBitmap,
|
||||
imageBox: paddedBox,
|
||||
};
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
import { DBSCAN, KMEANS, OPTICS } from "density-clustering";
|
||||
import { Hdbscan } from "hdbscan";
|
||||
import { HdbscanInput } from "hdbscan/dist/types";
|
||||
import {
|
||||
ClusteringConfig,
|
||||
ClusteringInput,
|
||||
ClusteringMethod,
|
||||
ClusteringResults,
|
||||
HdbscanResults,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
|
||||
class ClusteringService {
|
||||
private dbscan: DBSCAN;
|
||||
private optics: OPTICS;
|
||||
private kmeans: KMEANS;
|
||||
|
||||
constructor() {
|
||||
this.dbscan = new DBSCAN();
|
||||
this.optics = new OPTICS();
|
||||
this.kmeans = new KMEANS();
|
||||
}
|
||||
|
||||
public clusterUsingDBSCAN(
|
||||
dataset: Array<Array<number>>,
|
||||
epsilon: number = 1.0,
|
||||
minPts: number = 2,
|
||||
): ClusteringResults {
|
||||
// log.info("distanceFunction", DBSCAN._);
|
||||
const clusters = this.dbscan.run(dataset, epsilon, minPts);
|
||||
const noise = this.dbscan.noise;
|
||||
return { clusters, noise };
|
||||
}
|
||||
|
||||
public clusterUsingOPTICS(
|
||||
dataset: Array<Array<number>>,
|
||||
epsilon: number = 1.0,
|
||||
minPts: number = 2,
|
||||
) {
|
||||
const clusters = this.optics.run(dataset, epsilon, minPts);
|
||||
return { clusters, noise: [] };
|
||||
}
|
||||
|
||||
public clusterUsingKMEANS(
|
||||
dataset: Array<Array<number>>,
|
||||
numClusters: number = 5,
|
||||
) {
|
||||
const clusters = this.kmeans.run(dataset, numClusters);
|
||||
return { clusters, noise: [] };
|
||||
}
|
||||
|
||||
public clusterUsingHdbscan(hdbscanInput: HdbscanInput): HdbscanResults {
|
||||
if (hdbscanInput.input.length < 10) {
|
||||
throw Error("too few samples to run Hdbscan");
|
||||
}
|
||||
|
||||
const hdbscan = new Hdbscan(hdbscanInput);
|
||||
const clusters = hdbscan.getClusters();
|
||||
const noise = hdbscan.getNoise();
|
||||
const debugInfo = hdbscan.getDebugInfo();
|
||||
|
||||
return { clusters, noise, debugInfo };
|
||||
}
|
||||
|
||||
public cluster(
|
||||
method: Versioned<ClusteringMethod>,
|
||||
input: ClusteringInput,
|
||||
config: ClusteringConfig,
|
||||
) {
|
||||
if (method.value === "Hdbscan") {
|
||||
return this.clusterUsingHdbscan({
|
||||
input,
|
||||
minClusterSize: config.minClusterSize,
|
||||
debug: config.generateDebugInfo,
|
||||
});
|
||||
} else if (method.value === "Dbscan") {
|
||||
return this.clusterUsingDBSCAN(
|
||||
input,
|
||||
config.maxDistanceInsideCluster,
|
||||
config.minClusterSize,
|
||||
);
|
||||
} else {
|
||||
throw Error("Unknown clustering method: " + method.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default ClusteringService;
|
|
@ -1,37 +0,0 @@
|
|||
import { DBSCAN } from "density-clustering";
|
||||
import {
|
||||
ClusteringConfig,
|
||||
ClusteringInput,
|
||||
ClusteringMethod,
|
||||
ClusteringService,
|
||||
HdbscanResults,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
|
||||
class DbscanClusteringService implements ClusteringService {
|
||||
public method: Versioned<ClusteringMethod>;
|
||||
|
||||
constructor() {
|
||||
this.method = {
|
||||
value: "Dbscan",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public async cluster(
|
||||
input: ClusteringInput,
|
||||
config: ClusteringConfig,
|
||||
): Promise<HdbscanResults> {
|
||||
// log.info('Clustering input: ', input);
|
||||
const dbscan = new DBSCAN();
|
||||
const clusters = dbscan.run(
|
||||
input,
|
||||
config.clusterSelectionEpsilon,
|
||||
config.minClusterSize,
|
||||
);
|
||||
const noise = dbscan.noise;
|
||||
return { clusters, noise };
|
||||
}
|
||||
}
|
||||
|
||||
export default new DbscanClusteringService();
|
|
@ -1,359 +0,0 @@
|
|||
import { openCache } from "@/next/blob-cache";
|
||||
import log from "@/next/log";
|
||||
import mlIDbStorage from "services/ml/db";
|
||||
import {
|
||||
DetectedFace,
|
||||
Face,
|
||||
MLSyncContext,
|
||||
MLSyncFileContext,
|
||||
type FaceAlignment,
|
||||
type Versioned,
|
||||
} from "services/ml/types";
|
||||
import { imageBitmapToBlob, warpAffineFloat32List } from "utils/image";
|
||||
import ReaderService, {
|
||||
fetchImageBitmap,
|
||||
getFaceId,
|
||||
getLocalFile,
|
||||
} from "./readerService";
|
||||
|
||||
class FaceService {
|
||||
async syncFileFaceDetections(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
) {
|
||||
const { oldMlFile, newMlFile } = fileContext;
|
||||
if (
|
||||
!isDifferentOrOld(
|
||||
oldMlFile?.faceDetectionMethod,
|
||||
syncContext.faceDetectionService.method,
|
||||
) &&
|
||||
oldMlFile?.imageSource === syncContext.config.imageSource
|
||||
) {
|
||||
newMlFile.faces = oldMlFile?.faces?.map((existingFace) => ({
|
||||
id: existingFace.id,
|
||||
fileId: existingFace.fileId,
|
||||
detection: existingFace.detection,
|
||||
}));
|
||||
|
||||
newMlFile.imageSource = oldMlFile.imageSource;
|
||||
newMlFile.imageDimensions = oldMlFile.imageDimensions;
|
||||
newMlFile.faceDetectionMethod = oldMlFile.faceDetectionMethod;
|
||||
return;
|
||||
}
|
||||
|
||||
newMlFile.faceDetectionMethod = syncContext.faceDetectionService.method;
|
||||
fileContext.newDetection = true;
|
||||
const imageBitmap = await ReaderService.getImageBitmap(
|
||||
syncContext,
|
||||
fileContext,
|
||||
);
|
||||
const timerId = `faceDetection-${fileContext.enteFile.id}`;
|
||||
console.time(timerId);
|
||||
const faceDetections =
|
||||
await syncContext.faceDetectionService.detectFaces(imageBitmap);
|
||||
console.timeEnd(timerId);
|
||||
console.log("faceDetections: ", faceDetections?.length);
|
||||
|
||||
// TODO: reenable faces filtering based on width
|
||||
const detectedFaces = faceDetections?.map((detection) => {
|
||||
return {
|
||||
fileId: fileContext.enteFile.id,
|
||||
detection,
|
||||
} as DetectedFace;
|
||||
});
|
||||
newMlFile.faces = detectedFaces?.map((detectedFace) => ({
|
||||
...detectedFace,
|
||||
id: getFaceId(detectedFace, newMlFile.imageDimensions),
|
||||
}));
|
||||
// ?.filter((f) =>
|
||||
// f.box.width > syncContext.config.faceDetection.minFaceSize
|
||||
// );
|
||||
log.info("[MLService] Detected Faces: ", newMlFile.faces?.length);
|
||||
}
|
||||
|
||||
async syncFileFaceCrops(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
) {
|
||||
const { oldMlFile, newMlFile } = fileContext;
|
||||
if (
|
||||
// !syncContext.config.faceCrop.enabled ||
|
||||
!fileContext.newDetection &&
|
||||
!isDifferentOrOld(
|
||||
oldMlFile?.faceCropMethod,
|
||||
syncContext.faceCropService.method,
|
||||
) &&
|
||||
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
|
||||
) {
|
||||
for (const [index, face] of newMlFile.faces.entries()) {
|
||||
face.crop = oldMlFile.faces[index].crop;
|
||||
}
|
||||
newMlFile.faceCropMethod = oldMlFile.faceCropMethod;
|
||||
return;
|
||||
}
|
||||
|
||||
const imageBitmap = await ReaderService.getImageBitmap(
|
||||
syncContext,
|
||||
fileContext,
|
||||
);
|
||||
newMlFile.faceCropMethod = syncContext.faceCropService.method;
|
||||
|
||||
for (const face of newMlFile.faces) {
|
||||
await this.saveFaceCrop(imageBitmap, face, syncContext);
|
||||
}
|
||||
}
|
||||
|
||||
async syncFileFaceAlignments(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
): Promise<Float32Array> {
|
||||
const { oldMlFile, newMlFile } = fileContext;
|
||||
if (
|
||||
!fileContext.newDetection &&
|
||||
!isDifferentOrOld(
|
||||
oldMlFile?.faceAlignmentMethod,
|
||||
syncContext.faceAlignmentService.method,
|
||||
) &&
|
||||
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
|
||||
) {
|
||||
for (const [index, face] of newMlFile.faces.entries()) {
|
||||
face.alignment = oldMlFile.faces[index].alignment;
|
||||
}
|
||||
newMlFile.faceAlignmentMethod = oldMlFile.faceAlignmentMethod;
|
||||
return;
|
||||
}
|
||||
|
||||
newMlFile.faceAlignmentMethod = syncContext.faceAlignmentService.method;
|
||||
fileContext.newAlignment = true;
|
||||
const imageBitmap =
|
||||
fileContext.imageBitmap ||
|
||||
(await ReaderService.getImageBitmap(syncContext, fileContext));
|
||||
|
||||
// Execute the face alignment calculations
|
||||
for (const face of newMlFile.faces) {
|
||||
face.alignment = syncContext.faceAlignmentService.getFaceAlignment(
|
||||
face.detection,
|
||||
);
|
||||
}
|
||||
// Extract face images and convert to Float32Array
|
||||
const faceAlignments = newMlFile.faces.map((f) => f.alignment);
|
||||
const faceImages = await extractFaceImagesToFloat32(
|
||||
faceAlignments,
|
||||
syncContext.faceEmbeddingService.faceSize,
|
||||
imageBitmap,
|
||||
);
|
||||
const blurValues = syncContext.blurDetectionService.detectBlur(
|
||||
faceImages,
|
||||
newMlFile.faces,
|
||||
);
|
||||
newMlFile.faces.forEach((f, i) => (f.blurValue = blurValues[i]));
|
||||
|
||||
imageBitmap.close();
|
||||
log.info("[MLService] alignedFaces: ", newMlFile.faces?.length);
|
||||
|
||||
return faceImages;
|
||||
}
|
||||
|
||||
async syncFileFaceEmbeddings(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
alignedFacesInput: Float32Array,
|
||||
) {
|
||||
const { oldMlFile, newMlFile } = fileContext;
|
||||
if (
|
||||
!fileContext.newAlignment &&
|
||||
!isDifferentOrOld(
|
||||
oldMlFile?.faceEmbeddingMethod,
|
||||
syncContext.faceEmbeddingService.method,
|
||||
) &&
|
||||
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
|
||||
) {
|
||||
for (const [index, face] of newMlFile.faces.entries()) {
|
||||
face.embedding = oldMlFile.faces[index].embedding;
|
||||
}
|
||||
newMlFile.faceEmbeddingMethod = oldMlFile.faceEmbeddingMethod;
|
||||
return;
|
||||
}
|
||||
|
||||
newMlFile.faceEmbeddingMethod = syncContext.faceEmbeddingService.method;
|
||||
// TODO: when not storing face crops, image will be needed to extract faces
|
||||
// fileContext.imageBitmap ||
|
||||
// (await this.getImageBitmap(syncContext, fileContext));
|
||||
|
||||
const embeddings =
|
||||
await syncContext.faceEmbeddingService.getFaceEmbeddings(
|
||||
alignedFacesInput,
|
||||
);
|
||||
newMlFile.faces.forEach((f, i) => (f.embedding = embeddings[i]));
|
||||
|
||||
log.info("[MLService] facesWithEmbeddings: ", newMlFile.faces.length);
|
||||
}
|
||||
|
||||
async syncFileFaceMakeRelativeDetections(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
) {
|
||||
const { oldMlFile, newMlFile } = fileContext;
|
||||
if (
|
||||
!fileContext.newAlignment &&
|
||||
!isDifferentOrOld(
|
||||
oldMlFile?.faceEmbeddingMethod,
|
||||
syncContext.faceEmbeddingService.method,
|
||||
) &&
|
||||
areFaceIdsSame(newMlFile.faces, oldMlFile?.faces)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
for (let i = 0; i < newMlFile.faces.length; i++) {
|
||||
const face = newMlFile.faces[i];
|
||||
if (face.detection.box.x + face.detection.box.width < 2) continue; // Skip if somehow already relative
|
||||
face.detection =
|
||||
syncContext.faceDetectionService.getRelativeDetection(
|
||||
face.detection,
|
||||
newMlFile.imageDimensions,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async saveFaceCrop(
|
||||
imageBitmap: ImageBitmap,
|
||||
face: Face,
|
||||
syncContext: MLSyncContext,
|
||||
) {
|
||||
const faceCrop = await syncContext.faceCropService.getFaceCrop(
|
||||
imageBitmap,
|
||||
face.detection,
|
||||
syncContext.config.faceCrop,
|
||||
);
|
||||
|
||||
const blobOptions = syncContext.config.faceCrop.blobOptions;
|
||||
const blob = await imageBitmapToBlob(faceCrop.image, blobOptions);
|
||||
|
||||
const cache = await openCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
faceCrop.image.close();
|
||||
|
||||
return blob;
|
||||
}
|
||||
|
||||
async getAllSyncedFacesMap(syncContext: MLSyncContext) {
|
||||
if (syncContext.allSyncedFacesMap) {
|
||||
return syncContext.allSyncedFacesMap;
|
||||
}
|
||||
|
||||
syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap();
|
||||
return syncContext.allSyncedFacesMap;
|
||||
}
|
||||
|
||||
public async runFaceClustering(
|
||||
syncContext: MLSyncContext,
|
||||
allFaces: Array<Face>,
|
||||
) {
|
||||
// await this.init();
|
||||
|
||||
const clusteringConfig = syncContext.config.faceClustering;
|
||||
|
||||
if (!allFaces || allFaces.length < clusteringConfig.minInputSize) {
|
||||
log.info(
|
||||
"[MLService] Too few faces to cluster, not running clustering: ",
|
||||
allFaces.length,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("Running clustering allFaces: ", allFaces.length);
|
||||
syncContext.mlLibraryData.faceClusteringResults =
|
||||
await syncContext.faceClusteringService.cluster(
|
||||
allFaces.map((f) => Array.from(f.embedding)),
|
||||
syncContext.config.faceClustering,
|
||||
);
|
||||
syncContext.mlLibraryData.faceClusteringMethod =
|
||||
syncContext.faceClusteringService.method;
|
||||
log.info(
|
||||
"[MLService] Got face clustering results: ",
|
||||
JSON.stringify(syncContext.mlLibraryData.faceClusteringResults),
|
||||
);
|
||||
|
||||
// syncContext.faceClustersWithNoise = {
|
||||
// clusters: syncContext.faceClusteringResults.clusters.map(
|
||||
// (faces) => ({
|
||||
// faces,
|
||||
// })
|
||||
// ),
|
||||
// noise: syncContext.faceClusteringResults.noise,
|
||||
// };
|
||||
}
|
||||
|
||||
public async regenerateFaceCrop(
|
||||
syncContext: MLSyncContext,
|
||||
faceID: string,
|
||||
) {
|
||||
const fileID = Number(faceID.split("-")[0]);
|
||||
const personFace = await mlIDbStorage.getFace(fileID, faceID);
|
||||
if (!personFace) {
|
||||
throw Error("Face not found");
|
||||
}
|
||||
|
||||
const file = await getLocalFile(personFace.fileId);
|
||||
const imageBitmap = await fetchImageBitmap(file);
|
||||
return await this.saveFaceCrop(imageBitmap, personFace, syncContext);
|
||||
}
|
||||
}
|
||||
|
||||
export default new FaceService();
|
||||
|
||||
export function areFaceIdsSame(ofFaces: Array<Face>, toFaces: Array<Face>) {
|
||||
if (
|
||||
(ofFaces === null || ofFaces === undefined) &&
|
||||
(toFaces === null || toFaces === undefined)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
return primitiveArrayEquals(
|
||||
ofFaces?.map((f) => f.id),
|
||||
toFaces?.map((f) => f.id),
|
||||
);
|
||||
}
|
||||
|
||||
function primitiveArrayEquals(a, b) {
|
||||
return (
|
||||
Array.isArray(a) &&
|
||||
Array.isArray(b) &&
|
||||
a.length === b.length &&
|
||||
a.every((val, index) => val === b[index])
|
||||
);
|
||||
}
|
||||
|
||||
export function isDifferentOrOld(
|
||||
method: Versioned<string>,
|
||||
thanMethod: Versioned<string>,
|
||||
) {
|
||||
return (
|
||||
!method ||
|
||||
method.value !== thanMethod.value ||
|
||||
method.version < thanMethod.version
|
||||
);
|
||||
}
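// Not part of the original file: a minimal sketch of how the version gate
// above behaves. The Versioned values are made-up examples; a bumped version
// (or a changed method name) forces re-computation, while an identical value
// lets the previously stored per-face results be reused.
const exampleCurrentMethod = { value: "YoloFace", version: 2 };
console.log(isDifferentOrOld({ value: "YoloFace", version: 1 }, exampleCurrentMethod)); // true: older version, re-run
console.log(isDifferentOrOld({ value: "YoloFace", version: 2 }, exampleCurrentMethod)); // false: up to date, reuse stored faces
console.log(isDifferentOrOld(undefined, exampleCurrentMethod)); // true: nothing computed yet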
|
||||
|
||||
async function extractFaceImagesToFloat32(
|
||||
faceAlignments: Array<FaceAlignment>,
|
||||
faceSize: number,
|
||||
image: ImageBitmap,
|
||||
): Promise<Float32Array> {
|
||||
const faceData = new Float32Array(
|
||||
faceAlignments.length * faceSize * faceSize * 3,
|
||||
);
|
||||
for (let i = 0; i < faceAlignments.length; i++) {
|
||||
const alignedFace = faceAlignments[i];
|
||||
const faceDataOffset = i * faceSize * faceSize * 3;
|
||||
warpAffineFloat32List(
|
||||
image,
|
||||
alignedFace,
|
||||
faceSize,
|
||||
faceData,
|
||||
faceDataOffset,
|
||||
);
|
||||
}
|
||||
return faceData;
|
||||
}
|
|
@@ -1,44 +0,0 @@
import { Hdbscan } from "hdbscan";
import {
    ClusteringConfig,
    ClusteringInput,
    ClusteringMethod,
    ClusteringService,
    HdbscanResults,
    Versioned,
} from "services/ml/types";

class HdbscanClusteringService implements ClusteringService {
    public method: Versioned<ClusteringMethod>;

    constructor() {
        this.method = {
            value: "Hdbscan",
            version: 1,
        };
    }

    public async cluster(
        input: ClusteringInput,
        config: ClusteringConfig,
    ): Promise<HdbscanResults> {
        // log.info('Clustering input: ', input);
        const hdbscan = new Hdbscan({
            input,

            minClusterSize: config.minClusterSize,
            minSamples: config.minSamples,
            clusterSelectionEpsilon: config.clusterSelectionEpsilon,
            clusterSelectionMethod: config.clusterSelectionMethod,
            debug: config.generateDebugInfo,
        });

        return {
            clusters: hdbscan.getClusters(),
            noise: hdbscan.getNoise(),
            debugInfo: hdbscan.getDebugInfo(),
        };
    }
}

export default new HdbscanClusteringService();

@@ -1,211 +0,0 @@
import {
|
||||
BlurDetectionMethod,
|
||||
BlurDetectionService,
|
||||
Face,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
|
||||
import { mobileFaceNetFaceSize } from "./mobileFaceNetEmbeddingService";
|
||||
|
||||
class LaplacianBlurDetectionService implements BlurDetectionService {
|
||||
public method: Versioned<BlurDetectionMethod>;
|
||||
|
||||
public constructor() {
|
||||
this.method = {
|
||||
value: "Laplacian",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public detectBlur(alignedFaces: Float32Array, faces: Face[]): number[] {
|
||||
const numFaces = Math.round(
|
||||
alignedFaces.length /
|
||||
(mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3),
|
||||
);
|
||||
const blurValues: number[] = [];
|
||||
for (let i = 0; i < numFaces; i++) {
|
||||
const face = faces[i];
|
||||
const direction = getFaceDirection(face);
|
||||
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
|
||||
alignedFaces,
|
||||
i,
|
||||
);
|
||||
const laplacian = this.applyLaplacian(faceImage, direction);
|
||||
const variance = this.calculateVariance(laplacian);
|
||||
blurValues.push(variance);
|
||||
}
|
||||
return blurValues;
|
||||
}
|
||||
|
||||
private calculateVariance(matrix: number[][]): number {
|
||||
const numRows = matrix.length;
|
||||
const numCols = matrix[0].length;
|
||||
const totalElements = numRows * numCols;
|
||||
|
||||
// Calculate the mean
|
||||
let mean: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
mean += value;
|
||||
});
|
||||
});
|
||||
mean /= totalElements;
|
||||
|
||||
// Calculate the variance
|
||||
let variance: number = 0;
|
||||
matrix.forEach((row) => {
|
||||
row.forEach((value) => {
|
||||
const diff: number = value - mean;
|
||||
variance += diff * diff;
|
||||
});
|
||||
});
|
||||
variance /= totalElements;
|
||||
|
||||
return variance;
|
||||
}
|
||||
|
||||
private padImage(
|
||||
image: number[][],
|
||||
removeSideColumns: number = 56,
|
||||
direction: FaceDirection = "straight",
|
||||
): number[][] {
|
||||
// Throw if removeSideColumns is not even
|
||||
if (removeSideColumns % 2 != 0) {
|
||||
throw new Error("removeSideColumns must be even");
|
||||
}
|
||||
const numRows = image.length;
|
||||
const numCols = image[0].length;
|
||||
const paddedNumCols = numCols + 2 - removeSideColumns;
|
||||
const paddedNumRows = numRows + 2;
|
||||
|
||||
// Create a new matrix with extra padding
|
||||
const paddedImage: number[][] = Array.from(
|
||||
{ length: paddedNumRows },
|
||||
() => new Array(paddedNumCols).fill(0),
|
||||
);
|
||||
|
||||
// Copy original image into the center of the padded image
|
||||
if (direction === "straight") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] =
|
||||
image[i][j + Math.round(removeSideColumns / 2)];
|
||||
}
|
||||
}
|
||||
} // If the face is facing left, we only take the right side of the face image
|
||||
else if (direction === "left") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j + removeSideColumns];
|
||||
}
|
||||
}
|
||||
} // If the face is facing right, we only take the left side of the face image
|
||||
else if (direction === "right") {
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < paddedNumCols - 2; j++) {
|
||||
paddedImage[i + 1][j + 1] = image[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reflect padding
|
||||
// Top and bottom rows
|
||||
for (let j = 1; j <= paddedNumCols - 2; j++) {
|
||||
paddedImage[0][j] = paddedImage[2][j]; // Top row
|
||||
paddedImage[numRows + 1][j] = paddedImage[numRows - 1][j]; // Bottom row
|
||||
}
|
||||
// Left and right columns
|
||||
for (let i = 0; i < numRows + 2; i++) {
|
||||
paddedImage[i][0] = paddedImage[i][2]; // Left column
|
||||
paddedImage[i][paddedNumCols - 1] =
|
||||
paddedImage[i][paddedNumCols - 3]; // Right column
|
||||
}
|
||||
|
||||
return paddedImage;
|
||||
}
|
||||
|
||||
private applyLaplacian(
|
||||
image: number[][],
|
||||
direction: FaceDirection = "straight",
|
||||
): number[][] {
|
||||
const paddedImage: number[][] = this.padImage(
|
||||
image,
|
||||
undefined,
|
||||
direction,
|
||||
);
|
||||
const numRows = paddedImage.length - 2;
|
||||
const numCols = paddedImage[0].length - 2;
|
||||
|
||||
// Create an output image initialized to 0
|
||||
const outputImage: number[][] = Array.from({ length: numRows }, () =>
|
||||
new Array(numCols).fill(0),
|
||||
);
|
||||
|
||||
// Define the Laplacian kernel
|
||||
const kernel: number[][] = [
|
||||
[0, 1, 0],
|
||||
[1, -4, 1],
|
||||
[0, 1, 0],
|
||||
];
|
||||
|
||||
// Apply the kernel to each pixel
|
||||
for (let i = 0; i < numRows; i++) {
|
||||
for (let j = 0; j < numCols; j++) {
|
||||
let sum = 0;
|
||||
for (let ki = 0; ki < 3; ki++) {
|
||||
for (let kj = 0; kj < 3; kj++) {
|
||||
sum += paddedImage[i + ki][j + kj] * kernel[ki][kj];
|
||||
}
|
||||
}
|
||||
// Adjust the output value if necessary (e.g., clipping)
|
||||
outputImage[i][j] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
return outputImage;
|
||||
}
|
||||
}
|
||||
|
||||
export default new LaplacianBlurDetectionService();
|
||||
|
||||
type FaceDirection = "left" | "right" | "straight";
|
||||
|
||||
const getFaceDirection = (face: Face): FaceDirection => {
|
||||
const landmarks = face.detection.landmarks;
|
||||
const leftEye = landmarks[0];
|
||||
const rightEye = landmarks[1];
|
||||
const nose = landmarks[2];
|
||||
const leftMouth = landmarks[3];
|
||||
const rightMouth = landmarks[4];
|
||||
|
||||
const eyeDistanceX = Math.abs(rightEye.x - leftEye.x);
|
||||
const eyeDistanceY = Math.abs(rightEye.y - leftEye.y);
|
||||
const mouthDistanceY = Math.abs(rightMouth.y - leftMouth.y);
|
||||
|
||||
const faceIsUpright =
|
||||
Math.max(leftEye.y, rightEye.y) + 0.5 * eyeDistanceY < nose.y &&
|
||||
nose.y + 0.5 * mouthDistanceY < Math.min(leftMouth.y, rightMouth.y);
|
||||
|
||||
const noseStickingOutLeft =
|
||||
nose.x < Math.min(leftEye.x, rightEye.x) &&
|
||||
nose.x < Math.min(leftMouth.x, rightMouth.x);
|
||||
|
||||
const noseStickingOutRight =
|
||||
nose.x > Math.max(leftEye.x, rightEye.x) &&
|
||||
nose.x > Math.max(leftMouth.x, rightMouth.x);
|
||||
|
||||
const noseCloseToLeftEye =
|
||||
Math.abs(nose.x - leftEye.x) < 0.2 * eyeDistanceX;
|
||||
const noseCloseToRightEye =
|
||||
Math.abs(nose.x - rightEye.x) < 0.2 * eyeDistanceX;
|
||||
|
||||
// if (faceIsUpright && (noseStickingOutLeft || noseCloseToLeftEye)) {
|
||||
if (noseStickingOutLeft || (faceIsUpright && noseCloseToLeftEye)) {
|
||||
return "left";
|
||||
// } else if (faceIsUpright && (noseStickingOutRight || noseCloseToRightEye)) {
|
||||
} else if (noseStickingOutRight || (faceIsUpright && noseCloseToRightEye)) {
|
||||
return "right";
|
||||
}
|
||||
|
||||
return "straight";
|
||||
};
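// Not part of the original file: a small self-contained sketch of the
// variance-of-Laplacian idea that detectBlur relies on. The 3x3 kernel
// matches the one defined in applyLaplacian above; the flat patch and the
// threshold of 15 (DEFAULT_ML_SYNC_CONFIG.blurDetection.threshold) are
// illustrative values only.
const exampleLaplacianKernel = [
    [0, 1, 0],
    [1, -4, 1],
    [0, 1, 0],
];

const exampleVarianceOfLaplacian = (patch: number[][]): number => {
    const responses: number[] = [];
    // Interior pixels only, so this sketch needs no padding.
    for (let i = 1; i < patch.length - 1; i++) {
        for (let j = 1; j < patch[0].length - 1; j++) {
            let sum = 0;
            for (let ki = -1; ki <= 1; ki++) {
                for (let kj = -1; kj <= 1; kj++) {
                    sum += patch[i + ki][j + kj] * exampleLaplacianKernel[ki + 1][kj + 1];
                }
            }
            responses.push(sum);
        }
    }
    const mean = responses.reduce((a, b) => a + b, 0) / responses.length;
    return responses.reduce((a, b) => a + (b - mean) ** 2, 0) / responses.length;
};

// A uniform patch has no edges, so its Laplacian variance is ~0 and it falls
// below the blur threshold; a patch with strong edges scores far higher.
const exampleFlatPatch = [
    [128, 128, 128],
    [128, 128, 128],
    [128, 128, 128],
];
console.log(exampleVarianceOfLaplacian(exampleFlatPatch) < 15); // true: treated as blurry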
|
|
@@ -1,108 +1,45 @@
|
|||
import { haveWindow } from "@/next/env";
|
||||
import log from "@/next/log";
|
||||
import { ComlinkWorker } from "@/next/worker/comlink-worker";
|
||||
import { APPS } from "@ente/shared/apps/constants";
|
||||
import ComlinkCryptoWorker, {
|
||||
getDedicatedCryptoWorker,
|
||||
} from "@ente/shared/crypto";
|
||||
import { DedicatedCryptoWorker } from "@ente/shared/crypto/internal/crypto.worker";
|
||||
import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
|
||||
import PQueue from "p-queue";
|
||||
import downloadManager from "services/download";
|
||||
import { putEmbedding } from "services/embeddingService";
|
||||
import { getLocalFiles } from "services/fileService";
|
||||
import mlIDbStorage, {
|
||||
ML_SEARCH_CONFIG_NAME,
|
||||
ML_SYNC_CONFIG_NAME,
|
||||
} from "services/ml/db";
|
||||
import mlIDbStorage, { ML_SEARCH_CONFIG_NAME } from "services/face/db";
|
||||
import {
|
||||
BlurDetectionMethod,
|
||||
BlurDetectionService,
|
||||
ClusteringMethod,
|
||||
ClusteringService,
|
||||
Face,
|
||||
FaceAlignmentMethod,
|
||||
FaceAlignmentService,
|
||||
FaceCropMethod,
|
||||
FaceCropService,
|
||||
FaceDetection,
|
||||
FaceDetectionMethod,
|
||||
FaceDetectionService,
|
||||
FaceEmbeddingMethod,
|
||||
FaceEmbeddingService,
|
||||
Landmark,
|
||||
MLLibraryData,
|
||||
MLSearchConfig,
|
||||
MLSyncConfig,
|
||||
MLSyncContext,
|
||||
MLSyncFileContext,
|
||||
MLSyncResult,
|
||||
MlFileData,
|
||||
} from "services/ml/types";
|
||||
} from "services/face/types";
|
||||
import { getLocalFiles } from "services/fileService";
|
||||
import { EnteFile } from "types/file";
|
||||
import { isInternalUserForML } from "utils/user";
|
||||
import arcfaceAlignmentService from "./arcfaceAlignmentService";
|
||||
import arcfaceCropService from "./arcfaceCropService";
|
||||
import dbscanClusteringService from "./dbscanClusteringService";
|
||||
import FaceService from "./faceService";
|
||||
import hdbscanClusteringService from "./hdbscanClusteringService";
|
||||
import laplacianBlurDetectionService from "./laplacianBlurDetectionService";
|
||||
import mobileFaceNetEmbeddingService from "./mobileFaceNetEmbeddingService";
|
||||
import PeopleService from "./peopleService";
|
||||
import ReaderService from "./readerService";
|
||||
import yoloFaceDetectionService from "./yoloFaceDetectionService";
|
||||
import { regenerateFaceCrop, syncFileAnalyzeFaces } from "../face/f-index";
|
||||
import { fetchImageBitmapForContext } from "../face/image";
|
||||
import { syncPeopleIndex } from "../face/people";
|
||||
|
||||
export const DEFAULT_ML_SYNC_CONFIG: MLSyncConfig = {
|
||||
batchSize: 200,
|
||||
imageSource: "Original",
|
||||
faceDetection: {
|
||||
method: "YoloFace",
|
||||
},
|
||||
faceCrop: {
|
||||
enabled: true,
|
||||
method: "ArcFace",
|
||||
padding: 0.25,
|
||||
maxSize: 256,
|
||||
blobOptions: {
|
||||
type: "image/jpeg",
|
||||
quality: 0.8,
|
||||
},
|
||||
},
|
||||
faceAlignment: {
|
||||
method: "ArcFace",
|
||||
},
|
||||
blurDetection: {
|
||||
method: "Laplacian",
|
||||
threshold: 15,
|
||||
},
|
||||
faceEmbedding: {
|
||||
method: "MobileFaceNet",
|
||||
faceSize: 112,
|
||||
generateTsne: true,
|
||||
},
|
||||
faceClustering: {
|
||||
method: "Hdbscan",
|
||||
minClusterSize: 3,
|
||||
minSamples: 5,
|
||||
clusterSelectionEpsilon: 0.6,
|
||||
clusterSelectionMethod: "leaf",
|
||||
minInputSize: 50,
|
||||
// maxDistanceInsideCluster: 0.4,
|
||||
generateDebugInfo: true,
|
||||
},
|
||||
mlVersion: 3,
|
||||
};
|
||||
/**
|
||||
* TODO-ML(MR): What and why.
|
||||
* Also, needs to be 1 (in sync with mobile) when we move out of beta.
|
||||
*/
|
||||
export const defaultMLVersion = 3;
|
||||
|
||||
const batchSize = 200;
|
||||
|
||||
export const MAX_ML_SYNC_ERROR_COUNT = 1;
|
||||
|
||||
export const DEFAULT_ML_SEARCH_CONFIG: MLSearchConfig = {
|
||||
enabled: false,
|
||||
};
|
||||
|
||||
export const MAX_ML_SYNC_ERROR_COUNT = 1;
|
||||
|
||||
export async function getMLSyncConfig() {
|
||||
return mlIDbStorage.getConfig(ML_SYNC_CONFIG_NAME, DEFAULT_ML_SYNC_CONFIG);
|
||||
}
|
||||
|
||||
export async function getMLSearchConfig() {
|
||||
if (isInternalUserForML()) {
|
||||
return mlIDbStorage.getConfig(
|
||||
|
@@ -119,95 +56,30 @@ export async function updateMLSearchConfig(newConfig: MLSearchConfig) {
|
|||
return mlIDbStorage.putConfig(ML_SEARCH_CONFIG_NAME, newConfig);
|
||||
}
|
||||
|
||||
export class MLFactory {
|
||||
public static getFaceDetectionService(
|
||||
method: FaceDetectionMethod,
|
||||
): FaceDetectionService {
|
||||
if (method === "YoloFace") {
|
||||
return yoloFaceDetectionService;
|
||||
}
|
||||
export interface MLSyncContext {
|
||||
token: string;
|
||||
userID: number;
|
||||
|
||||
throw Error("Unknon face detection method: " + method);
|
||||
}
|
||||
localFilesMap: Map<number, EnteFile>;
|
||||
outOfSyncFiles: EnteFile[];
|
||||
nSyncedFiles: number;
|
||||
nSyncedFaces: number;
|
||||
allSyncedFacesMap?: Map<number, Array<Face>>;
|
||||
|
||||
public static getFaceCropService(method: FaceCropMethod) {
|
||||
if (method === "ArcFace") {
|
||||
return arcfaceCropService;
|
||||
}
|
||||
error?: Error;
|
||||
|
||||
throw Error("Unknon face crop method: " + method);
|
||||
}
|
||||
// oldMLLibraryData: MLLibraryData;
|
||||
mlLibraryData: MLLibraryData;
|
||||
|
||||
public static getFaceAlignmentService(
|
||||
method: FaceAlignmentMethod,
|
||||
): FaceAlignmentService {
|
||||
if (method === "ArcFace") {
|
||||
return arcfaceAlignmentService;
|
||||
}
|
||||
syncQueue: PQueue;
|
||||
|
||||
throw Error("Unknon face alignment method: " + method);
|
||||
}
|
||||
|
||||
public static getBlurDetectionService(
|
||||
method: BlurDetectionMethod,
|
||||
): BlurDetectionService {
|
||||
if (method === "Laplacian") {
|
||||
return laplacianBlurDetectionService;
|
||||
}
|
||||
|
||||
throw Error("Unknon blur detection method: " + method);
|
||||
}
|
||||
|
||||
public static getFaceEmbeddingService(
|
||||
method: FaceEmbeddingMethod,
|
||||
): FaceEmbeddingService {
|
||||
if (method === "MobileFaceNet") {
|
||||
return mobileFaceNetEmbeddingService;
|
||||
}
|
||||
|
||||
throw Error("Unknon face embedding method: " + method);
|
||||
}
|
||||
|
||||
public static getClusteringService(
|
||||
method: ClusteringMethod,
|
||||
): ClusteringService {
|
||||
if (method === "Hdbscan") {
|
||||
return hdbscanClusteringService;
|
||||
}
|
||||
if (method === "Dbscan") {
|
||||
return dbscanClusteringService;
|
||||
}
|
||||
|
||||
throw Error("Unknon clustering method: " + method);
|
||||
}
|
||||
|
||||
public static getMLSyncContext(
|
||||
token: string,
|
||||
userID: number,
|
||||
config: MLSyncConfig,
|
||||
shouldUpdateMLVersion: boolean = true,
|
||||
) {
|
||||
return new LocalMLSyncContext(
|
||||
token,
|
||||
userID,
|
||||
config,
|
||||
shouldUpdateMLVersion,
|
||||
);
|
||||
}
|
||||
getEnteWorker(id: number): Promise<any>;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
export class LocalMLSyncContext implements MLSyncContext {
|
||||
public token: string;
|
||||
public userID: number;
|
||||
public config: MLSyncConfig;
|
||||
public shouldUpdateMLVersion: boolean;
|
||||
|
||||
public faceDetectionService: FaceDetectionService;
|
||||
public faceCropService: FaceCropService;
|
||||
public faceAlignmentService: FaceAlignmentService;
|
||||
public blurDetectionService: BlurDetectionService;
|
||||
public faceEmbeddingService: FaceEmbeddingService;
|
||||
public faceClusteringService: ClusteringService;
|
||||
|
||||
public localFilesMap: Map<number, EnteFile>;
|
||||
public outOfSyncFiles: EnteFile[];
|
||||
|
@@ -229,36 +101,9 @@ export class LocalMLSyncContext implements MLSyncContext {
|
|||
>;
|
||||
private enteWorkers: Array<any>;
|
||||
|
||||
constructor(
|
||||
token: string,
|
||||
userID: number,
|
||||
config: MLSyncConfig,
|
||||
shouldUpdateMLVersion: boolean = true,
|
||||
concurrency?: number,
|
||||
) {
|
||||
constructor(token: string, userID: number, concurrency?: number) {
|
||||
this.token = token;
|
||||
this.userID = userID;
|
||||
this.config = config;
|
||||
this.shouldUpdateMLVersion = shouldUpdateMLVersion;
|
||||
|
||||
this.faceDetectionService = MLFactory.getFaceDetectionService(
|
||||
this.config.faceDetection.method,
|
||||
);
|
||||
this.faceCropService = MLFactory.getFaceCropService(
|
||||
this.config.faceCrop.method,
|
||||
);
|
||||
this.faceAlignmentService = MLFactory.getFaceAlignmentService(
|
||||
this.config.faceAlignment.method,
|
||||
);
|
||||
this.blurDetectionService = MLFactory.getBlurDetectionService(
|
||||
this.config.blurDetection.method,
|
||||
);
|
||||
this.faceEmbeddingService = MLFactory.getFaceEmbeddingService(
|
||||
this.config.faceEmbedding.method,
|
||||
);
|
||||
this.faceClusteringService = MLFactory.getClusteringService(
|
||||
this.config.faceClustering.method,
|
||||
);
|
||||
|
||||
this.outOfSyncFiles = [];
|
||||
this.nSyncedFiles = 0;
|
||||
|
@@ -311,8 +156,6 @@ class MachineLearningService {
|
|||
throw Error("Token needed by ml service to sync file");
|
||||
}
|
||||
|
||||
await downloadManager.init(APPS.PHOTOS, { token });
|
||||
|
||||
const syncContext = await this.getSyncContext(token, userID);
|
||||
|
||||
await this.syncLocalFiles(syncContext);
|
||||
|
@@ -323,12 +166,10 @@ class MachineLearningService {
|
|||
await this.syncFiles(syncContext);
|
||||
}
|
||||
|
||||
// TODO: running index before all files are on latest ml version
|
||||
// may be need to just take synced files on latest ml version for indexing
|
||||
if (
|
||||
syncContext.outOfSyncFiles.length <= 0 ||
|
||||
(syncContext.nSyncedFiles === syncContext.config.batchSize &&
|
||||
Math.random() < 0.2)
|
||||
// TODO-ML(MR): Forced disable.
|
||||
(syncContext.nSyncedFiles === batchSize && Math.random() < 0)
|
||||
) {
|
||||
await this.syncIndex(syncContext);
|
||||
}
|
||||
|
@@ -349,14 +190,8 @@ class MachineLearningService {
|
|||
return mlSyncResult;
|
||||
}
|
||||
|
||||
public async regenerateFaceCrop(
|
||||
token: string,
|
||||
userID: number,
|
||||
faceID: string,
|
||||
) {
|
||||
await downloadManager.init(APPS.PHOTOS, { token });
|
||||
const syncContext = await this.getSyncContext(token, userID);
|
||||
return FaceService.regenerateFaceCrop(syncContext, faceID);
|
||||
public async regenerateFaceCrop(faceID: string) {
|
||||
return regenerateFaceCrop(faceID);
|
||||
}
|
||||
|
||||
private newMlData(fileId: number) {
|
||||
|
@@ -434,8 +269,8 @@ class MachineLearningService {
|
|||
private async getOutOfSyncFiles(syncContext: MLSyncContext) {
|
||||
const startTime = Date.now();
|
||||
const fileIds = await mlIDbStorage.getFileIds(
|
||||
syncContext.config.batchSize,
|
||||
syncContext.config.mlVersion,
|
||||
batchSize,
|
||||
defaultMLVersion,
|
||||
MAX_ML_SYNC_ERROR_COUNT,
|
||||
);
|
||||
|
||||
|
@@ -481,9 +316,10 @@ class MachineLearningService {
|
|||
if (!this.syncContext) {
|
||||
log.info("Creating syncContext");
|
||||
|
||||
this.syncContext = getMLSyncConfig().then((mlSyncConfig) =>
|
||||
MLFactory.getMLSyncContext(token, userID, mlSyncConfig, true),
|
||||
);
|
||||
// TODO-ML(MR): Keep as promise for now.
|
||||
this.syncContext = new Promise((resolve) => {
|
||||
resolve(new LocalMLSyncContext(token, userID));
|
||||
});
|
||||
} else {
|
||||
log.info("reusing existing syncContext");
|
||||
}
|
||||
|
@@ -491,11 +327,13 @@ class MachineLearningService {
|
|||
}
|
||||
|
||||
private async getLocalSyncContext(token: string, userID: number) {
|
||||
// TODO-ML(MR): This is updating the file ML version. verify.
|
||||
if (!this.localSyncContext) {
|
||||
log.info("Creating localSyncContext");
|
||||
this.localSyncContext = getMLSyncConfig().then((mlSyncConfig) =>
|
||||
MLFactory.getMLSyncContext(token, userID, mlSyncConfig, false),
|
||||
);
|
||||
// TODO-ML(MR):
|
||||
this.localSyncContext = new Promise((resolve) => {
|
||||
resolve(new LocalMLSyncContext(token, userID));
|
||||
});
|
||||
} else {
|
||||
log.info("reusing existing localSyncContext");
|
||||
}
|
||||
|
@@ -516,24 +354,22 @@ class MachineLearningService {
|
|||
userID: number,
|
||||
enteFile: EnteFile,
|
||||
localFile?: globalThis.File,
|
||||
): Promise<MlFileData | Error> {
|
||||
) {
|
||||
const syncContext = await this.getLocalSyncContext(token, userID);
|
||||
|
||||
try {
|
||||
const mlFileData = await this.syncFileWithErrorHandler(
|
||||
await this.syncFileWithErrorHandler(
|
||||
syncContext,
|
||||
enteFile,
|
||||
localFile,
|
||||
);
|
||||
|
||||
if (syncContext.nSyncedFiles >= syncContext.config.batchSize) {
|
||||
if (syncContext.nSyncedFiles >= batchSize) {
|
||||
await this.closeLocalSyncContext();
|
||||
}
|
||||
// await syncContext.dispose();
|
||||
return mlFileData;
|
||||
} catch (e) {
|
||||
console.error("Error while syncing local file: ", enteFile.id, e);
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -541,16 +377,12 @@ class MachineLearningService {
|
|||
syncContext: MLSyncContext,
|
||||
enteFile: EnteFile,
|
||||
localFile?: globalThis.File,
|
||||
): Promise<MlFileData> {
|
||||
) {
|
||||
try {
|
||||
console.log(
|
||||
`Indexing ${enteFile.title ?? "<untitled>"} ${enteFile.id}`,
|
||||
);
|
||||
const mlFileData = await this.syncFile(
|
||||
syncContext,
|
||||
enteFile,
|
||||
localFile,
|
||||
);
|
||||
const mlFileData = await this.syncFile(enteFile, localFile);
|
||||
syncContext.nSyncedFaces += mlFileData.faces?.length || 0;
|
||||
syncContext.nSyncedFiles += 1;
|
||||
return mlFileData;
|
||||
|
@@ -583,35 +415,20 @@ class MachineLearningService {
|
|||
}
|
||||
}
|
||||
|
||||
private async syncFile(
|
||||
syncContext: MLSyncContext,
|
||||
enteFile: EnteFile,
|
||||
localFile?: globalThis.File,
|
||||
) {
|
||||
console.log("Syncing for file" + enteFile.title);
|
||||
private async syncFile(enteFile: EnteFile, localFile?: globalThis.File) {
|
||||
log.debug(() => ({ a: "Syncing file", enteFile }));
|
||||
const fileContext: MLSyncFileContext = { enteFile, localFile };
|
||||
const oldMlFile =
|
||||
(fileContext.oldMlFile = await this.getMLFileData(enteFile.id)) ??
|
||||
this.newMlData(enteFile.id);
|
||||
if (
|
||||
fileContext.oldMlFile?.mlVersion === syncContext.config.mlVersion
|
||||
// TODO: reset mlversion of all files when user changes image source
|
||||
) {
|
||||
return fileContext.oldMlFile;
|
||||
const oldMlFile = await this.getMLFileData(enteFile.id);
|
||||
if (oldMlFile && oldMlFile.mlVersion) {
|
||||
return oldMlFile;
|
||||
}
|
||||
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
|
||||
|
||||
if (syncContext.shouldUpdateMLVersion) {
|
||||
newMlFile.mlVersion = syncContext.config.mlVersion;
|
||||
} else if (fileContext.oldMlFile?.mlVersion) {
|
||||
newMlFile.mlVersion = fileContext.oldMlFile.mlVersion;
|
||||
}
|
||||
const newMlFile = (fileContext.newMlFile = this.newMlData(enteFile.id));
|
||||
newMlFile.mlVersion = defaultMLVersion;
|
||||
|
||||
try {
|
||||
await ReaderService.getImageBitmap(syncContext, fileContext);
|
||||
await Promise.all([
|
||||
this.syncFileAnalyzeFaces(syncContext, fileContext),
|
||||
]);
|
||||
await fetchImageBitmapForContext(fileContext);
|
||||
await syncFileAnalyzeFaces(fileContext);
|
||||
newMlFile.errorCount = 0;
|
||||
newMlFile.lastErrorMessage = undefined;
|
||||
await this.persistOnServer(newMlFile, enteFile);
|
||||
|
@@ -685,44 +502,11 @@ class MachineLearningService {
|
|||
public async syncIndex(syncContext: MLSyncContext) {
|
||||
await this.getMLLibraryData(syncContext);
|
||||
|
||||
await PeopleService.syncPeopleIndex(syncContext);
|
||||
// TODO-ML(MR): Ensure this doesn't run until fixed.
|
||||
await syncPeopleIndex(syncContext);
|
||||
|
||||
await this.persistMLLibraryData(syncContext);
|
||||
}
|
||||
|
||||
private async syncFileAnalyzeFaces(
|
||||
syncContext: MLSyncContext,
|
||||
fileContext: MLSyncFileContext,
|
||||
) {
|
||||
const { newMlFile } = fileContext;
|
||||
const startTime = Date.now();
|
||||
await FaceService.syncFileFaceDetections(syncContext, fileContext);
|
||||
|
||||
if (newMlFile.faces && newMlFile.faces.length > 0) {
|
||||
await FaceService.syncFileFaceCrops(syncContext, fileContext);
|
||||
|
||||
const alignedFacesData = await FaceService.syncFileFaceAlignments(
|
||||
syncContext,
|
||||
fileContext,
|
||||
);
|
||||
|
||||
await FaceService.syncFileFaceEmbeddings(
|
||||
syncContext,
|
||||
fileContext,
|
||||
alignedFacesData,
|
||||
);
|
||||
|
||||
await FaceService.syncFileFaceMakeRelativeDetections(
|
||||
syncContext,
|
||||
fileContext,
|
||||
);
|
||||
}
|
||||
log.info(
|
||||
`face detection time taken ${fileContext.enteFile.id}`,
|
||||
Date.now() - startTime,
|
||||
"ms",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export default new MachineLearningService();
|
||||
|
|
|
@ -5,24 +5,15 @@ import { eventBus, Events } from "@ente/shared/events";
|
|||
import { getToken, getUserID } from "@ente/shared/storage/localStorage/helpers";
|
||||
import debounce from "debounce";
|
||||
import PQueue from "p-queue";
|
||||
import mlIDbStorage from "services/ml/db";
|
||||
import { createFaceComlinkWorker } from "services/ml/face";
|
||||
import type { DedicatedMLWorker } from "services/ml/face.worker";
|
||||
import { MLSyncResult } from "services/ml/types";
|
||||
import { createFaceComlinkWorker } from "services/face";
|
||||
import mlIDbStorage from "services/face/db";
|
||||
import type { DedicatedMLWorker } from "services/face/face.worker";
|
||||
import { MLSyncResult } from "services/face/types";
|
||||
import { EnteFile } from "types/file";
|
||||
import { logQueueStats } from "./machineLearningService";
|
||||
|
||||
const LIVE_SYNC_IDLE_DEBOUNCE_SEC = 30;
|
||||
const LIVE_SYNC_QUEUE_TIMEOUT_SEC = 300;
|
||||
const LOCAL_FILES_UPDATED_DEBOUNCE_SEC = 30;
|
||||
|
||||
export type JobState = "Scheduled" | "Running" | "NotScheduled";
|
||||
|
||||
export interface JobConfig {
|
||||
intervalSec: number;
|
||||
backoffMultiplier: number;
|
||||
}
|
||||
|
||||
export interface MLSyncJobResult {
|
||||
shouldBackoff: boolean;
|
||||
mlSyncResult: MLSyncResult;
|
||||
|
@@ -118,18 +109,18 @@ class MLWorkManager {
|
|||
this.liveSyncQueue = new PQueue({
|
||||
concurrency: 1,
|
||||
// TODO: temp, remove
|
||||
timeout: LIVE_SYNC_QUEUE_TIMEOUT_SEC * 1000,
|
||||
timeout: 300 * 1000,
|
||||
throwOnTimeout: true,
|
||||
});
|
||||
this.mlSearchEnabled = false;
|
||||
|
||||
this.debouncedLiveSyncIdle = debounce(
|
||||
() => this.onLiveSyncIdle(),
|
||||
LIVE_SYNC_IDLE_DEBOUNCE_SEC * 1000,
|
||||
30 * 1000,
|
||||
);
|
||||
this.debouncedFilesUpdated = debounce(
|
||||
() => this.mlSearchEnabled && this.localFilesUpdatedHandler(),
|
||||
LOCAL_FILES_UPDATED_DEBOUNCE_SEC * 1000,
|
||||
30 * 1000,
|
||||
);
|
||||
}
|
||||
|
||||
|
@@ -241,19 +232,13 @@ class MLWorkManager {
|
|||
}
|
||||
|
||||
public async syncLocalFile(enteFile: EnteFile, localFile: globalThis.File) {
|
||||
const result = await this.liveSyncQueue.add(async () => {
|
||||
await this.liveSyncQueue.add(async () => {
|
||||
this.stopSyncJob();
|
||||
const token = getToken();
|
||||
const userID = getUserID();
|
||||
const mlWorker = await this.getLiveSyncWorker();
|
||||
return mlWorker.syncLocalFile(token, userID, enteFile, localFile);
|
||||
});
|
||||
|
||||
if (result instanceof Error) {
|
||||
// TODO: redirect/refresh to gallery in case of session_expired
|
||||
// may not be required as uploader should anyways take care of this
|
||||
console.error("Error while syncing local file: ", result);
|
||||
}
|
||||
}
|
||||
|
||||
// Sync Job
|
||||
|
@@ -326,11 +311,11 @@ class MLWorkManager {
|
|||
}
|
||||
}
|
||||
|
||||
public stopSyncJob(terminateWorker: boolean = true) {
|
||||
public stopSyncJob() {
|
||||
try {
|
||||
log.info("MLWorkManager.stopSyncJob");
|
||||
this.mlSyncJob?.stop();
|
||||
terminateWorker && this.terminateSyncJobWorker();
|
||||
this.terminateSyncJobWorker();
|
||||
} catch (e) {
|
||||
log.error("Failed to stop MLSync Job", e);
|
||||
}
|
||||
|
|
|
@@ -1,41 +0,0 @@
import { workerBridge } from "@/next/worker/worker-bridge";
import {
    FaceEmbedding,
    FaceEmbeddingMethod,
    FaceEmbeddingService,
    Versioned,
} from "services/ml/types";

export const mobileFaceNetFaceSize = 112;

class MobileFaceNetEmbeddingService implements FaceEmbeddingService {
    public method: Versioned<FaceEmbeddingMethod>;
    public faceSize: number;

    public constructor() {
        this.method = {
            value: "MobileFaceNet",
            version: 2,
        };
        this.faceSize = mobileFaceNetFaceSize;
    }

    public async getFaceEmbeddings(
        faceData: Float32Array,
    ): Promise<Array<FaceEmbedding>> {
        const outputData = await workerBridge.faceEmbedding(faceData);

        const embeddingSize = 192;
        const embeddings = new Array<FaceEmbedding>(
            outputData.length / embeddingSize,
        );
        for (let i = 0; i < embeddings.length; i++) {
            embeddings[i] = new Float32Array(
                outputData.slice(i * embeddingSize, (i + 1) * embeddingSize),
            );
        }
        return embeddings;
    }
}

export default new MobileFaceNetEmbeddingService();
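// Not part of the original file: a sketch of how the flat worker output maps
// onto per-face embeddings. With two aligned 112x112 faces the worker returns
// a Float32Array of length 2 * 192, and face i occupies the slice
// [i * 192, (i + 1) * 192). The euclidean helper from "hdbscan" (used
// elsewhere in this codebase) can then compare two faces; the all-zero
// stand-in array below is illustrative only.
import { euclidean } from "hdbscan";

const exampleEmbeddingSize = 192;
const exampleEmbeddingForFace = (output: Float32Array, i: number) =>
    new Float32Array(
        output.slice(i * exampleEmbeddingSize, (i + 1) * exampleEmbeddingSize),
    );

const exampleOutput = new Float32Array(2 * exampleEmbeddingSize); // stand-in for workerBridge.faceEmbedding(...)
const exampleDistance = euclidean(
    Array.from(exampleEmbeddingForFace(exampleOutput, 0)),
    Array.from(exampleEmbeddingForFace(exampleOutput, 1)),
);
console.log(exampleDistance); // 0 for this all-zero stand-in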
|
|
@@ -1,113 +0,0 @@
|
|||
import log from "@/next/log";
|
||||
import mlIDbStorage from "services/ml/db";
|
||||
import { Face, MLSyncContext, Person } from "services/ml/types";
|
||||
import FaceService, { isDifferentOrOld } from "./faceService";
|
||||
import { fetchImageBitmap, getLocalFile } from "./readerService";
|
||||
|
||||
class PeopleService {
|
||||
async syncPeopleIndex(syncContext: MLSyncContext) {
|
||||
const filesVersion = await mlIDbStorage.getIndexVersion("files");
|
||||
if (
|
||||
filesVersion <= (await mlIDbStorage.getIndexVersion("people")) &&
|
||||
!isDifferentOrOld(
|
||||
syncContext.mlLibraryData?.faceClusteringMethod,
|
||||
syncContext.faceClusteringService.method,
|
||||
)
|
||||
) {
|
||||
log.info(
|
||||
"[MLService] Skipping people index as already synced to latest version",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: have faces addressable through fileId + faceId
|
||||
// to avoid index based addressing, which is prone to wrong results
|
||||
// one way could be to match nearest face within threshold in the file
|
||||
const allFacesMap = await FaceService.getAllSyncedFacesMap(syncContext);
|
||||
const allFaces = getAllFacesFromMap(allFacesMap);
|
||||
|
||||
await FaceService.runFaceClustering(syncContext, allFaces);
|
||||
await this.syncPeopleFromClusters(syncContext, allFacesMap, allFaces);
|
||||
|
||||
await mlIDbStorage.setIndexVersion("people", filesVersion);
|
||||
}
|
||||
|
||||
private async syncPeopleFromClusters(
|
||||
syncContext: MLSyncContext,
|
||||
allFacesMap: Map<number, Array<Face>>,
|
||||
allFaces: Array<Face>,
|
||||
) {
|
||||
const clusters =
|
||||
syncContext.mlLibraryData.faceClusteringResults?.clusters;
|
||||
if (!clusters || clusters.length < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const face of allFaces) {
|
||||
face.personId = undefined;
|
||||
}
|
||||
await mlIDbStorage.clearAllPeople();
|
||||
for (const [index, cluster] of clusters.entries()) {
|
||||
const faces = cluster.map((f) => allFaces[f]).filter((f) => f);
|
||||
|
||||
// TODO: take default display face from last leaves of hdbscan clusters
|
||||
const personFace = findFirstIfSorted(
|
||||
faces,
|
||||
(a, b) => b.detection.probability - a.detection.probability,
|
||||
);
|
||||
|
||||
if (personFace && !personFace.crop?.cacheKey) {
|
||||
const file = await getLocalFile(personFace.fileId);
|
||||
const imageBitmap = await fetchImageBitmap(file);
|
||||
await FaceService.saveFaceCrop(
|
||||
imageBitmap,
|
||||
personFace,
|
||||
syncContext,
|
||||
);
|
||||
}
|
||||
|
||||
const person: Person = {
|
||||
id: index,
|
||||
files: faces.map((f) => f.fileId),
|
||||
displayFaceId: personFace?.id,
|
||||
faceCropCacheKey: personFace?.crop?.cacheKey,
|
||||
};
|
||||
|
||||
await mlIDbStorage.putPerson(person);
|
||||
|
||||
faces.forEach((face) => {
|
||||
face.personId = person.id;
|
||||
});
|
||||
// log.info("Creating person: ", person, faces);
|
||||
}
|
||||
|
||||
await mlIDbStorage.updateFaces(allFacesMap);
|
||||
}
|
||||
}
|
||||
|
||||
export default new PeopleService();
|
||||
|
||||
function findFirstIfSorted<T>(
|
||||
elements: Array<T>,
|
||||
comparator: (a: T, b: T) => number,
|
||||
) {
|
||||
if (!elements || elements.length < 1) {
|
||||
return;
|
||||
}
|
||||
let first = elements[0];
|
||||
|
||||
for (let i = 1; i < elements.length; i++) {
|
||||
const comp = comparator(elements[i], first);
|
||||
if (comp < 0) {
|
||||
first = elements[i];
|
||||
}
|
||||
}
|
||||
|
||||
return first;
|
||||
}
|
||||
|
||||
function getAllFacesFromMap(allFacesMap: Map<number, Array<Face>>) {
|
||||
const allFaces = [...allFacesMap.values()].flat();
|
||||
|
||||
return allFaces;
|
||||
}
|
|
@@ -1,332 +0,0 @@
|
|||
import { workerBridge } from "@/next/worker/worker-bridge";
|
||||
import { euclidean } from "hdbscan";
|
||||
import {
|
||||
Box,
|
||||
Dimensions,
|
||||
Point,
|
||||
boxFromBoundingBox,
|
||||
newBox,
|
||||
} from "services/ml/geom";
|
||||
import {
|
||||
FaceDetection,
|
||||
FaceDetectionMethod,
|
||||
FaceDetectionService,
|
||||
Versioned,
|
||||
} from "services/ml/types";
|
||||
import {
|
||||
Matrix,
|
||||
applyToPoint,
|
||||
compose,
|
||||
scale,
|
||||
translate,
|
||||
} from "transformation-matrix";
|
||||
import {
|
||||
clamp,
|
||||
getPixelBilinear,
|
||||
normalizePixelBetween0And1,
|
||||
} from "utils/image";
|
||||
|
||||
class YoloFaceDetectionService implements FaceDetectionService {
|
||||
public method: Versioned<FaceDetectionMethod>;
|
||||
|
||||
public constructor() {
|
||||
this.method = {
|
||||
value: "YoloFace",
|
||||
version: 1,
|
||||
};
|
||||
}
|
||||
|
||||
public async detectFaces(
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<Array<FaceDetection>> {
|
||||
const maxFaceDistancePercent = Math.sqrt(2) / 100;
|
||||
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
|
||||
const preprocessResult =
|
||||
this.preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
imageBitmap,
|
||||
640,
|
||||
640,
|
||||
);
|
||||
const data = preprocessResult.data;
|
||||
const resized = preprocessResult.newSize;
|
||||
const outputData = await workerBridge.detectFaces(data);
|
||||
const faces = this.getFacesFromYoloOutput(
|
||||
outputData as Float32Array,
|
||||
0.7,
|
||||
);
|
||||
const inBox = newBox(0, 0, resized.width, resized.height);
|
||||
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const transform = computeTransformToBox(inBox, toBox);
|
||||
const faceDetections: Array<FaceDetection> = faces?.map((f) => {
|
||||
const box = transformBox(f.box, transform);
|
||||
const normLandmarks = f.landmarks;
|
||||
const landmarks = transformPoints(normLandmarks, transform);
|
||||
return {
|
||||
box,
|
||||
landmarks,
|
||||
probability: f.probability as number,
|
||||
} as FaceDetection;
|
||||
});
|
||||
return removeDuplicateDetections(faceDetections, maxFaceDistance);
|
||||
}
|
||||
|
||||
private preprocessImageBitmapToFloat32ChannelsFirst(
|
||||
imageBitmap: ImageBitmap,
|
||||
requiredWidth: number,
|
||||
requiredHeight: number,
|
||||
maintainAspectRatio: boolean = true,
|
||||
normFunction: (
|
||||
pixelValue: number,
|
||||
) => number = normalizePixelBetween0And1,
|
||||
) {
|
||||
// Create an OffscreenCanvas and set its size
|
||||
const offscreenCanvas = new OffscreenCanvas(
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
const ctx = offscreenCanvas.getContext("2d");
|
||||
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
|
||||
const imageData = ctx.getImageData(
|
||||
0,
|
||||
0,
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
const pixelData = imageData.data;
|
||||
|
||||
let scaleW = requiredWidth / imageBitmap.width;
|
||||
let scaleH = requiredHeight / imageBitmap.height;
|
||||
if (maintainAspectRatio) {
|
||||
const scale = Math.min(
|
||||
requiredWidth / imageBitmap.width,
|
||||
requiredHeight / imageBitmap.height,
|
||||
);
|
||||
scaleW = scale;
|
||||
scaleH = scale;
|
||||
}
|
||||
const scaledWidth = clamp(
|
||||
Math.round(imageBitmap.width * scaleW),
|
||||
0,
|
||||
requiredWidth,
|
||||
);
|
||||
const scaledHeight = clamp(
|
||||
Math.round(imageBitmap.height * scaleH),
|
||||
0,
|
||||
requiredHeight,
|
||||
);
|
||||
|
||||
const processedImage = new Float32Array(
|
||||
1 * 3 * requiredWidth * requiredHeight,
|
||||
);
|
||||
|
||||
// Populate the Float32Array with normalized pixel values
|
||||
let pixelIndex = 0;
|
||||
const channelOffsetGreen = requiredHeight * requiredWidth;
|
||||
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
|
||||
for (let h = 0; h < requiredHeight; h++) {
|
||||
for (let w = 0; w < requiredWidth; w++) {
|
||||
let pixel: {
|
||||
r: number;
|
||||
g: number;
|
||||
b: number;
|
||||
};
|
||||
if (w >= scaledWidth || h >= scaledHeight) {
|
||||
pixel = { r: 114, g: 114, b: 114 };
|
||||
} else {
|
||||
pixel = getPixelBilinear(
|
||||
w / scaleW,
|
||||
h / scaleH,
|
||||
pixelData,
|
||||
imageBitmap.width,
|
||||
imageBitmap.height,
|
||||
);
|
||||
}
|
||||
processedImage[pixelIndex] = normFunction(pixel.r);
|
||||
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
|
||||
pixel.g,
|
||||
);
|
||||
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
|
||||
pixel.b,
|
||||
);
|
||||
pixelIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
data: processedImage,
|
||||
originalSize: {
|
||||
width: imageBitmap.width,
|
||||
height: imageBitmap.height,
|
||||
},
|
||||
newSize: { width: scaledWidth, height: scaledHeight },
|
||||
};
|
||||
}
|
||||
|
||||
// The rowOutput is a Float32Array of shape [25200, 16], where each row represents a bounding box.
|
||||
private getFacesFromYoloOutput(
|
||||
rowOutput: Float32Array,
|
||||
minScore: number,
|
||||
): Array<FaceDetection> {
|
||||
const faces: Array<FaceDetection> = [];
|
||||
// iterate over each row
|
||||
for (let i = 0; i < rowOutput.length; i += 16) {
|
||||
const score = rowOutput[i + 4];
|
||||
if (score < minScore) {
|
||||
continue;
|
||||
}
|
||||
// The first 4 values represent the bounding box (xCenter, yCenter, width, height)
|
||||
const xCenter = rowOutput[i];
|
||||
const yCenter = rowOutput[i + 1];
|
||||
const width = rowOutput[i + 2];
|
||||
const height = rowOutput[i + 3];
|
||||
const xMin = xCenter - width / 2.0; // topLeft
|
||||
const yMin = yCenter - height / 2.0; // topLeft
|
||||
|
||||
const leftEyeX = rowOutput[i + 5];
|
||||
const leftEyeY = rowOutput[i + 6];
|
||||
const rightEyeX = rowOutput[i + 7];
|
||||
const rightEyeY = rowOutput[i + 8];
|
||||
const noseX = rowOutput[i + 9];
|
||||
const noseY = rowOutput[i + 10];
|
||||
const leftMouthX = rowOutput[i + 11];
|
||||
const leftMouthY = rowOutput[i + 12];
|
||||
const rightMouthX = rowOutput[i + 13];
|
||||
const rightMouthY = rowOutput[i + 14];
|
||||
|
||||
const box = new Box({
|
||||
x: xMin,
|
||||
y: yMin,
|
||||
width: width,
|
||||
height: height,
|
||||
});
|
||||
const probability = score as number;
|
||||
const landmarks = [
|
||||
new Point(leftEyeX, leftEyeY),
|
||||
new Point(rightEyeX, rightEyeY),
|
||||
new Point(noseX, noseY),
|
||||
new Point(leftMouthX, leftMouthY),
|
||||
new Point(rightMouthX, rightMouthY),
|
||||
];
|
||||
const face: FaceDetection = {
|
||||
box,
|
||||
landmarks,
|
||||
probability,
|
||||
// detectionMethod: this.method,
|
||||
};
|
||||
faces.push(face);
|
||||
}
|
||||
return faces;
|
||||
}
|
||||
|
||||
public getRelativeDetection(
|
||||
faceDetection: FaceDetection,
|
||||
dimensions: Dimensions,
|
||||
): FaceDetection {
|
||||
const oldBox: Box = faceDetection.box;
|
||||
const box = new Box({
|
||||
x: oldBox.x / dimensions.width,
|
||||
y: oldBox.y / dimensions.height,
|
||||
width: oldBox.width / dimensions.width,
|
||||
height: oldBox.height / dimensions.height,
|
||||
});
|
||||
const oldLandmarks: Point[] = faceDetection.landmarks;
|
||||
const landmarks = oldLandmarks.map((l) => {
|
||||
return new Point(l.x / dimensions.width, l.y / dimensions.height);
|
||||
});
|
||||
return {
|
||||
box,
|
||||
landmarks,
|
||||
probability: faceDetection.probability,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export default new YoloFaceDetectionService();
|
||||
|
||||
/**
|
||||
* Removes duplicate face detections from an array of detections.
|
||||
*
|
||||
* This function sorts the detections by their probability in descending order, then iterates over them.
|
||||
* For each detection, it calculates the Euclidean distance to all other detections.
|
||||
* If the distance is less than or equal to the specified threshold (`withinDistance`), the other detection is considered a duplicate and is removed.
|
||||
*
|
||||
* @param detections - An array of face detections to remove duplicates from.
|
||||
* @param withinDistance - The maximum Euclidean distance between two detections for them to be considered duplicates.
|
||||
*
|
||||
* @returns An array of face detections with duplicates removed.
|
||||
*/
|
||||
function removeDuplicateDetections(
|
||||
detections: Array<FaceDetection>,
|
||||
withinDistance: number,
|
||||
) {
|
||||
// console.time('removeDuplicates');
|
||||
detections.sort((a, b) => b.probability - a.probability);
|
||||
const isSelected = new Map<number, boolean>();
|
||||
for (let i = 0; i < detections.length; i++) {
|
||||
if (isSelected.get(i) === false) {
|
||||
continue;
|
||||
}
|
||||
isSelected.set(i, true);
|
||||
for (let j = i + 1; j < detections.length; j++) {
|
||||
if (isSelected.get(j) === false) {
|
||||
continue;
|
||||
}
|
||||
const centeri = getDetectionCenter(detections[i]);
|
||||
const centerj = getDetectionCenter(detections[j]);
|
||||
const dist = euclidean(
|
||||
[centeri.x, centeri.y],
|
||||
[centerj.x, centerj.y],
|
||||
);
|
||||
if (dist <= withinDistance) {
|
||||
isSelected.set(j, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uniques: Array<FaceDetection> = [];
|
||||
for (let i = 0; i < detections.length; i++) {
|
||||
isSelected.get(i) && uniques.push(detections[i]);
|
||||
}
|
||||
// console.timeEnd('removeDuplicates');
|
||||
return uniques;
|
||||
}
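// Not part of the original file: a sketch showing the dedup behaviour
// described above. The detections are made-up; their landmark centers are a
// couple of pixels apart, well within the distance threshold, so only the
// higher-probability one survives.
const exampleDetection = (cx: number, cy: number, probability: number): FaceDetection => ({
    box: new Box({ x: cx - 50, y: cy - 50, width: 100, height: 100 }),
    landmarks: [
        new Point(cx - 10, cy - 10), // left eye
        new Point(cx + 10, cy - 10), // right eye
        new Point(cx, cy), // nose
        new Point(cx - 8, cy + 12), // left mouth
    ],
    probability,
});

const exampleDeduped = removeDuplicateDetections(
    [exampleDetection(100, 100, 0.9), exampleDetection(102, 101, 0.8)],
    Math.sqrt(2) * 10, // illustrative threshold, in pixels
);
console.log(exampleDeduped.length); // 1: only the 0.9-probability detection remains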
|
||||
|
||||
function getDetectionCenter(detection: FaceDetection) {
|
||||
const center = new Point(0, 0);
|
||||
// TODO: first 4 landmarks are applicable to blazeface only
|
||||
// this needs to consider eyes, nose and mouth landmarks to take center
|
||||
detection.landmarks?.slice(0, 4).forEach((p) => {
|
||||
center.x += p.x;
|
||||
center.y += p.y;
|
||||
});
|
||||
|
||||
return new Point(center.x / 4, center.y / 4);
|
||||
}
|
||||
|
||||
function computeTransformToBox(inBox: Box, toBox: Box): Matrix {
|
||||
return compose(
|
||||
translate(toBox.x, toBox.y),
|
||||
scale(toBox.width / inBox.width, toBox.height / inBox.height),
|
||||
);
|
||||
}
|
||||
|
||||
function transformPoint(point: Point, transform: Matrix) {
|
||||
const txdPoint = applyToPoint(transform, point);
|
||||
return new Point(txdPoint.x, txdPoint.y);
|
||||
}
|
||||
|
||||
function transformPoints(points: Point[], transform: Matrix) {
|
||||
return points?.map((p) => transformPoint(p, transform));
|
||||
}
|
||||
|
||||
function transformBox(box: Box, transform: Matrix) {
|
||||
const topLeft = transformPoint(box.topLeft, transform);
|
||||
const bottomRight = transformPoint(box.bottomRight, transform);
|
||||
|
||||
return boxFromBoundingBox({
|
||||
left: topLeft.x,
|
||||
top: topLeft.y,
|
||||
right: bottomRight.x,
|
||||
bottom: bottomRight.y,
|
||||
});
|
||||
}
|
|
@@ -1,331 +0,0 @@
|
|||
import { DebugInfo } from "hdbscan";
|
||||
import PQueue from "p-queue";
|
||||
import { Dimensions } from "services/ml/geom";
|
||||
import { EnteFile } from "types/file";
|
||||
import { Box, Point } from "./geom";
|
||||
|
||||
export interface MLSyncResult {
|
||||
nOutOfSyncFiles: number;
|
||||
nSyncedFiles: number;
|
||||
nSyncedFaces: number;
|
||||
nFaceClusters: number;
|
||||
nFaceNoise: number;
|
||||
error?: Error;
|
||||
}
|
||||
|
||||
export declare type FaceDescriptor = Float32Array;
|
||||
|
||||
export declare type Cluster = Array<number>;
|
||||
|
||||
export interface ClusteringResults {
|
||||
clusters: Array<Cluster>;
|
||||
noise: Cluster;
|
||||
}
|
||||
|
||||
export interface HdbscanResults extends ClusteringResults {
|
||||
debugInfo?: DebugInfo;
|
||||
}
|
||||
|
||||
export interface FacesCluster {
|
||||
faces: Cluster;
|
||||
summary?: FaceDescriptor;
|
||||
}
|
||||
|
||||
export interface FacesClustersWithNoise {
|
||||
clusters: Array<FacesCluster>;
|
||||
noise: Cluster;
|
||||
}
|
||||
|
||||
export interface NearestCluster {
|
||||
cluster: FacesCluster;
|
||||
distance: number;
|
||||
}
|
||||
|
||||
export declare type Landmark = Point;
|
||||
|
||||
export declare type ImageType = "Original" | "Preview";
|
||||
|
||||
export declare type FaceDetectionMethod = "YoloFace";
|
||||
|
||||
export declare type FaceCropMethod = "ArcFace";
|
||||
|
||||
export declare type FaceAlignmentMethod = "ArcFace";
|
||||
|
||||
export declare type FaceEmbeddingMethod = "MobileFaceNet";
|
||||
|
||||
export declare type BlurDetectionMethod = "Laplacian";
|
||||
|
||||
export declare type ClusteringMethod = "Hdbscan" | "Dbscan";
|
||||
|
||||
export class AlignedBox {
|
||||
box: Box;
|
||||
rotation: number;
|
||||
}
|
||||
|
||||
export interface Versioned<T> {
|
||||
value: T;
|
||||
version: number;
|
||||
}
|
||||
|
||||
export interface FaceDetection {
|
||||
// box and landmarks are relative to image dimensions stored at mlFileData
|
||||
box: Box;
|
||||
landmarks?: Array<Landmark>;
|
||||
probability?: number;
|
||||
}
|
||||
|
||||
export interface DetectedFace {
|
||||
fileId: number;
|
||||
detection: FaceDetection;
|
||||
}
|
||||
|
||||
export interface DetectedFaceWithId extends DetectedFace {
|
||||
id: string;
|
||||
}
|
||||
|
||||
export interface FaceCrop {
|
||||
image: ImageBitmap;
|
||||
// imageBox is relative to image dimentions stored at mlFileData
|
||||
imageBox: Box;
|
||||
}
|
||||
|
||||
export interface StoredFaceCrop {
|
||||
cacheKey: string;
|
||||
imageBox: Box;
|
||||
}
|
||||
|
||||
export interface CroppedFace extends DetectedFaceWithId {
|
||||
crop?: StoredFaceCrop;
|
||||
}
|
||||
|
||||
export interface FaceAlignment {
|
||||
// TODO: remove affine matrix as rotation, size and center
|
||||
// are simple to store and use, affine matrix adds complexity while getting crop
|
||||
affineMatrix: Array<Array<number>>;
|
||||
rotation: number;
|
||||
// size and center is relative to image dimentions stored at mlFileData
|
||||
size: number;
|
||||
center: Point;
|
||||
}
|
||||
|
||||
export interface AlignedFace extends CroppedFace {
|
||||
alignment?: FaceAlignment;
|
||||
blurValue?: number;
|
||||
}
|
||||
|
||||
export declare type FaceEmbedding = Float32Array;
|
||||
|
||||
export interface FaceWithEmbedding extends AlignedFace {
|
||||
embedding?: FaceEmbedding;
|
||||
}
|
||||
|
||||
export interface Face extends FaceWithEmbedding {
|
||||
personId?: number;
|
||||
}
|
||||
|
||||
export interface Person {
|
||||
id: number;
|
||||
name?: string;
|
||||
files: Array<number>;
|
||||
displayFaceId?: string;
|
||||
faceCropCacheKey?: string;
|
||||
}
|
||||
|
||||
export interface MlFileData {
|
||||
fileId: number;
|
||||
faces?: Face[];
|
||||
imageSource?: ImageType;
|
||||
imageDimensions?: Dimensions;
|
||||
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
|
||||
faceCropMethod?: Versioned<FaceCropMethod>;
|
||||
faceAlignmentMethod?: Versioned<FaceAlignmentMethod>;
|
||||
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
|
||||
mlVersion: number;
|
||||
errorCount: number;
|
||||
lastErrorMessage?: string;
|
||||
}
|
||||
|
||||
export interface FaceDetectionConfig {
|
||||
method: FaceDetectionMethod;
|
||||
}
|
||||
|
||||
export interface FaceCropConfig {
|
||||
enabled: boolean;
|
||||
method: FaceCropMethod;
|
||||
padding: number;
|
||||
maxSize: number;
|
||||
blobOptions: {
|
||||
type: string;
|
||||
quality: number;
|
||||
};
|
||||
}
|
||||
|
||||
export interface FaceAlignmentConfig {
|
||||
method: FaceAlignmentMethod;
|
||||
}
|
||||
|
||||
export interface BlurDetectionConfig {
|
||||
method: BlurDetectionMethod;
|
||||
threshold: number;
|
||||
}
|
||||
|
||||
export interface FaceEmbeddingConfig {
|
||||
method: FaceEmbeddingMethod;
|
||||
faceSize: number;
|
||||
generateTsne?: boolean;
|
||||
}
|
||||
|
||||
export interface FaceClusteringConfig extends ClusteringConfig {}
|
||||
|
||||
export declare type TSNEMetric = "euclidean" | "manhattan";
|
||||
|
||||
export interface TSNEConfig {
|
||||
samples: number;
|
||||
dim: number;
|
||||
perplexity?: number;
|
||||
earlyExaggeration?: number;
|
||||
learningRate?: number;
|
||||
nIter?: number;
|
||||
metric?: TSNEMetric;
|
||||
}
|
||||
|
||||
export interface MLSyncConfig {
|
||||
batchSize: number;
|
||||
imageSource: ImageType;
|
||||
faceDetection: FaceDetectionConfig;
|
||||
faceCrop: FaceCropConfig;
|
||||
faceAlignment: FaceAlignmentConfig;
|
||||
blurDetection: BlurDetectionConfig;
|
||||
faceEmbedding: FaceEmbeddingConfig;
|
||||
faceClustering: FaceClusteringConfig;
|
||||
mlVersion: number;
|
||||
}
|
||||
|
||||
export interface MLSearchConfig {
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
export interface MLSyncContext {
|
||||
token: string;
|
||||
userID: number;
|
||||
config: MLSyncConfig;
|
||||
shouldUpdateMLVersion: boolean;
|
||||
|
||||
faceDetectionService: FaceDetectionService;
|
||||
faceCropService: FaceCropService;
|
||||
faceAlignmentService: FaceAlignmentService;
|
||||
faceEmbeddingService: FaceEmbeddingService;
|
||||
blurDetectionService: BlurDetectionService;
|
||||
faceClusteringService: ClusteringService;
|
||||
|
||||
localFilesMap: Map<number, EnteFile>;
|
||||
outOfSyncFiles: EnteFile[];
|
||||
nSyncedFiles: number;
|
||||
nSyncedFaces: number;
|
||||
allSyncedFacesMap?: Map<number, Array<Face>>;
|
||||
|
||||
error?: Error;
|
||||
|
||||
// oldMLLibraryData: MLLibraryData;
|
||||
mlLibraryData: MLLibraryData;
|
||||
|
||||
syncQueue: PQueue;
|
||||
|
||||
getEnteWorker(id: number): Promise<any>;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface MLSyncFileContext {
|
||||
enteFile: EnteFile;
|
||||
localFile?: globalThis.File;
|
||||
|
||||
oldMlFile?: MlFileData;
|
||||
newMlFile?: MlFileData;
|
||||
|
||||
imageBitmap?: ImageBitmap;
|
||||
|
||||
newDetection?: boolean;
|
||||
newAlignment?: boolean;
|
||||
}
|
||||
|
||||
export interface MLLibraryData {
|
||||
faceClusteringMethod?: Versioned<ClusteringMethod>;
|
||||
faceClusteringResults?: ClusteringResults;
|
||||
faceClustersWithNoise?: FacesClustersWithNoise;
|
||||
}
|
||||
|
||||
export declare type MLIndex = "files" | "people";
|
||||
|
||||
export interface FaceDetectionService {
|
||||
method: Versioned<FaceDetectionMethod>;
|
||||
|
||||
detectFaces(image: ImageBitmap): Promise<Array<FaceDetection>>;
|
||||
getRelativeDetection(
|
||||
faceDetection: FaceDetection,
|
||||
imageDimensions: Dimensions,
|
||||
): FaceDetection;
|
||||
}
|
||||
|
||||
export interface FaceCropService {
|
||||
method: Versioned<FaceCropMethod>;
|
||||
|
||||
getFaceCrop(
|
||||
imageBitmap: ImageBitmap,
|
||||
face: FaceDetection,
|
||||
config: FaceCropConfig,
|
||||
): Promise<FaceCrop>;
|
||||
}
|
||||
|
||||
export interface FaceAlignmentService {
|
||||
method: Versioned<FaceAlignmentMethod>;
|
||||
getFaceAlignment(faceDetection: FaceDetection): FaceAlignment;
|
||||
}
|
||||
|
||||
export interface FaceEmbeddingService {
|
||||
method: Versioned<FaceEmbeddingMethod>;
|
||||
faceSize: number;
|
||||
|
||||
getFaceEmbeddings(faceImages: Float32Array): Promise<Array<FaceEmbedding>>;
|
||||
}
|
||||
|
||||
export interface BlurDetectionService {
|
||||
method: Versioned<BlurDetectionMethod>;
|
||||
detectBlur(alignedFaces: Float32Array, faces: Face[]): number[];
|
||||
}
|
||||
|
||||
export interface ClusteringService {
|
||||
method: Versioned<ClusteringMethod>;
|
||||
|
||||
cluster(
|
||||
input: ClusteringInput,
|
||||
config: ClusteringConfig,
|
||||
): Promise<ClusteringResults>;
|
||||
}
|
||||
|
||||
export interface ClusteringConfig {
|
||||
method: ClusteringMethod;
|
||||
minClusterSize: number;
|
||||
minSamples?: number;
|
||||
clusterSelectionEpsilon?: number;
|
||||
clusterSelectionMethod?: "eom" | "leaf";
|
||||
maxDistanceInsideCluster?: number;
|
||||
minInputSize?: number;
|
||||
generateDebugInfo?: boolean;
|
||||
}
|
||||
|
||||
export declare type ClusteringInput = Array<Array<number>>;
|
||||
|
||||
export interface MachineLearningWorker {
|
||||
closeLocalSyncContext(): Promise<void>;
|
||||
|
||||
syncLocalFile(
|
||||
token: string,
|
||||
userID: number,
|
||||
enteFile: EnteFile,
|
||||
localFile: globalThis.File,
|
||||
): Promise<MlFileData | Error>;
|
||||
|
||||
sync(token: string, userID: number): Promise<MLSyncResult>;
|
||||
|
||||
close(): void;
|
||||
}
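For reference, a minimal sketch of how the ClusteringService contract above could be satisfied with the hdbscan package (the same library whose DebugInfo type is imported at the top of this file). The Hdbscan constructor options and the getClusters/getNoise/getDebugInfo accessors are assumptions about that library's API, not something this diff establishes:

```ts
// Illustrative only; assumes the hdbscan package exposes an Hdbscan class
// taking { input, minClusterSize, minSamples, debug } and returning clusters
// (arrays of input indices) plus a noise set, mirroring HdbscanResults above.
import { Hdbscan } from "hdbscan";

import type {
    ClusteringConfig,
    ClusteringInput,
    HdbscanResults,
} from "services/ml/types";

export const clusterFaceEmbeddings = (
    input: ClusteringInput,
    config: ClusteringConfig,
): HdbscanResults => {
    const hdbscan = new Hdbscan({
        input,
        minClusterSize: config.minClusterSize,
        minSamples: config.minSamples,
        debug: config.generateDebugInfo,
    });
    return {
        clusters: hdbscan.getClusters(),
        noise: hdbscan.getNoise(),
        debugInfo: hdbscan.getDebugInfo(),
    };
};
```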
|
@@ -2,9 +2,9 @@ import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import * as chrono from "chrono-node";
import { t } from "i18next";
import { getMLSyncConfig } from "services/machineLearning/machineLearningService";
import mlIDbStorage from "services/ml/db";
import { Person } from "services/ml/types";
import mlIDbStorage from "services/face/db";
import { Person } from "services/face/types";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { Collection } from "types/collection";
import { EntityType, LocationTag, LocationTagData } from "types/entity";
import { EnteFile } from "types/file";
|
@@ -175,8 +175,7 @@ export async function getAllPeopleSuggestion(): Promise<Array<Suggestion>> {
export async function getIndexStatusSuggestion(): Promise<Suggestion> {
    try {
        const config = await getMLSyncConfig();
        const indexStatus = await mlIDbStorage.getIndexStatus(config.mlVersion);
        const indexStatus = await mlIDbStorage.getIndexStatus(defaultMLVersion);

        let label;
        if (!indexStatus.localFilesSynced) {
|
@@ -1,7 +1,7 @@
import { FILE_TYPE } from "@/media/file-type";
import { IndexStatus } from "services/face/db";
import { Person } from "services/face/types";
import { City } from "services/locationSearchService";
import { IndexStatus } from "services/ml/db";
import { Person } from "services/ml/types";
import { LocationTagData } from "types/entity";
import { EnteFile } from "types/file";
|
@@ -1,8 +1,8 @@
// these utils only work in env where OffscreenCanvas is available

import { Matrix, inverse } from "ml-matrix";
import { Box, Dimensions, enlargeBox } from "services/ml/geom";
import { FaceAlignment } from "services/ml/types";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import { FaceAlignment } from "services/face/types";

export function normalizePixelBetween0And1(pixelValue: number) {
    return pixelValue / 255.0;
|
@@ -450,17 +450,17 @@ export interface BlobOptions {
    quality?: number;
}

export async function imageBitmapToBlob(
    imageBitmap: ImageBitmap,
    options?: BlobOptions,
) {
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
    const offscreen = new OffscreenCanvas(
        imageBitmap.width,
        imageBitmap.height,
    );
    offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);

    return offscreen.convertToBlob(options);
    return offscreen.convertToBlob({
        type: "image/jpeg",
        quality: 0.8,
    });
}

export async function imageBitmapFromBlob(blob: Blob) {
|
@@ -174,3 +174,15 @@ some cases.
- [sanitize-filename](https://github.com/parshap/node-sanitize-filename) is
  for converting arbitrary strings into strings that are suitable for being
  used as filenames.

## Face search

- [matrix](https://github.com/mljs/matrix) and
  [similarity-transformation](https://github.com/shaileshpandit/similarity-transformation-js)
  are used during face alignment.

- [transformation-matrix](https://github.com/chrvadala/transformation-matrix)
  is used during face detection.

- [hdbscan](https://github.com/shaileshpandit/hdbscan-js) is used for face
  clustering.
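The transformation-matrix functions mentioned in this list are the same ones the removed computeTransformToBox/transformBox helpers earlier in this diff are built on. A standalone sketch of that pattern follows; the 640×640 detector input and the photo dimensions are assumed example values, not something this diff specifies:

```ts
// Standalone sketch (not the project's code). Maps a box detected in the
// face detector's input space back into the original image's coordinate
// space, using the same compose/translate/scale pattern as the removed
// computeTransformToBox/transformBox helpers.
import {
    applyToPoint,
    compose,
    scale,
    translate,
    type Matrix,
} from "transformation-matrix";

interface Box {
    x: number;
    y: number;
    width: number;
    height: number;
}

const transformToBox = (inBox: Box, toBox: Box): Matrix =>
    compose(
        translate(toBox.x, toBox.y),
        scale(toBox.width / inBox.width, toBox.height / inBox.height),
    );

const mapBox = (box: Box, m: Matrix): Box => {
    const tl = applyToPoint(m, { x: box.x, y: box.y });
    const br = applyToPoint(m, { x: box.x + box.width, y: box.y + box.height });
    return { x: tl.x, y: tl.y, width: br.x - tl.x, height: br.y - tl.y };
};

// Example: a detection made on an assumed 640×640 model input, mapped back
// onto a 4032×3024 photo.
const toImage = transformToBox(
    { x: 0, y: 0, width: 640, height: 640 },
    { x: 0, y: 0, width: 4032, height: 3024 },
);
const faceBoxInImage = mapBox({ x: 100, y: 120, width: 64, height: 64 }, toImage);
```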
|
@@ -332,12 +332,12 @@ export interface Electron {
    detectFaces: (input: Float32Array) => Promise<Float32Array>;

    /**
     * Return a MobileFaceNet embedding for the given face data.
     * Return MobileFaceNet embeddings for the given faces.
     *
     * Both the input and output are opaque binary data whose internal structure
     * is specific to our implementation and the model (MobileFaceNet) we use.
     */
    faceEmbedding: (input: Float32Array) => Promise<Float32Array>;
    faceEmbeddings: (input: Float32Array) => Promise<Float32Array>;

    /**
     * Return a face crop stored by a previous version of ML.
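A hedged sketch of how a caller might consume the renamed plural API. The 192-float-per-face layout is an assumption about the MobileFaceNet output, and the `electron` parameter stands in for however the interface above is exposed to the caller:

```ts
// Illustration only. Splits the flat result of faceEmbeddings() into one
// Float32Array per face, assuming each embedding occupies 192 consecutive
// floats (an assumed MobileFaceNet output dimension).
const embeddingSize = 192;

export const faceEmbeddingsPerFace = async (
    electron: { faceEmbeddings: (input: Float32Array) => Promise<Float32Array> },
    preprocessedFaces: Float32Array,
): Promise<Float32Array[]> => {
    const flat = await electron.faceEmbeddings(preprocessedFaces);
    const perFace: Float32Array[] = [];
    for (let i = 0; i < flat.length; i += embeddingSize)
        perFace.push(flat.slice(i, i + embeddingSize));
    return perFace;
};
```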
|
@@ -47,8 +47,8 @@ const workerBridge = {
    convertToJPEG: (imageData: Uint8Array) =>
        ensureElectron().convertToJPEG(imageData),
    detectFaces: (input: Float32Array) => ensureElectron().detectFaces(input),
    faceEmbedding: (input: Float32Array) =>
        ensureElectron().faceEmbedding(input),
    faceEmbeddings: (input: Float32Array) =>
        ensureElectron().faceEmbeddings(input),
};

export type WorkerBridge = typeof workerBridge;
|
@@ -1896,11 +1896,6 @@ delayed-stream@~1.0.0:
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
|
||||
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
|
||||
|
||||
density-clustering@^1.3.0:
|
||||
version "1.3.0"
|
||||
resolved "https://registry.yarnpkg.com/density-clustering/-/density-clustering-1.3.0.tgz#dc9f59c8f0ab97e1624ac64930fd3194817dcac5"
|
||||
integrity sha512-icpmBubVTwLnsaor9qH/4tG5+7+f61VcqMN3V3pm9sxxSCt2Jcs0zWOgwZW9ARJYaKD3FumIgHiMOcIMRRAzFQ==
|
||||
|
||||
dequal@^2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/dequal/-/dequal-2.0.3.tgz#2644214f1997d39ed0ee0ece72335490a7ac67be"
|
||||