Convert point and box to regular objects
This commit is contained in:
parent
4dbc8ab31e
commit
2d5894c5d6
5 changed files with 146 additions and 167 deletions
94
web/apps/photos/src/services/face/crop.ts
Normal file
94
web/apps/photos/src/services/face/crop.ts
Normal file
|
@ -0,0 +1,94 @@
|
|||
import { blobCache } from "@/next/blob-cache";
|
||||
import type { Box, Face, FaceAlignment } from "./types";
|
||||
|
||||
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
|
||||
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
|
||||
const blob = await imageBitmapToBlob(faceCrop);
|
||||
faceCrop.close();
|
||||
|
||||
const cache = await blobCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
return blob;
|
||||
};
|
||||
|
||||
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
|
||||
const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
|
||||
canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
|
||||
return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
|
||||
};
|
||||
|
||||
/**
 * Crop the region around the face described by {@link alignment} out of
 * {@link imageBitmap}, returning the crop as a new {@link ImageBitmap}.
 *
 * The crop is the face's bounding box padded by 1.5x, downscaled (if needed)
 * so that neither dimension exceeds 256 px.
 */
const extractFaceCrop = (
    imageBitmap: ImageBitmap,
    alignment: FaceAlignment,
): ImageBitmap => {
    // TODO-ML: This algorithm is different from what is used by the mobile app.
    // Also, it needs to be something that can work fully using the embedding we
    // receive from remote - the `alignment.boundingBox` will not be available
    // to us in such cases.
    //
    // Pad the detection box by 1.5x and snap to integer pixel coordinates.
    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
    const outputSize = { width: paddedBox.width, height: paddedBox.height };

    // Cap the larger dimension of the output at 256 px, preserving the
    // aspect ratio. Only downscale; never upscale small crops.
    const maxDimension = 256;
    const scale = Math.min(
        maxDimension / paddedBox.width,
        maxDimension / paddedBox.height,
    );

    if (scale < 1) {
        outputSize.width = Math.round(scale * paddedBox.width);
        outputSize.height = Math.round(scale * paddedBox.height);
    }

    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
    // NOTE(review): `getContext("2d")` is typed as nullable; this dereferences
    // it unchecked — presumably relying on it never failing for a fresh
    // canvas. Confirm this compiles under the project's strictness settings.
    const offscreenCtx = offscreen.getContext("2d");
    offscreenCtx.imageSmoothingQuality = "high";

    // Move the origin to the canvas center so the destination box below can
    // be expressed symmetrically around (0, 0).
    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);

    // The destination rectangle, centered on the (translated) origin.
    const outputBox = {
        x: -outputSize.width / 2,
        y: -outputSize.height / 2,
        width: outputSize.width,
        height: outputSize.height,
    };

    // Enlarge both the source and destination rectangles by the same factor
    // about their centers. Since both grow identically, the central
    // `paddedBox` still maps onto `outputBox`; the extra margin drawn is
    // clipped by the canvas bounds.
    const enlargedBox = enlargeBox(paddedBox, 1.5);
    const enlargedOutputBox = enlargeBox(outputBox, 1.5);

    // 9-argument drawImage: source rect from `imageBitmap`, destination rect
    // on the canvas.
    offscreenCtx.drawImage(
        imageBitmap,
        enlargedBox.x,
        enlargedBox.y,
        enlargedBox.width,
        enlargedBox.height,
        enlargedOutputBox.x,
        enlargedOutputBox.y,
        enlargedOutputBox.width,
        enlargedOutputBox.height,
    );

    return offscreen.transferToImageBitmap();
};
|
||||
|
||||
/** Round all the components of the box. */
|
||||
const roundBox = (box: Box): Box => {
|
||||
const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
|
||||
(val) => Math.round(val),
|
||||
);
|
||||
return { x, y, width, height };
|
||||
};
|
||||
|
||||
/** Increase the size of the given {@link box} by {@link factor}. */
|
||||
const enlargeBox = (box: Box, factor: number): Box => {
|
||||
const center = { x: box.x + box.width / 2, y: box.y + box.height / 2 };
|
||||
const newWidth = factor * box.width;
|
||||
const newHeight = factor * box.height;
|
||||
|
||||
return {
|
||||
x: center.x - newWidth / 2,
|
||||
y: center.y - newHeight / 2,
|
||||
width: newWidth,
|
||||
height: newHeight,
|
||||
};
|
||||
};
|
|
@ -1,16 +1,10 @@
|
|||
import { FILE_TYPE } from "@/media/file-type";
|
||||
import { blobCache } from "@/next/blob-cache";
|
||||
import log from "@/next/log";
|
||||
import { workerBridge } from "@/next/worker/worker-bridge";
|
||||
import { Matrix } from "ml-matrix";
|
||||
import {
|
||||
import type {
|
||||
Box,
|
||||
Dimensions,
|
||||
Point,
|
||||
enlargeBox,
|
||||
roundBox,
|
||||
} from "services/face/geom";
|
||||
import type {
|
||||
Face,
|
||||
FaceAlignment,
|
||||
FaceDetection,
|
||||
|
@ -26,6 +20,7 @@ import {
|
|||
translate,
|
||||
} from "transformation-matrix";
|
||||
import type { EnteFile } from "types/file";
|
||||
import { saveFaceCrop } from "./crop";
|
||||
import { fetchImageBitmap, getLocalFileImageBitmap } from "./file";
|
||||
import {
|
||||
clamp,
|
||||
|
@ -148,8 +143,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => {
|
|||
const detectFaces = async (
|
||||
imageBitmap: ImageBitmap,
|
||||
): Promise<FaceDetection[]> => {
|
||||
const rect = ({ width, height }: Dimensions) =>
|
||||
new Box({ x: 0, y: 0, width, height });
|
||||
const rect = ({ width, height }) => ({ x: 0, y: 0, width, height });
|
||||
|
||||
const { yoloInput, yoloSize } =
|
||||
convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
|
||||
|
@ -259,19 +253,19 @@ const filterExtractDetectionsFromYOLOOutput = (
|
|||
const rightMouthX = rows[i + 13];
|
||||
const rightMouthY = rows[i + 14];
|
||||
|
||||
const box = new Box({
|
||||
const box = {
|
||||
x: xMin,
|
||||
y: yMin,
|
||||
width: width,
|
||||
height: height,
|
||||
});
|
||||
};
|
||||
const probability = score as number;
|
||||
const landmarks = [
|
||||
new Point(leftEyeX, leftEyeY),
|
||||
new Point(rightEyeX, rightEyeY),
|
||||
new Point(noseX, noseY),
|
||||
new Point(leftMouthX, leftMouthY),
|
||||
new Point(rightMouthX, rightMouthY),
|
||||
{ x: leftEyeX, y: leftEyeY },
|
||||
{ x: rightEyeX, y: rightEyeY },
|
||||
{ x: noseX, y: noseY },
|
||||
{ x: leftMouthX, y: leftMouthY },
|
||||
{ x: rightMouthX, y: rightMouthY },
|
||||
];
|
||||
faces.push({ box, landmarks, probability });
|
||||
}
|
||||
|
@ -291,7 +285,7 @@ const transformFaceDetections = (
|
|||
const transform = boxTransformationMatrix(inBox, toBox);
|
||||
return faceDetections.map((f) => ({
|
||||
box: transformBox(f.box, transform),
|
||||
landmarks: f.landmarks.map((p) => transformPoint(p, transform)),
|
||||
landmarks: f.landmarks.map((p) => applyToPoint(transform, p)),
|
||||
probability: f.probability,
|
||||
}));
|
||||
};
|
||||
|
@ -305,24 +299,19 @@ const boxTransformationMatrix = (
|
|||
scale(toBox.width / inBox.width, toBox.height / inBox.height),
|
||||
);
|
||||
|
||||
const transformPoint = (point: Point, transform: TransformationMatrix) => {
|
||||
const txdPoint = applyToPoint(transform, point);
|
||||
return new Point(txdPoint.x, txdPoint.y);
|
||||
};
|
||||
const transformBox = (box: Box, transform: TransformationMatrix): Box => {
|
||||
const topLeft = applyToPoint(transform, { x: box.x, y: box.y });
|
||||
const bottomRight = applyToPoint(transform, {
|
||||
x: box.x + box.width,
|
||||
y: box.y + box.height,
|
||||
});
|
||||
|
||||
const transformBox = (box: Box, transform: TransformationMatrix) => {
|
||||
const topLeft = transformPoint(new Point(box.x, box.y), transform);
|
||||
const bottomRight = transformPoint(
|
||||
new Point(box.x + box.width, box.y + box.height),
|
||||
transform,
|
||||
);
|
||||
|
||||
return new Box({
|
||||
return {
|
||||
x: topLeft.x,
|
||||
y: topLeft.y,
|
||||
width: bottomRight.x - topLeft.x,
|
||||
height: bottomRight.y - topLeft.y,
|
||||
});
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -470,14 +459,14 @@ const faceAlignmentUsingSimilarityTransform = (
|
|||
const size = 1 / simTransform.scale;
|
||||
const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
|
||||
const centerMat = simTransform.fromMean.sub(meanTranslation);
|
||||
const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
|
||||
const center = { x: centerMat.get(0, 0), y: centerMat.get(1, 0) };
|
||||
|
||||
const boundingBox = new Box({
|
||||
const boundingBox = {
|
||||
x: center.x - size / 2,
|
||||
y: center.y - size / 2,
|
||||
width: size,
|
||||
height: size,
|
||||
});
|
||||
};
|
||||
|
||||
return { affineMatrix, boundingBox };
|
||||
};
|
||||
|
@ -720,85 +709,16 @@ const relativeDetection = (
|
|||
{ width, height }: Dimensions,
|
||||
): FaceDetection => {
|
||||
const oldBox: Box = faceDetection.box;
|
||||
const box = new Box({
|
||||
const box = {
|
||||
x: oldBox.x / width,
|
||||
y: oldBox.y / height,
|
||||
width: oldBox.width / width,
|
||||
height: oldBox.height / height,
|
||||
});
|
||||
const landmarks = faceDetection.landmarks.map((l) => {
|
||||
return new Point(l.x / width, l.y / height);
|
||||
});
|
||||
};
|
||||
const landmarks = faceDetection.landmarks.map((l) => ({
|
||||
x: l.x / width,
|
||||
y: l.y / height,
|
||||
}));
|
||||
const probability = faceDetection.probability;
|
||||
return { box, landmarks, probability };
|
||||
};
|
||||
|
||||
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
|
||||
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
|
||||
const blob = await imageBitmapToBlob(faceCrop);
|
||||
faceCrop.close();
|
||||
|
||||
const cache = await blobCache("face-crops");
|
||||
await cache.put(face.id, blob);
|
||||
|
||||
return blob;
|
||||
};
|
||||
|
||||
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
|
||||
const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
|
||||
canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
|
||||
return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
|
||||
};
|
||||
|
||||
const extractFaceCrop = (
|
||||
imageBitmap: ImageBitmap,
|
||||
alignment: FaceAlignment,
|
||||
): ImageBitmap => {
|
||||
// TODO-ML: This algorithm is different from what is used by the mobile app.
|
||||
// Also, it needs to be something that can work fully using the embedding we
|
||||
// receive from remote - the `alignment.boundingBox` will not be available
|
||||
// to us in such cases.
|
||||
const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
|
||||
const outputSize = { width: paddedBox.width, height: paddedBox.height };
|
||||
|
||||
const maxDimension = 256;
|
||||
const scale = Math.min(
|
||||
maxDimension / paddedBox.width,
|
||||
maxDimension / paddedBox.height,
|
||||
);
|
||||
|
||||
if (scale < 1) {
|
||||
outputSize.width = Math.round(scale * paddedBox.width);
|
||||
outputSize.height = Math.round(scale * paddedBox.height);
|
||||
}
|
||||
|
||||
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
|
||||
const offscreenCtx = offscreen.getContext("2d");
|
||||
offscreenCtx.imageSmoothingQuality = "high";
|
||||
|
||||
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
|
||||
|
||||
const outputBox = new Box({
|
||||
x: -outputSize.width / 2,
|
||||
y: -outputSize.height / 2,
|
||||
width: outputSize.width,
|
||||
height: outputSize.height,
|
||||
});
|
||||
|
||||
const enlargedBox = enlargeBox(paddedBox, 1.5);
|
||||
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
|
||||
|
||||
offscreenCtx.drawImage(
|
||||
imageBitmap,
|
||||
enlargedBox.x,
|
||||
enlargedBox.y,
|
||||
enlargedBox.width,
|
||||
enlargedBox.height,
|
||||
enlargedOutputBox.x,
|
||||
enlargedOutputBox.y,
|
||||
enlargedOutputBox.width,
|
||||
enlargedOutputBox.height,
|
||||
);
|
||||
|
||||
return offscreen.transferToImageBitmap();
|
||||
};
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
export class Point {
|
||||
public x: number;
|
||||
public y: number;
|
||||
|
||||
constructor(x: number, y: number) {
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
}
|
||||
}
|
||||
|
||||
export interface Dimensions {
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface IRect {
|
||||
x: number;
|
||||
y: number;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export class Box implements IRect {
|
||||
public x: number;
|
||||
public y: number;
|
||||
public width: number;
|
||||
public height: number;
|
||||
|
||||
constructor({ x, y, width, height }: IRect) {
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
this.width = width;
|
||||
this.height = height;
|
||||
}
|
||||
}
|
||||
|
||||
/** Round all the components of the box. */
|
||||
export const roundBox = (box: Box): Box => {
|
||||
const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
|
||||
(val) => Math.round(val),
|
||||
);
|
||||
return new Box({ x, y, width, height });
|
||||
};
|
||||
|
||||
/** Increase the size of the given {@link box} by {@link factor}. */
|
||||
export const enlargeBox = (box: Box, factor: number) => {
|
||||
const center = new Point(box.x + box.width / 2, box.y + box.height / 2);
|
||||
const newWidth = factor * box.width;
|
||||
const newHeight = factor * box.height;
|
||||
|
||||
return new Box({
|
||||
x: center.x - newWidth / 2,
|
||||
y: center.y - newHeight / 2,
|
||||
width: newWidth,
|
||||
height: newHeight,
|
||||
});
|
||||
};
|
|
@ -2,7 +2,7 @@ import log from "@/next/log";
|
|||
import ComlinkCryptoWorker from "@ente/shared/crypto";
|
||||
import { putEmbedding } from "services/embeddingService";
|
||||
import type { EnteFile } from "types/file";
|
||||
import type { Point } from "./geom";
|
||||
import type { Point } from "./crop";
|
||||
import type { Face, FaceDetection, MlFileData } from "./types";
|
||||
|
||||
export const putFaceEmbedding = async (
|
||||
|
|
|
@ -1,4 +1,26 @@
|
|||
import { Box, Dimensions, Point } from "services/face/geom";
|
||||
/** The x and y coordinates of a point. */
|
||||
export interface Point {
|
||||
x: number;
|
||||
y: number;
|
||||
}
|
||||
|
||||
/** The dimensions of something, say an image. */
|
||||
export interface Dimensions {
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
/** A rectangle given by its top left coordinates and dimensions. */
|
||||
export interface Box {
|
||||
/** The x coordinate of the top left (xMin). */
|
||||
x: number;
|
||||
/** The y coordinate of the top left (yMin). */
|
||||
y: number;
|
||||
/** The width of the box. */
|
||||
width: number;
|
||||
/** The height of the box. */
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface FaceDetection {
|
||||
// box and landmarks are relative to the image dimensions stored at mlFileData
|
||||
|
|
Loading…
Add table
Reference in a new issue