diff --git a/web/apps/photos/src/services/face/crop.ts b/web/apps/photos/src/services/face/crop.ts
new file mode 100644
index 000000000..369dfc654
--- /dev/null
+++ b/web/apps/photos/src/services/face/crop.ts
@@ -0,0 +1,94 @@
+import { blobCache } from "@/next/blob-cache";
+import type { Box, Face, FaceAlignment } from "./types";
+
+export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
+    const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
+    const blob = await imageBitmapToBlob(faceCrop);
+    faceCrop.close();
+
+    const cache = await blobCache("face-crops");
+    await cache.put(face.id, blob);
+
+    return blob;
+};
+
+const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
+    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
+    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
+    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
+};
+
+const extractFaceCrop = (
+    imageBitmap: ImageBitmap,
+    alignment: FaceAlignment,
+): ImageBitmap => {
+    // TODO-ML: This algorithm is different from what is used by the mobile app.
+    // Also, it needs to be something that can work fully using the embedding we
+    // receive from remote - the `alignment.boundingBox` will not be available
+    // to us in such cases.
+    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
+    const outputSize = { width: paddedBox.width, height: paddedBox.height };
+
+    const maxDimension = 256;
+    const scale = Math.min(
+        maxDimension / paddedBox.width,
+        maxDimension / paddedBox.height,
+    );
+
+    if (scale < 1) {
+        outputSize.width = Math.round(scale * paddedBox.width);
+        outputSize.height = Math.round(scale * paddedBox.height);
+    }
+
+    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
+    const offscreenCtx = offscreen.getContext("2d");
+    offscreenCtx.imageSmoothingQuality = "high";
+
+    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
+
+    const outputBox = {
+        x: -outputSize.width / 2,
+        y: -outputSize.height / 2,
+        width: outputSize.width,
+        height: outputSize.height,
+    };
+
+    const enlargedBox = enlargeBox(paddedBox, 1.5);
+    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
+
+    offscreenCtx.drawImage(
+        imageBitmap,
+        enlargedBox.x,
+        enlargedBox.y,
+        enlargedBox.width,
+        enlargedBox.height,
+        enlargedOutputBox.x,
+        enlargedOutputBox.y,
+        enlargedOutputBox.width,
+        enlargedOutputBox.height,
+    );
+
+    return offscreen.transferToImageBitmap();
+};
+
+/** Round all the components of the box. */
+const roundBox = (box: Box): Box => {
+    const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
+        (val) => Math.round(val),
+    );
+    return { x, y, width, height };
+};
+
+/** Increase the size of the given {@link box} by {@link factor}. */
+const enlargeBox = (box: Box, factor: number): Box => {
+    const center = { x: box.x + box.width / 2, y: box.y + box.height / 2 };
+    const newWidth = factor * box.width;
+    const newHeight = factor * box.height;
+
+    return {
+        x: center.x - newWidth / 2,
+        y: center.y - newHeight / 2,
+        width: newWidth,
+        height: newHeight,
+    };
+};
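An aside on the box arithmetic used above: `enlargeBox` scales a box about its center, so a factor of 1.5 adds 25% padding on each side, and `extractFaceCrop` then caps the longer edge of the output at 256 pixels. A minimal standalone sketch of this, with the helper adapted from crop.ts and a made-up detection box:

    interface Box {
        x: number;
        y: number;
        width: number;
        height: number;
    }

    // Adapted from crop.ts above.
    const enlargeBox = (box: Box, factor: number): Box => {
        const center = { x: box.x + box.width / 2, y: box.y + box.height / 2 };
        return {
            x: center.x - (factor * box.width) / 2,
            y: center.y - (factor * box.height) / 2,
            width: factor * box.width,
            height: factor * box.height,
        };
    };

    // A made-up 100×50 detection at (200, 100).
    const padded = enlargeBox({ x: 200, y: 100, width: 100, height: 50 }, 1.5);
    console.log(padded); // { x: 175, y: 87.5, width: 150, height: 75 }
    // Both 150 and 75 are below 256, so the scale clamp leaves this size as is.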
diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts
index 39456f7dd..2021a6935 100644
--- a/web/apps/photos/src/services/face/f-index.ts
+++ b/web/apps/photos/src/services/face/f-index.ts
@@ -1,16 +1,10 @@
 import { FILE_TYPE } from "@/media/file-type";
-import { blobCache } from "@/next/blob-cache";
 import log from "@/next/log";
 import { workerBridge } from "@/next/worker/worker-bridge";
 import { Matrix } from "ml-matrix";
-import {
+import type {
     Box,
     Dimensions,
-    Point,
-    enlargeBox,
-    roundBox,
-} from "services/face/geom";
-import type {
     Face,
     FaceAlignment,
     FaceDetection,
@@ -26,6 +20,7 @@ import {
     translate,
 } from "transformation-matrix";
 import type { EnteFile } from "types/file";
+import { saveFaceCrop } from "./crop";
 import { fetchImageBitmap, getLocalFileImageBitmap } from "./file";
 import {
     clamp,
@@ -148,8 +143,7 @@ const indexFaces_ = async (enteFile: EnteFile, imageBitmap: ImageBitmap) => {
 const detectFaces = async (
     imageBitmap: ImageBitmap,
 ): Promise<FaceDetection[]> => {
-    const rect = ({ width, height }: Dimensions) =>
-        new Box({ x: 0, y: 0, width, height });
+    const rect = ({ width, height }: Dimensions) => ({ x: 0, y: 0, width, height });
 
     const { yoloInput, yoloSize } =
         convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
@@ -259,19 +253,19 @@ const filterExtractDetectionsFromYOLOOutput = (
         const rightMouthX = rows[i + 13];
         const rightMouthY = rows[i + 14];
 
-        const box = new Box({
+        const box = {
             x: xMin,
             y: yMin,
             width: width,
             height: height,
-        });
+        };
         const probability = score as number;
         const landmarks = [
-            new Point(leftEyeX, leftEyeY),
-            new Point(rightEyeX, rightEyeY),
-            new Point(noseX, noseY),
-            new Point(leftMouthX, leftMouthY),
-            new Point(rightMouthX, rightMouthY),
+            { x: leftEyeX, y: leftEyeY },
+            { x: rightEyeX, y: rightEyeY },
+            { x: noseX, y: noseY },
+            { x: leftMouthX, y: leftMouthY },
+            { x: rightMouthX, y: rightMouthY },
         ];
         faces.push({ box, landmarks, probability });
     }
@@ -291,7 +285,7 @@ const transformFaceDetections = (
     const transform = boxTransformationMatrix(inBox, toBox);
     return faceDetections.map((f) => ({
         box: transformBox(f.box, transform),
-        landmarks: f.landmarks.map((p) => transformPoint(p, transform)),
+        landmarks: f.landmarks.map((p) => applyToPoint(transform, p)),
         probability: f.probability,
     }));
 };
@@ -305,24 +299,19 @@ const boxTransformationMatrix = (
         scale(toBox.width / inBox.width, toBox.height / inBox.height),
     );
 
-const transformPoint = (point: Point, transform: TransformationMatrix) => {
-    const txdPoint = applyToPoint(transform, point);
-    return new Point(txdPoint.x, txdPoint.y);
-};
+const transformBox = (box: Box, transform: TransformationMatrix): Box => {
+    const topLeft = applyToPoint(transform, { x: box.x, y: box.y });
+    const bottomRight = applyToPoint(transform, {
+        x: box.x + box.width,
+        y: box.y + box.height,
+    });
 
-const transformBox = (box: Box, transform: TransformationMatrix) => {
-    const topLeft = transformPoint(new Point(box.x, box.y), transform);
-    const bottomRight = transformPoint(
-        new Point(box.x + box.width, box.y + box.height),
-        transform,
-    );
-
-    return new Box({
+    return {
         x: topLeft.x,
         y: topLeft.y,
         width: bottomRight.x - topLeft.x,
         height: bottomRight.y - topLeft.y,
-    });
+    };
 };
 
 /**
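The deleted `transformPoint` wrapper existed only to re-wrap results in the `Point` class; `applyToPoint` from the transformation-matrix package accepts and returns plain `{ x, y }` objects, so with structural types it can be called directly. A short sketch of the same translate-then-scale composition that `boxTransformationMatrix` builds, with made-up dimensions:

    import {
        applyToPoint,
        compose,
        scale,
        translate,
    } from "transformation-matrix";

    // Map a point in a 640×640 model input square back into a
    // hypothetical 1920×1080 image.
    const transform = compose(translate(0, 0), scale(1920 / 640, 1080 / 640));

    console.log(applyToPoint(transform, { x: 320, y: 320 }));
    // => { x: 960, y: 540 }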
@@ -470,14 +459,14 @@ const faceAlignmentUsingSimilarityTransform = (
     const size = 1 / simTransform.scale;
     const meanTranslation = simTransform.toMean.sub(0.5).mul(size);
     const centerMat = simTransform.fromMean.sub(meanTranslation);
-    const center = new Point(centerMat.get(0, 0), centerMat.get(1, 0));
+    const center = { x: centerMat.get(0, 0), y: centerMat.get(1, 0) };
 
-    const boundingBox = new Box({
+    const boundingBox = {
         x: center.x - size / 2,
         y: center.y - size / 2,
         width: size,
         height: size,
-    });
+    };
 
     return { affineMatrix, boundingBox };
 };
@@ -720,85 +709,16 @@ const relativeDetection = (
     { width, height }: Dimensions,
 ): FaceDetection => {
     const oldBox: Box = faceDetection.box;
-    const box = new Box({
+    const box = {
         x: oldBox.x / width,
         y: oldBox.y / height,
         width: oldBox.width / width,
         height: oldBox.height / height,
-    });
-    const landmarks = faceDetection.landmarks.map((l) => {
-        return new Point(l.x / width, l.y / height);
-    });
+    };
+    const landmarks = faceDetection.landmarks.map((l) => ({
+        x: l.x / width,
+        y: l.y / height,
+    }));
     const probability = faceDetection.probability;
     return { box, landmarks, probability };
 };
-
-export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
-    const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
-    const blob = await imageBitmapToBlob(faceCrop);
-    faceCrop.close();
-
-    const cache = await blobCache("face-crops");
-    await cache.put(face.id, blob);
-
-    return blob;
-};
-
-const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
-    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
-    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
-    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
-};
-
-const extractFaceCrop = (
-    imageBitmap: ImageBitmap,
-    alignment: FaceAlignment,
-): ImageBitmap => {
-    // TODO-ML: This algorithm is different from what is used by the mobile app.
-    // Also, it needs to be something that can work fully using the embedding we
-    // receive from remote - the `alignment.boundingBox` will not be available
-    // to us in such cases.
-    const paddedBox = roundBox(enlargeBox(alignment.boundingBox, 1.5));
-    const outputSize = { width: paddedBox.width, height: paddedBox.height };
-
-    const maxDimension = 256;
-    const scale = Math.min(
-        maxDimension / paddedBox.width,
-        maxDimension / paddedBox.height,
-    );
-
-    if (scale < 1) {
-        outputSize.width = Math.round(scale * paddedBox.width);
-        outputSize.height = Math.round(scale * paddedBox.height);
-    }
-
-    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
-    const offscreenCtx = offscreen.getContext("2d");
-    offscreenCtx.imageSmoothingQuality = "high";
-
-    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
-
-    const outputBox = new Box({
-        x: -outputSize.width / 2,
-        y: -outputSize.height / 2,
-        width: outputSize.width,
-        height: outputSize.height,
-    });
-
-    const enlargedBox = enlargeBox(paddedBox, 1.5);
-    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
-
-    offscreenCtx.drawImage(
-        imageBitmap,
-        enlargedBox.x,
-        enlargedBox.y,
-        enlargedBox.width,
-        enlargedBox.height,
-        enlargedOutputBox.x,
-        enlargedOutputBox.y,
-        enlargedOutputBox.width,
-        enlargedOutputBox.height,
-    );
-
-    return offscreen.transferToImageBitmap();
-};
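`relativeDetection` above normalizes boxes and landmarks by the image dimensions, so stored detections are independent of any particular rendered size. A worked sketch with made-up numbers:

    // A 192×216 detection at (480, 270) in a 1920×1080 image becomes:
    const box = {
        x: 480 / 1920, // 0.25
        y: 270 / 1080, // 0.25
        width: 192 / 1920, // 0.1
        height: 216 / 1080, // 0.2
    };
    // Multiplying by a display size later recovers absolute coordinates.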
diff --git a/web/apps/photos/src/services/face/geom.ts b/web/apps/photos/src/services/face/geom.ts
deleted file mode 100644
index 5f6456ca6..000000000
--- a/web/apps/photos/src/services/face/geom.ts
+++ /dev/null
@@ -1,57 +0,0 @@
-export class Point {
-    public x: number;
-    public y: number;
-
-    constructor(x: number, y: number) {
-        this.x = x;
-        this.y = y;
-    }
-}
-
-export interface Dimensions {
-    width: number;
-    height: number;
-}
-
-export interface IRect {
-    x: number;
-    y: number;
-    width: number;
-    height: number;
-}
-
-export class Box implements IRect {
-    public x: number;
-    public y: number;
-    public width: number;
-    public height: number;
-
-    constructor({ x, y, width, height }: IRect) {
-        this.x = x;
-        this.y = y;
-        this.width = width;
-        this.height = height;
-    }
-}
-
-/** Round all the components of the box. */
-export const roundBox = (box: Box): Box => {
-    const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(
-        (val) => Math.round(val),
-    );
-    return new Box({ x, y, width, height });
-};
-
-/** Increase the size of the given {@link box} by {@link factor}. */
-export const enlargeBox = (box: Box, factor: number) => {
-    const center = new Point(box.x + box.width / 2, box.y + box.height / 2);
-    const newWidth = factor * box.width;
-    const newHeight = factor * box.height;
-
-    return new Box({
-        x: center.x - newWidth / 2,
-        y: center.y - newHeight / 2,
-        width: newWidth,
-        height: newHeight,
-    });
-};
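A likely motivation for replacing the `Point` and `Box` classes with the interfaces now in types.ts (an inference, not stated in the patch): plain object literals are structurally typed, so detector output can be used as-is, and they survive structured cloning — `postMessage` to a worker, IndexedDB persistence — whereas class instances come back as prototype-less plain objects. For example:

    import type { Box } from "./types";

    // Any value of the right shape is a Box; no constructor needed.
    const box: Box = { x: 0, y: 0, width: 100, height: 100 };

    // Round-trips unchanged; methods of a class instance would be lost.
    const copy: Box = structuredClone(box);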
diff --git a/web/apps/photos/src/services/face/remote.ts b/web/apps/photos/src/services/face/remote.ts
index c0a5189bc..7c11f5920 100644
--- a/web/apps/photos/src/services/face/remote.ts
+++ b/web/apps/photos/src/services/face/remote.ts
@@ -2,7 +2,7 @@ import log from "@/next/log";
 import ComlinkCryptoWorker from "@ente/shared/crypto";
 import { putEmbedding } from "services/embeddingService";
 import type { EnteFile } from "types/file";
-import type { Point } from "./geom";
+import type { Point } from "./types";
 import type { Face, FaceDetection, MlFileData } from "./types";
 
 export const putFaceEmbedding = async (
diff --git a/web/apps/photos/src/services/face/types.ts b/web/apps/photos/src/services/face/types.ts
index 423f6afb7..0b1b2f975 100644
--- a/web/apps/photos/src/services/face/types.ts
+++ b/web/apps/photos/src/services/face/types.ts
@@ -1,4 +1,26 @@
-import { Box, Dimensions, Point } from "services/face/geom";
+/** The x and y coordinates of a point. */
+export interface Point {
+    x: number;
+    y: number;
+}
+
+/** The dimensions of something, say an image. */
+export interface Dimensions {
+    width: number;
+    height: number;
+}
+
+/** A rectangle given by its top left coordinates and dimensions. */
+export interface Box {
+    /** The x coordinate of the top left (xMin). */
+    x: number;
+    /** The y coordinate of the top left (yMin). */
+    y: number;
+    /** The width of the box. */
+    width: number;
+    /** The height of the box. */
+    height: number;
+}
 
 export interface FaceDetection {
     // box and landmarks are relative to the image dimensions stored at mlFileData
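For orientation, a sketch of how the relocated `saveFaceCrop` might be consumed: it caches the JPEG blob under `face.id` and also returns it, so a caller can hand the result straight to the UI. The `faceCropUrl` helper below is illustrative, not part of the patch:

    import { saveFaceCrop } from "./crop";
    import type { Face } from "./types";

    // Crop a face out of the full image and get a URL usable as an
    // <img> src. Revoke the URL when done with it.
    const faceCropUrl = async (imageBitmap: ImageBitmap, face: Face) => {
        const blob = await saveFaceCrop(imageBitmap, face);
        return URL.createObjectURL(blob);
    };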