[web] ML cleanup - Part 5/x (#1762)
commit 996d9ccda5
2 changed files with 305 additions and 464 deletions
@@ -1,3 +1,4 @@
+import { FILE_TYPE } from "@/media/file-type";
 import { openCache } from "@/next/blob-cache";
 import log from "@/next/log";
 import { workerBridge } from "@/next/worker/worker-bridge";
@@ -20,12 +21,10 @@ import type { EnteFile } from "types/file";
 import {
     clamp,
     createGrayscaleIntMatrixFromNormalized2List,
-    cropWithRotation,
-    fetchImageBitmapForContext,
-    getFaceId,
-    getPixelBilinear,
-    imageBitmapToBlob,
-    normalizePixelBetween0And1,
+    fetchImageBitmap,
+    getLocalFileImageBitmap,
+    getThumbnailImageBitmap,
+    pixelRGBBilinear,
     warpAffineFloat32List,
 } from "./image";
 import { transformFaceDetections } from "./transform-box";
@@ -70,6 +69,38 @@ export const indexFaces = async (
     return newMlFile;
 };
 
+const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => {
+    if (fileContext.imageBitmap) {
+        return fileContext.imageBitmap;
+    }
+    if (fileContext.localFile) {
+        if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
+            throw new Error("Local file of only image type is supported");
+        }
+        fileContext.imageBitmap = await getLocalFileImageBitmap(
+            fileContext.enteFile,
+            fileContext.localFile,
+        );
+    } else if (
+        [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
+            fileContext.enteFile.metadata.fileType,
+        )
+    ) {
+        fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
+    } else {
+        // TODO-ML(MR): We don't do it on videos, when will we ever come
+        // here?
+        fileContext.imageBitmap = await getThumbnailImageBitmap(
+            fileContext.enteFile,
+        );
+    }
+
+    const { width, height } = fileContext.imageBitmap;
+    fileContext.newMlFile.imageDimensions = { width, height };
+
+    return fileContext.imageBitmap;
+};
+
 const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
     const { newMlFile } = fileContext;
     const startTime = Date.now();
@@ -96,7 +127,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
     fileContext.newDetection = true;
     const imageBitmap = await fetchImageBitmapForContext(fileContext);
     const faceDetections = await detectFaces(imageBitmap);
-    // TODO: reenable faces filtering based on width
+    // TODO-ML(MR): reenable faces filtering based on width
     const detectedFaces = faceDetections?.map((detection) => {
         return {
             fileId: fileContext.enteFile.id,
@@ -105,7 +136,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
     });
     newMlFile.faces = detectedFaces?.map((detectedFace) => ({
         ...detectedFace,
-        id: getFaceId(detectedFace, newMlFile.imageDimensions),
+        id: makeFaceID(detectedFace, newMlFile.imageDimensions),
     }));
     // ?.filter((f) =>
     //     f.box.width > syncContext.config.faceDetection.minFaceSize
@@ -121,149 +152,104 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
 const detectFaces = async (
     imageBitmap: ImageBitmap,
 ): Promise<Array<FaceDetection>> => {
-    const maxFaceDistancePercent = Math.sqrt(2) / 100;
-    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
-    const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
-        imageBitmap,
-        640,
-        640,
-    );
-    const data = preprocessResult.data;
-    const resized = preprocessResult.newSize;
-    const outputData = await workerBridge.detectFaces(data);
-    const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
-    const inBox = newBox(0, 0, resized.width, resized.height);
+    const { yoloInput, yoloSize } =
+        convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
+    const yoloOutput = await workerBridge.detectFaces(yoloInput);
+    const faces = faceDetectionsFromYOLOOutput(yoloOutput);
+    const inBox = newBox(0, 0, yoloSize.width, yoloSize.height);
     const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
     const faceDetections = transformFaceDetections(faces, inBox, toBox);
+
+    const maxFaceDistancePercent = Math.sqrt(2) / 100;
+    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
     return removeDuplicateDetections(faceDetections, maxFaceDistance);
 };
 
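Note: `transformFaceDetections` itself is not part of this diff, so the following is only a sketch of the `inBox` to `toBox` mapping it is assumed to perform: boxes detected in the letterboxed YOLO input space get scaled back into original image space. The `scaleBox` helper is hypothetical.

```ts
// Hypothetical sketch, not the actual transformFaceDetections: scale a box
// from inBox space (the region of the 640x640 input covered by the
// letterboxed image) into toBox space (the original image dimensions).
interface RectLike {
    x: number;
    y: number;
    width: number;
    height: number;
}

const scaleBox = (box: RectLike, inBox: RectLike, toBox: RectLike): RectLike => {
    const sx = toBox.width / inBox.width;
    const sy = toBox.height / inBox.height;
    return {
        x: box.x * sx,
        y: box.y * sy,
        width: box.width * sx,
        height: box.height * sy,
    };
};
```

Because `inBox` is the scaled (letterboxed) size rather than the full 640×640, the gray padding region never contributes to the mapped coordinates.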
-const preprocessImageBitmapToFloat32ChannelsFirst = (
-    imageBitmap: ImageBitmap,
-    requiredWidth: number,
-    requiredHeight: number,
-    maintainAspectRatio: boolean = true,
-    normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
-) => {
+/**
+ * Convert {@link imageBitmap} into the format that the YOLO face detection
+ * model expects.
+ */
+const convertToYOLOInputFloat32ChannelsFirst = (imageBitmap: ImageBitmap) => {
+    const requiredWidth = 640;
+    const requiredHeight = 640;
+
+    const width = imageBitmap.width;
+    const height = imageBitmap.height;
+
-    // Create an OffscreenCanvas and set its size.
-    const offscreenCanvas = new OffscreenCanvas(
-        imageBitmap.width,
-        imageBitmap.height,
-    );
+    const offscreenCanvas = new OffscreenCanvas(width, height);
     const ctx = offscreenCanvas.getContext("2d");
-    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
-    const imageData = ctx.getImageData(
-        0,
-        0,
-        imageBitmap.width,
-        imageBitmap.height,
-    );
+    ctx.drawImage(imageBitmap, 0, 0, width, height);
+    const imageData = ctx.getImageData(0, 0, width, height);
     const pixelData = imageData.data;
 
-    let scaleW = requiredWidth / imageBitmap.width;
-    let scaleH = requiredHeight / imageBitmap.height;
-    if (maintainAspectRatio) {
-        const scale = Math.min(
-            requiredWidth / imageBitmap.width,
-            requiredHeight / imageBitmap.height,
-        );
-        scaleW = scale;
-        scaleH = scale;
-    }
-    const scaledWidth = clamp(
-        Math.round(imageBitmap.width * scaleW),
-        0,
-        requiredWidth,
-    );
-    const scaledHeight = clamp(
-        Math.round(imageBitmap.height * scaleH),
-        0,
-        requiredHeight,
-    );
+    // Maintain aspect ratio.
+    const scale = Math.min(requiredWidth / width, requiredHeight / height);
 
-    const processedImage = new Float32Array(
-        1 * 3 * requiredWidth * requiredHeight,
-    );
+    const scaledWidth = clamp(Math.round(width * scale), 0, requiredWidth);
+    const scaledHeight = clamp(Math.round(height * scale), 0, requiredHeight);
 
-    // Populate the Float32Array with normalized pixel values
-    let pixelIndex = 0;
+    const yoloInput = new Float32Array(1 * 3 * requiredWidth * requiredHeight);
+    const yoloSize = { width: scaledWidth, height: scaledHeight };
+
+    // Populate the Float32Array with normalized pixel values.
+    let pi = 0;
     const channelOffsetGreen = requiredHeight * requiredWidth;
     const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
     for (let h = 0; h < requiredHeight; h++) {
         for (let w = 0; w < requiredWidth; w++) {
-            let pixel: {
-                r: number;
-                g: number;
-                b: number;
-            };
-            if (w >= scaledWidth || h >= scaledHeight) {
-                pixel = { r: 114, g: 114, b: 114 };
-            } else {
-                pixel = getPixelBilinear(
-                    w / scaleW,
-                    h / scaleH,
-                    pixelData,
-                    imageBitmap.width,
-                    imageBitmap.height,
-                );
-            }
-            processedImage[pixelIndex] = normFunction(pixel.r);
-            processedImage[pixelIndex + channelOffsetGreen] = normFunction(
-                pixel.g,
-            );
-            processedImage[pixelIndex + channelOffsetBlue] = normFunction(
-                pixel.b,
-            );
-            pixelIndex++;
+            const { r, g, b } =
+                w >= scaledWidth || h >= scaledHeight
+                    ? { r: 114, g: 114, b: 114 }
+                    : pixelRGBBilinear(
+                          w / scale,
+                          h / scale,
+                          pixelData,
+                          width,
+                          height,
+                      );
+            yoloInput[pi] = r / 255.0;
+            yoloInput[pi + channelOffsetGreen] = g / 255.0;
+            yoloInput[pi + channelOffsetBlue] = b / 255.0;
+            pi++;
         }
     }
 
-    return {
-        data: processedImage,
-        originalSize: {
-            width: imageBitmap.width,
-            height: imageBitmap.height,
-        },
-        newSize: { width: scaledWidth, height: scaledHeight },
-    };
+    return { yoloInput, yoloSize };
 };
 
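To make the letterboxing above concrete, here is the arithmetic traced by hand for a hypothetical 1024×768 input (values not from the diff):

```ts
// Worked example of the aspect-ratio-preserving resize into the 640x640
// YOLO input, for a hypothetical 1024x768 bitmap.
const requiredWidth = 640;
const requiredHeight = 640;
const width = 1024;
const height = 768;

const scale = Math.min(requiredWidth / width, requiredHeight / height);
// Math.min(0.625, 0.8333...) = 0.625

const scaledWidth = Math.round(width * scale); // 640
const scaledHeight = Math.round(height * scale); // 480

// Rows with h >= 480 lie outside the scaled image, so the loop fills them
// with the constant gray (114, 114, 114) padding value.
console.log({ scale, scaledWidth, scaledHeight });
```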
 /**
- * @param rowOutput A Float32Array of shape [25200, 16], where each row
+ * Extract detected faces from the YOLO's output.
+ *
+ * Only detections that exceed a minimum score are returned.
+ *
+ * @param rows A Float32Array of shape [25200, 16], where each row
  * represents a bounding box.
  */
-const getFacesFromYOLOOutput = (
-    rowOutput: Float32Array,
-    minScore: number,
-): Array<FaceDetection> => {
-    const faces: Array<FaceDetection> = [];
+const faceDetectionsFromYOLOOutput = (rows: Float32Array): FaceDetection[] => {
+    const faces: FaceDetection[] = [];
     // Iterate over each row.
-    for (let i = 0; i < rowOutput.length; i += 16) {
-        const score = rowOutput[i + 4];
-        if (score < minScore) {
-            continue;
-        }
-        // The first 4 values represent the bounding box's coordinates:
-        //
-        //     (x1, y1, x2, y2)
-        //
-        const xCenter = rowOutput[i];
-        const yCenter = rowOutput[i + 1];
-        const width = rowOutput[i + 2];
-        const height = rowOutput[i + 3];
+    for (let i = 0; i < rows.length; i += 16) {
+        const score = rows[i + 4];
+        if (score < 0.7) continue;
+
+        const xCenter = rows[i];
+        const yCenter = rows[i + 1];
+        const width = rows[i + 2];
+        const height = rows[i + 3];
         const xMin = xCenter - width / 2.0; // topLeft
         const yMin = yCenter - height / 2.0; // topLeft
 
-        const leftEyeX = rowOutput[i + 5];
-        const leftEyeY = rowOutput[i + 6];
-        const rightEyeX = rowOutput[i + 7];
-        const rightEyeY = rowOutput[i + 8];
-        const noseX = rowOutput[i + 9];
-        const noseY = rowOutput[i + 10];
-        const leftMouthX = rowOutput[i + 11];
-        const leftMouthY = rowOutput[i + 12];
-        const rightMouthX = rowOutput[i + 13];
-        const rightMouthY = rowOutput[i + 14];
+        const leftEyeX = rows[i + 5];
+        const leftEyeY = rows[i + 6];
+        const rightEyeX = rows[i + 7];
+        const rightEyeY = rows[i + 8];
+        const noseX = rows[i + 9];
+        const noseY = rows[i + 10];
+        const leftMouthX = rows[i + 11];
+        const leftMouthY = rows[i + 12];
+        const rightMouthX = rows[i + 13];
+        const rightMouthY = rows[i + 14];
 
         const box = new Box({
             x: xMin,
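For reference, the layout of one 16-float row as the loop above reads it, using a hypothetical row (not real model output):

```ts
// One YOLO row: box center/size, score, then five landmark (x, y) pairs.
const row = new Float32Array([
    320, 240, 100, 120, // xCenter, yCenter, width, height
    0.92, // detection score; rows under 0.7 are skipped
    300, 220, // left eye
    340, 220, // right eye
    320, 245, // nose
    305, 270, // left mouth corner
    335, 270, // right mouth corner
    0, // row[15] is not read by this code
]);

const [xCenter, yCenter, w, h] = row;
const xMin = xCenter - w / 2.0; // 270
const yMin = yCenter - h / 2.0; // 180
console.log({ xMin, yMin, score: row[4] });
```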
@@ -491,6 +477,43 @@ function normalizeLandmarks(
     ) as Array<[number, number]>;
 }
 
+async function extractFaceImagesToFloat32(
+    faceAlignments: Array<FaceAlignment>,
+    faceSize: number,
+    image: ImageBitmap,
+): Promise<Float32Array> {
+    const faceData = new Float32Array(
+        faceAlignments.length * faceSize * faceSize * 3,
+    );
+    for (let i = 0; i < faceAlignments.length; i++) {
+        const alignedFace = faceAlignments[i];
+        const faceDataOffset = i * faceSize * faceSize * 3;
+        warpAffineFloat32List(
+            image,
+            alignedFace,
+            faceSize,
+            faceData,
+            faceDataOffset,
+        );
+    }
+    return faceData;
+}
+
+const makeFaceID = (detectedFace: DetectedFace, imageDims: Dimensions) => {
+    const part = (v: number) => clamp(v, 0.0, 0.999999).toFixed(5).substring(2);
+    const xMin = part(detectedFace.detection.box.x / imageDims.width);
+    const yMin = part(detectedFace.detection.box.y / imageDims.height);
+    const xMax = part(
+        (detectedFace.detection.box.x + detectedFace.detection.box.width) /
+            imageDims.width,
+    );
+    const yMax = part(
+        (detectedFace.detection.box.y + detectedFace.detection.box.height) /
+            imageDims.height,
+    );
+    return [detectedFace.fileId, xMin, yMin, xMax, yMax].join("_");
+};
+
 /**
  * Laplacian blur detection.
  */
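A worked example of the ID format `makeFaceID` produces, for hypothetical values (a 200×300 box at (100, 50) in a 1000×1000 image of file 1234):

```ts
// Each coordinate is normalized to [0, 1), fixed to 5 decimal places, and
// the leading "0." is dropped by substring(2).
const part = (v: number) =>
    Math.min(Math.max(v, 0.0), 0.999999).toFixed(5).substring(2);

const id = [
    1234,
    part(100 / 1000), // "10000"
    part(50 / 1000), // "05000"
    part((100 + 200) / 1000), // "30000"
    part((50 + 300) / 1000), // "35000"
].join("_");

console.log(id); // "1234_10000_05000_30000_35000"
```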
@@ -506,6 +529,8 @@ const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
         const faceImage = createGrayscaleIntMatrixFromNormalized2List(
             alignedFaces,
             i,
+            mobileFaceNetFaceSize,
+            mobileFaceNetFaceSize,
         );
         const laplacian = applyLaplacian(faceImage, direction);
         blurValues.push(matrixVariance(laplacian));
@@ -738,6 +763,12 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
     return blob;
 };
 
+const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
+    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
+    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
+    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
+};
+
 const getFaceCrop = (
     imageBitmap: ImageBitmap,
     faceDetection: FaceDetection,
@@ -766,24 +797,68 @@ const getFaceCrop = (
     };
 };
 
-async function extractFaceImagesToFloat32(
-    faceAlignments: Array<FaceAlignment>,
-    faceSize: number,
-    image: ImageBitmap,
-): Promise<Float32Array> {
-    const faceData = new Float32Array(
-        faceAlignments.length * faceSize * faceSize * 3,
-    );
-    for (let i = 0; i < faceAlignments.length; i++) {
-        const alignedFace = faceAlignments[i];
-        const faceDataOffset = i * faceSize * faceSize * 3;
-        warpAffineFloat32List(
-            image,
-            alignedFace,
-            faceSize,
-            faceData,
-            faceDataOffset,
-        );
-    }
-    return faceData;
-}
+export function cropWithRotation(
+    imageBitmap: ImageBitmap,
+    cropBox: Box,
+    rotation?: number,
+    maxSize?: Dimensions,
+    minSize?: Dimensions,
+) {
+    const box = cropBox.round();
+
+    const outputSize = { width: box.width, height: box.height };
+    if (maxSize) {
+        const minScale = Math.min(
+            maxSize.width / box.width,
+            maxSize.height / box.height,
+        );
+        if (minScale < 1) {
+            outputSize.width = Math.round(minScale * box.width);
+            outputSize.height = Math.round(minScale * box.height);
+        }
+    }
+
+    if (minSize) {
+        const maxScale = Math.max(
+            minSize.width / box.width,
+            minSize.height / box.height,
+        );
+        if (maxScale > 1) {
+            outputSize.width = Math.round(maxScale * box.width);
+            outputSize.height = Math.round(maxScale * box.height);
+        }
+    }
+
+    // log.info({ imageBitmap, box, outputSize });
+
+    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
+    const offscreenCtx = offscreen.getContext("2d");
+    offscreenCtx.imageSmoothingQuality = "high";
+
+    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
+    rotation && offscreenCtx.rotate(rotation);
+
+    const outputBox = new Box({
+        x: -outputSize.width / 2,
+        y: -outputSize.height / 2,
+        width: outputSize.width,
+        height: outputSize.height,
+    });
+
+    const enlargedBox = enlargeBox(box, 1.5);
+    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
+
+    offscreenCtx.drawImage(
+        imageBitmap,
+        enlargedBox.x,
+        enlargedBox.y,
+        enlargedBox.width,
+        enlargedBox.height,
+        enlargedOutputBox.x,
+        enlargedOutputBox.y,
+        enlargedOutputBox.width,
+        enlargedOutputBox.height,
+    );
+
+    return offscreen.transferToImageBitmap();
+}
@@ -3,93 +3,22 @@ import { decodeLivePhoto } from "@/media/live-photo";
 import log from "@/next/log";
 import { Matrix, inverse } from "ml-matrix";
 import DownloadManager from "services/download";
-import { Box, Dimensions, enlargeBox } from "services/face/geom";
-import {
-    DetectedFace,
-    FaceAlignment,
-    MLSyncFileContext,
-} from "services/face/types";
+import { FaceAlignment } from "services/face/types";
 import { getLocalFiles } from "services/fileService";
 import { EnteFile } from "types/file";
 import { getRenderableImage } from "utils/file";
 
-export const fetchImageBitmapForContext = async (
-    fileContext: MLSyncFileContext,
-) => {
-    if (fileContext.imageBitmap) {
-        return fileContext.imageBitmap;
-    }
-    if (fileContext.localFile) {
-        if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
-            throw new Error("Local file of only image type is supported");
-        }
-        fileContext.imageBitmap = await getLocalFileImageBitmap(
-            fileContext.enteFile,
-            fileContext.localFile,
-        );
-    } else if (
-        [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
-            fileContext.enteFile.metadata.fileType,
-        )
-    ) {
-        fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
-    } else {
-        // TODO-ML(MR): We don't do it on videos, when will we ever come
-        // here?
-        fileContext.imageBitmap = await getThumbnailImageBitmap(
-            fileContext.enteFile,
-        );
-    }
-
-    const { width, height } = fileContext.imageBitmap;
-    fileContext.newMlFile.imageDimensions = { width, height };
-
-    return fileContext.imageBitmap;
-};
+/**
+ * Clamp {@link value} to between {@link min} and {@link max}, inclusive.
+ */
+export const clamp = (value: number, min: number, max: number) =>
+    Math.min(max, Math.max(min, value));
 
 export async function getLocalFile(fileId: number) {
     const localFiles = await getLocalFiles();
     return localFiles.find((f) => f.id === fileId);
 }
 
-export function getFaceId(detectedFace: DetectedFace, imageDims: Dimensions) {
-    const xMin = clamp(
-        detectedFace.detection.box.x / imageDims.width,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const yMin = clamp(
-        detectedFace.detection.box.y / imageDims.height,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const xMax = clamp(
-        (detectedFace.detection.box.x + detectedFace.detection.box.width) /
-            imageDims.width,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const yMax = clamp(
-        (detectedFace.detection.box.y + detectedFace.detection.box.height) /
-            imageDims.height,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-
-    const rawFaceID = `${xMin}_${yMin}_${xMax}_${yMax}`;
-    const faceID = `${detectedFace.fileId}_${rawFaceID}`;
-
-    return faceID;
-}
-
 export const fetchImageBitmap = async (file: EnteFile) =>
     fetchRenderableBlob(file).then(createImageBitmap);
 
@@ -123,49 +52,18 @@ export async function getLocalFileImageBitmap(
     return createImageBitmap(fileBlob);
 }
 
-export function normalizePixelBetween0And1(pixelValue: number) {
-    return pixelValue / 255.0;
-}
-
-export function normalizePixelBetweenMinus1And1(pixelValue: number) {
-    return pixelValue / 127.5 - 1.0;
-}
-
-export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
-    return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
-}
-
-export function readPixelColor(
-    imageData: Uint8ClampedArray,
-    width: number,
-    height: number,
-    x: number,
-    y: number,
-) {
-    if (x < 0 || x >= width || y < 0 || y >= height) {
-        return { r: 0, g: 0, b: 0, a: 0 };
-    }
-    const index = (y * width + x) * 4;
-    return {
-        r: imageData[index],
-        g: imageData[index + 1],
-        b: imageData[index + 2],
-        a: imageData[index + 3],
-    };
-}
-
-export function clamp(value: number, min: number, max: number) {
-    return Math.min(max, Math.max(min, value));
-}
-
-export function getPixelBicubic(
+/**
+ * Returns the pixel value (RGB) at the given coordinates ({@link fx},
+ * {@link fy}) using bicubic interpolation.
+ */
+export function pixelRGBBicubic(
     fx: number,
     fy: number,
     imageData: Uint8ClampedArray,
     imageWidth: number,
     imageHeight: number,
 ) {
-    // Clamp to image boundaries
+    // Clamp to image boundaries.
     fx = clamp(fx, 0, imageWidth - 1);
     fy = clamp(fy, 0, imageHeight - 1);
 
@@ -180,40 +78,35 @@ export function getPixelBicubic(
     const dx = fx - x;
     const dy = fy - y;
 
-    function cubic(
+    const cubic = (
         dx: number,
         ipp: number,
         icp: number,
         inp: number,
         iap: number,
-    ) {
-        return (
-            icp +
-            0.5 *
-                (dx * (-ipp + inp) +
-                    dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
-                    dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
-        );
-    }
+    ) =>
+        icp +
+        0.5 *
+            (dx * (-ipp + inp) +
+                dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
+                dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));
 
-    const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
+    const icc = pixelRGBA(imageData, imageWidth, imageHeight, x, y);
 
     const ipp =
         px < 0 || py < 0
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, py);
     const icp =
-        px < 0
-            ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, py);
+        px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, x, py);
     const inp =
         py < 0 || nx >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, py);
     const iap =
         ax >= imageWidth || py < 0
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, py);
 
     const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
     const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
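The `cubic` helper above is the Catmull-Rom convolution kernel, so a quick sanity check (not from the diff) is that interpolating at `dx = 0` returns the center sample `icp`, and at `dx = 1` the next sample `inp`:

```ts
const cubic = (dx: number, ipp: number, icp: number, inp: number, iap: number) =>
    icp +
    0.5 *
        (dx * (-ipp + inp) +
            dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
            dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));

console.log(cubic(0, 10, 20, 30, 40)); // 20 (icp)
console.log(cubic(1, 10, 20, 30, 40)); // 30 (inp)
```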
@@ -221,17 +114,15 @@ export function getPixelBicubic(
     // const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
 
     const ipc =
-        px < 0
-            ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, y);
+        px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, px, y);
     const inc =
         nx >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, y);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, y);
     const iac =
         ax >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, y);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, y);
 
     const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
     const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
@@ -241,19 +132,19 @@ export function getPixelBicubic(
     const ipn =
         px < 0 || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, ny);
     const icn =
         ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, x, ny);
     const inn =
         nx >= imageWidth || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, ny);
     const ian =
         ax >= imageWidth || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, ny);
 
     const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
     const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
@@ -263,19 +154,19 @@ export function getPixelBicubic(
     const ipa =
         px < 0 || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, ay);
     const ica =
         ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, x, ay);
     const ina =
         nx >= imageWidth || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, ay);
     const iaa =
         ax >= imageWidth || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, ay);
 
     const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
     const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
@@ -290,19 +181,41 @@ export function getPixelBicubic(
     return { r: c0, g: c1, b: c2 };
 }
 
-/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
-export function getPixelBilinear(
+const pixelRGBA = (
+    imageData: Uint8ClampedArray,
+    width: number,
+    height: number,
+    x: number,
+    y: number,
+) => {
+    if (x < 0 || x >= width || y < 0 || y >= height) {
+        return { r: 0, g: 0, b: 0, a: 0 };
+    }
+    const index = (y * width + x) * 4;
+    return {
+        r: imageData[index],
+        g: imageData[index + 1],
+        b: imageData[index + 2],
+        a: imageData[index + 3],
+    };
+};
+
+/**
+ * Returns the pixel value (RGB) at the given coordinates ({@link fx},
+ * {@link fy}) using bilinear interpolation.
+ */
+export function pixelRGBBilinear(
     fx: number,
     fy: number,
     imageData: Uint8ClampedArray,
     imageWidth: number,
     imageHeight: number,
 ) {
-    // Clamp to image boundaries
+    // Clamp to image boundaries.
     fx = clamp(fx, 0, imageWidth - 1);
     fy = clamp(fy, 0, imageHeight - 1);
 
-    // Get the surrounding coordinates and their weights
+    // Get the surrounding coordinates and their weights.
     const x0 = Math.floor(fx);
     const x1 = Math.ceil(fx);
     const y0 = Math.floor(fy);
@@ -312,27 +225,26 @@ export function pixelRGBBilinear(
     const dx1 = 1.0 - dx;
     const dy1 = 1.0 - dy;
 
-    // Get the original pixels
-    const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
-    const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
-    const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
-    const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
+    // Get the original pixels.
+    const pixel1 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y0);
+    const pixel2 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y0);
+    const pixel3 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y1);
+    const pixel4 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y1);
 
-    function bilinear(val1: number, val2: number, val3: number, val4: number) {
-        return Math.round(
+    const bilinear = (val1: number, val2: number, val3: number, val4: number) =>
+        Math.round(
             val1 * dx1 * dy1 +
                 val2 * dx * dy1 +
                 val3 * dx1 * dy +
                 val4 * dx * dy,
         );
-    }
 
-    // Interpolate the pixel values
-    const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
-    const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
-    const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
-
-    return { r: red, g: green, b: blue };
+    // Return interpolated pixel colors.
+    return {
+        r: bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r),
+        g: bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g),
+        b: bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b),
+    };
 }
 
 export function warpAffineFloat32List(
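A tiny worked example of the bilinear weighting (hypothetical values, not from the diff): sampling exactly halfway between four pixels averages them equally.

```ts
// At fx = fy = 0.5 between four samples, all four weights are 0.25.
const dx = 0.5,
    dy = 0.5,
    dx1 = 0.5,
    dy1 = 0.5;
const bilinear = (v1: number, v2: number, v3: number, v4: number) =>
    Math.round(v1 * dx1 * dy1 + v2 * dx * dy1 + v3 * dx1 * dy + v4 * dx * dy);

console.log(bilinear(0, 100, 200, 100)); // 100
```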
@@ -342,7 +254,7 @@ export function warpAffineFloat32List(
     inputData: Float32Array,
     inputStartIndex: number,
 ): void {
-    // Get the pixel data
+    // Get the pixel data.
     const offscreenCanvas = new OffscreenCanvas(
         imageBitmap.width,
         imageBitmap.height,
@@ -382,8 +294,8 @@ export function warpAffineFloat32List(
             const yOrigin =
                 a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
 
-            // Get the pixel from interpolation
-            const pixel = getPixelBicubic(
+            // Get the pixel RGB using bicubic interpolation.
+            const { r, g, b } = pixelRGBBicubic(
                 xOrigin,
                 yOrigin,
                 pixelData,
@@ -394,20 +306,26 @@ export function warpAffineFloat32List(
             // Set the pixel in the input data
             const index = (yTrans * faceSize + xTrans) * 3;
             inputData[inputStartIndex + index] =
-                normalizePixelBetweenMinus1And1(pixel.r);
+                normalizePixelBetweenMinus1And1(r);
             inputData[inputStartIndex + index + 1] =
-                normalizePixelBetweenMinus1And1(pixel.g);
+                normalizePixelBetweenMinus1And1(g);
             inputData[inputStartIndex + index + 2] =
-                normalizePixelBetweenMinus1And1(pixel.b);
+                normalizePixelBetweenMinus1And1(b);
         }
     }
 }
 
+const normalizePixelBetweenMinus1And1 = (pixelValue: number) =>
+    pixelValue / 127.5 - 1.0;
+
+const unnormalizePixelFromBetweenMinus1And1 = (pixelValue: number) =>
+    clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
+
 export function createGrayscaleIntMatrixFromNormalized2List(
     imageList: Float32Array,
     faceNumber: number,
-    width: number = 112,
-    height: number = 112,
+    width: number,
+    height: number,
 ): number[][] {
     const startIndex = faceNumber * width * height * 3;
     return Array.from({ length: height }, (_, y) =>
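The two normalization helpers moved here round-trip 8-bit pixel values through the [-1, 1] range used for the face crop inputs; a quick check (not from the diff):

```ts
const normalize = (v: number) => v / 127.5 - 1.0;
const unnormalize = (v: number) =>
    Math.min(255, Math.max(0, Math.round((v + 1.0) * 127.5)));

console.log(normalize(0)); // -1
console.log(normalize(255)); // 1
console.log(unnormalize(normalize(200))); // 200
```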
@@ -435,155 +353,3 @@ export function createGrayscaleIntMatrixFromNormalized2List(
         }),
     );
 }
-
-export function resizeToSquare(img: ImageBitmap, size: number) {
-    const scale = size / Math.max(img.height, img.width);
-    const width = scale * img.width;
-    const height = scale * img.height;
-    const offscreen = new OffscreenCanvas(size, size);
-    const ctx = offscreen.getContext("2d");
-    ctx.imageSmoothingQuality = "high";
-    ctx.drawImage(img, 0, 0, width, height);
-    const resizedImage = offscreen.transferToImageBitmap();
-    return { image: resizedImage, width, height };
-}
-
-export function transform(
-    imageBitmap: ImageBitmap,
-    affineMat: number[][],
-    outputWidth: number,
-    outputHeight: number,
-) {
-    const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
-    const context = offscreen.getContext("2d");
-    context.imageSmoothingQuality = "high";
-
-    context.transform(
-        affineMat[0][0],
-        affineMat[1][0],
-        affineMat[0][1],
-        affineMat[1][1],
-        affineMat[0][2],
-        affineMat[1][2],
-    );
-
-    context.drawImage(imageBitmap, 0, 0);
-    return offscreen.transferToImageBitmap();
-}
-
-export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
-    const dimensions: Dimensions = {
-        width: size,
-        height: size,
-    };
-
-    return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
-}
-
-// these utils only work in env where OffscreenCanvas is available
-
-export function cropWithRotation(
-    imageBitmap: ImageBitmap,
-    cropBox: Box,
-    rotation?: number,
-    maxSize?: Dimensions,
-    minSize?: Dimensions,
-) {
-    const box = cropBox.round();
-
-    const outputSize = { width: box.width, height: box.height };
-    if (maxSize) {
-        const minScale = Math.min(
-            maxSize.width / box.width,
-            maxSize.height / box.height,
-        );
-        if (minScale < 1) {
-            outputSize.width = Math.round(minScale * box.width);
-            outputSize.height = Math.round(minScale * box.height);
-        }
-    }
-
-    if (minSize) {
-        const maxScale = Math.max(
-            minSize.width / box.width,
-            minSize.height / box.height,
-        );
-        if (maxScale > 1) {
-            outputSize.width = Math.round(maxScale * box.width);
-            outputSize.height = Math.round(maxScale * box.height);
-        }
-    }
-
-    // log.info({ imageBitmap, box, outputSize });
-
-    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
-    const offscreenCtx = offscreen.getContext("2d");
-    offscreenCtx.imageSmoothingQuality = "high";
-
-    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
-    rotation && offscreenCtx.rotate(rotation);
-
-    const outputBox = new Box({
-        x: -outputSize.width / 2,
-        y: -outputSize.height / 2,
-        width: outputSize.width,
-        height: outputSize.height,
-    });
-
-    const enlargedBox = enlargeBox(box, 1.5);
-    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
-
-    offscreenCtx.drawImage(
-        imageBitmap,
-        enlargedBox.x,
-        enlargedBox.y,
-        enlargedBox.width,
-        enlargedBox.height,
-        enlargedOutputBox.x,
-        enlargedOutputBox.y,
-        enlargedOutputBox.width,
-        enlargedOutputBox.height,
-    );
-
-    return offscreen.transferToImageBitmap();
-}
-
-export function addPadding(image: ImageBitmap, padding: number) {
-    const scale = 1 + padding * 2;
-    const width = scale * image.width;
-    const height = scale * image.height;
-    const offscreen = new OffscreenCanvas(width, height);
-    const ctx = offscreen.getContext("2d");
-    ctx.imageSmoothingEnabled = false;
-    ctx.drawImage(
-        image,
-        width / 2 - image.width / 2,
-        height / 2 - image.height / 2,
-        image.width,
-        image.height,
-    );
-
-    return offscreen.transferToImageBitmap();
-}
-
-export interface BlobOptions {
-    type?: string;
-    quality?: number;
-}
-
-export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
-    const offscreen = new OffscreenCanvas(
-        imageBitmap.width,
-        imageBitmap.height,
-    );
-    offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
-
-    return offscreen.convertToBlob({
-        type: "image/jpeg",
-        quality: 0.8,
-    });
-}
-
-export async function imageBitmapFromBlob(blob: Blob) {
-    return createImageBitmap(blob);
-}