[web] ML cleanup - Part 5/x (#1762)

Manav Rathi 2024-05-18 11:06:04 +05:30 committed by GitHub
commit 996d9ccda5
2 changed files with 305 additions and 464 deletions


@@ -1,3 +1,4 @@
import { FILE_TYPE } from "@/media/file-type";
import { openCache } from "@/next/blob-cache";
import log from "@/next/log";
import { workerBridge } from "@/next/worker/worker-bridge";
@@ -20,12 +21,10 @@ import type { EnteFile } from "types/file";
import {
clamp,
createGrayscaleIntMatrixFromNormalized2List,
cropWithRotation,
fetchImageBitmapForContext,
getFaceId,
getPixelBilinear,
imageBitmapToBlob,
normalizePixelBetween0And1,
fetchImageBitmap,
getLocalFileImageBitmap,
getThumbnailImageBitmap,
pixelRGBBilinear,
warpAffineFloat32List,
} from "./image";
import { transformFaceDetections } from "./transform-box";
@@ -70,6 +69,38 @@ export const indexFaces = async (
return newMlFile;
};
const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => {
if (fileContext.imageBitmap) {
return fileContext.imageBitmap;
}
if (fileContext.localFile) {
if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
throw new Error("Local file of only image type is supported");
}
fileContext.imageBitmap = await getLocalFileImageBitmap(
fileContext.enteFile,
fileContext.localFile,
);
} else if (
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
fileContext.enteFile.metadata.fileType,
)
) {
fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
} else {
// TODO-ML(MR): We don't index videos, so when would we ever come
// here?
fileContext.imageBitmap = await getThumbnailImageBitmap(
fileContext.enteFile,
);
}
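// Record the dimensions of the image we just loaded on the ML data
// object associated with this file.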
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
return fileContext.imageBitmap;
};
const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
const { newMlFile } = fileContext;
const startTime = Date.now();
@@ -96,7 +127,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
fileContext.newDetection = true;
const imageBitmap = await fetchImageBitmapForContext(fileContext);
const faceDetections = await detectFaces(imageBitmap);
// TODO: reenable faces filtering based on width
// TODO-ML(MR): reenable faces filtering based on width
const detectedFaces = faceDetections?.map((detection) => {
return {
fileId: fileContext.enteFile.id,
@@ -105,7 +136,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
});
newMlFile.faces = detectedFaces?.map((detectedFace) => ({
...detectedFace,
id: getFaceId(detectedFace, newMlFile.imageDimensions),
id: makeFaceID(detectedFace, newMlFile.imageDimensions),
}));
// ?.filter((f) =>
// f.box.width > syncContext.config.faceDetection.minFaceSize
@@ -121,149 +152,104 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
const detectFaces = async (
imageBitmap: ImageBitmap,
): Promise<Array<FaceDetection>> => {
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
imageBitmap,
640,
640,
);
const data = preprocessResult.data;
const resized = preprocessResult.newSize;
const outputData = await workerBridge.detectFaces(data);
const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
const inBox = newBox(0, 0, resized.width, resized.height);
const { yoloInput, yoloSize } =
convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
const yoloOutput = await workerBridge.detectFaces(yoloInput);
const faces = faceDetectionsFromYOLOOutput(yoloOutput);
const inBox = newBox(0, 0, yoloSize.width, yoloSize.height);
const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
const faceDetections = transformFaceDetections(faces, inBox, toBox);
const maxFaceDistancePercent = Math.sqrt(2) / 100;
const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
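// For example, for a 1000 px wide image, maxFaceDistance is ~14 px.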
return removeDuplicateDetections(faceDetections, maxFaceDistance);
};
const preprocessImageBitmapToFloat32ChannelsFirst = (
imageBitmap: ImageBitmap,
requiredWidth: number,
requiredHeight: number,
maintainAspectRatio: boolean = true,
normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
) => {
/**
* Convert {@link imageBitmap} into the format that the YOLO face detection
* model expects.
*/
const convertToYOLOInputFloat32ChannelsFirst = (imageBitmap: ImageBitmap) => {
const requiredWidth = 640;
const requiredHeight = 640;
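// The YOLO model expects a 640 x 640 input. The image is scaled down
// preserving its aspect ratio, and the remaining area is filled with a
// neutral gray (114, 114, 114), the padding value conventionally used
// when letterboxing inputs for YOLO models.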
const width = imageBitmap.width;
const height = imageBitmap.height;
// Create an OffscreenCanvas and set its size.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
const offscreenCanvas = new OffscreenCanvas(width, height);
const ctx = offscreenCanvas.getContext("2d");
ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
const imageData = ctx.getImageData(
0,
0,
imageBitmap.width,
imageBitmap.height,
);
ctx.drawImage(imageBitmap, 0, 0, width, height);
const imageData = ctx.getImageData(0, 0, width, height);
const pixelData = imageData.data;
let scaleW = requiredWidth / imageBitmap.width;
let scaleH = requiredHeight / imageBitmap.height;
if (maintainAspectRatio) {
const scale = Math.min(
requiredWidth / imageBitmap.width,
requiredHeight / imageBitmap.height,
);
scaleW = scale;
scaleH = scale;
}
const scaledWidth = clamp(
Math.round(imageBitmap.width * scaleW),
0,
requiredWidth,
);
const scaledHeight = clamp(
Math.round(imageBitmap.height * scaleH),
0,
requiredHeight,
);
// Maintain aspect ratio.
const scale = Math.min(requiredWidth / width, requiredHeight / height);
const processedImage = new Float32Array(
1 * 3 * requiredWidth * requiredHeight,
);
const scaledWidth = clamp(Math.round(width * scale), 0, requiredWidth);
const scaledHeight = clamp(Math.round(height * scale), 0, requiredHeight);
// Populate the Float32Array with normalized pixel values
let pixelIndex = 0;
const yoloInput = new Float32Array(1 * 3 * requiredWidth * requiredHeight);
const yoloSize = { width: scaledWidth, height: scaledHeight };
// Populate the Float32Array with normalized pixel values.
let pi = 0;
const channelOffsetGreen = requiredHeight * requiredWidth;
const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
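// The input tensor uses a channels-first (CHW) layout: all the red
// values, followed by all the green values, then all the blue values.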
for (let h = 0; h < requiredHeight; h++) {
for (let w = 0; w < requiredWidth; w++) {
let pixel: {
r: number;
g: number;
b: number;
};
if (w >= scaledWidth || h >= scaledHeight) {
pixel = { r: 114, g: 114, b: 114 };
} else {
pixel = getPixelBilinear(
w / scaleW,
h / scaleH,
pixelData,
imageBitmap.width,
imageBitmap.height,
);
}
processedImage[pixelIndex] = normFunction(pixel.r);
processedImage[pixelIndex + channelOffsetGreen] = normFunction(
pixel.g,
);
processedImage[pixelIndex + channelOffsetBlue] = normFunction(
pixel.b,
);
pixelIndex++;
const { r, g, b } =
w >= scaledWidth || h >= scaledHeight
? { r: 114, g: 114, b: 114 }
: pixelRGBBilinear(
w / scale,
h / scale,
pixelData,
width,
height,
);
yoloInput[pi] = r / 255.0;
yoloInput[pi + channelOffsetGreen] = g / 255.0;
yoloInput[pi + channelOffsetBlue] = b / 255.0;
pi++;
}
}
return {
data: processedImage,
originalSize: {
width: imageBitmap.width,
height: imageBitmap.height,
},
newSize: { width: scaledWidth, height: scaledHeight },
};
return { yoloInput, yoloSize };
};
/**
* @param rowOutput A Float32Array of shape [25200, 16], where each row
* Extract detected faces from the YOLO model's output.
*
* Only detections that exceed a minimum score are returned.
*
* @param rows A Float32Array of shape [25200, 16], where each row
* represents a bounding box.
*/
const getFacesFromYOLOOutput = (
rowOutput: Float32Array,
minScore: number,
): Array<FaceDetection> => {
const faces: Array<FaceDetection> = [];
const faceDetectionsFromYOLOOutput = (rows: Float32Array): FaceDetection[] => {
const faces: FaceDetection[] = [];
// Iterate over each row.
for (let i = 0; i < rowOutput.length; i += 16) {
const score = rowOutput[i + 4];
if (score < minScore) {
continue;
}
// The first 4 values represent the bounding box's coordinates:
//
// (x1, y1, x2, y2)
//
const xCenter = rowOutput[i];
const yCenter = rowOutput[i + 1];
const width = rowOutput[i + 2];
const height = rowOutput[i + 3];
for (let i = 0; i < rows.length; i += 16) {
const score = rows[i + 4];
if (score < 0.7) continue;
const xCenter = rows[i];
const yCenter = rows[i + 1];
const width = rows[i + 2];
const height = rows[i + 3];
const xMin = xCenter - width / 2.0; // topLeft
const yMin = yCenter - height / 2.0; // topLeft
const leftEyeX = rowOutput[i + 5];
const leftEyeY = rowOutput[i + 6];
const rightEyeX = rowOutput[i + 7];
const rightEyeY = rowOutput[i + 8];
const noseX = rowOutput[i + 9];
const noseY = rowOutput[i + 10];
const leftMouthX = rowOutput[i + 11];
const leftMouthY = rowOutput[i + 12];
const rightMouthX = rowOutput[i + 13];
const rightMouthY = rowOutput[i + 14];
const leftEyeX = rows[i + 5];
const leftEyeY = rows[i + 6];
const rightEyeX = rows[i + 7];
const rightEyeY = rows[i + 8];
const noseX = rows[i + 9];
const noseY = rows[i + 10];
const leftMouthX = rows[i + 11];
const leftMouthY = rows[i + 12];
const rightMouthX = rows[i + 13];
const rightMouthY = rows[i + 14];
const box = new Box({
x: xMin,
@@ -491,6 +477,43 @@ function normalizeLandmarks(
) as Array<[number, number]>;
}
async function extractFaceImagesToFloat32(
faceAlignments: Array<FaceAlignment>,
faceSize: number,
image: ImageBitmap,
): Promise<Float32Array> {
const faceData = new Float32Array(
faceAlignments.length * faceSize * faceSize * 3,
);
for (let i = 0; i < faceAlignments.length; i++) {
const alignedFace = faceAlignments[i];
const faceDataOffset = i * faceSize * faceSize * 3;
warpAffineFloat32List(
image,
alignedFace,
faceSize,
faceData,
faceDataOffset,
);
}
return faceData;
}
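// Make a unique ID for a detected face by combining its file ID with the
// face's bounding box, normalized to the image's dimensions and truncated
// to 5 decimal digits. As a hypothetical example, a box spanning (0.25,
// 0.25) to (0.75, 0.75) of file 123 yields "123_25000_25000_75000_75000".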
const makeFaceID = (detectedFace: DetectedFace, imageDims: Dimensions) => {
const part = (v: number) => clamp(v, 0.0, 0.999999).toFixed(5).substring(2);
const xMin = part(detectedFace.detection.box.x / imageDims.width);
const yMin = part(detectedFace.detection.box.y / imageDims.height);
const xMax = part(
(detectedFace.detection.box.x + detectedFace.detection.box.width) /
imageDims.width,
);
const yMax = part(
(detectedFace.detection.box.y + detectedFace.detection.box.height) /
imageDims.height,
);
return [detectedFace.fileId, xMin, yMin, xMax, yMax].join("_");
};
/**
* Laplacian blur detection.
*/
@@ -506,6 +529,8 @@ const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
const faceImage = createGrayscaleIntMatrixFromNormalized2List(
alignedFaces,
i,
mobileFaceNetFaceSize,
mobileFaceNetFaceSize,
);
const laplacian = applyLaplacian(faceImage, direction);
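// Use the variance of the Laplacian (a second order derivative filter)
// as a sharpness measure; blurry faces produce a low variance.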
blurValues.push(matrixVariance(laplacian));
@@ -738,6 +763,12 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
return blob;
};
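/** Convert an ImageBitmap into a JPEG blob (quality 0.8). */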
const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
};
const getFaceCrop = (
imageBitmap: ImageBitmap,
faceDetection: FaceDetection,
@@ -766,24 +797,68 @@ const getFaceCrop = (
};
};
async function extractFaceImagesToFloat32(
faceAlignments: Array<FaceAlignment>,
faceSize: number,
image: ImageBitmap,
): Promise<Float32Array> {
const faceData = new Float32Array(
faceAlignments.length * faceSize * faceSize * 3,
);
for (let i = 0; i < faceAlignments.length; i++) {
const alignedFace = faceAlignments[i];
const faceDataOffset = i * faceSize * faceSize * 3;
warpAffineFloat32List(
image,
alignedFace,
faceSize,
faceData,
faceDataOffset,
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
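// Scale the output down if it exceeds maxSize, or up if it is smaller
// than minSize.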
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
return faceData;
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}


@@ -3,93 +3,22 @@ import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import { Matrix, inverse } from "ml-matrix";
import DownloadManager from "services/download";
import { Box, Dimensions, enlargeBox } from "services/face/geom";
import {
DetectedFace,
FaceAlignment,
MLSyncFileContext,
} from "services/face/types";
import { FaceAlignment } from "services/face/types";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";
import { getRenderableImage } from "utils/file";
export const fetchImageBitmapForContext = async (
fileContext: MLSyncFileContext,
) => {
if (fileContext.imageBitmap) {
return fileContext.imageBitmap;
}
if (fileContext.localFile) {
if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
throw new Error("Local file of only image type is supported");
}
fileContext.imageBitmap = await getLocalFileImageBitmap(
fileContext.enteFile,
fileContext.localFile,
);
} else if (
[FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
fileContext.enteFile.metadata.fileType,
)
) {
fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
} else {
// TODO-ML(MR): We don't index videos, so when would we ever come
// here?
fileContext.imageBitmap = await getThumbnailImageBitmap(
fileContext.enteFile,
);
}
const { width, height } = fileContext.imageBitmap;
fileContext.newMlFile.imageDimensions = { width, height };
return fileContext.imageBitmap;
};
/**
* Clamp {@link value} to between {@link min} and {@link max}, inclusive.
*/
export const clamp = (value: number, min: number, max: number) =>
Math.min(max, Math.max(min, value));
export async function getLocalFile(fileId: number) {
const localFiles = await getLocalFiles();
return localFiles.find((f) => f.id === fileId);
}
export function getFaceId(detectedFace: DetectedFace, imageDims: Dimensions) {
const xMin = clamp(
detectedFace.detection.box.x / imageDims.width,
0.0,
0.999999,
)
.toFixed(5)
.substring(2);
const yMin = clamp(
detectedFace.detection.box.y / imageDims.height,
0.0,
0.999999,
)
.toFixed(5)
.substring(2);
const xMax = clamp(
(detectedFace.detection.box.x + detectedFace.detection.box.width) /
imageDims.width,
0.0,
0.999999,
)
.toFixed(5)
.substring(2);
const yMax = clamp(
(detectedFace.detection.box.y + detectedFace.detection.box.height) /
imageDims.height,
0.0,
0.999999,
)
.toFixed(5)
.substring(2);
const rawFaceID = `${xMin}_${yMin}_${xMax}_${yMax}`;
const faceID = `${detectedFace.fileId}_${rawFaceID}`;
return faceID;
}
export const fetchImageBitmap = async (file: EnteFile) =>
fetchRenderableBlob(file).then(createImageBitmap);
@@ -123,49 +52,18 @@ export async function getLocalFileImageBitmap(
return createImageBitmap(fileBlob);
}
export function normalizePixelBetween0And1(pixelValue: number) {
return pixelValue / 255.0;
}
export function normalizePixelBetweenMinus1And1(pixelValue: number) {
return pixelValue / 127.5 - 1.0;
}
export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
}
export function readPixelColor(
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
}
export function clamp(value: number, min: number, max: number) {
return Math.min(max, Math.max(min, value));
}
export function getPixelBicubic(
/**
* Returns the pixel value (RGB) at the given coordinates ({@link fx},
* {@link fy}) using bicubic interpolation.
*/
export function pixelRGBBicubic(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
// Clamp to image boundaries.
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
@@ -180,40 +78,35 @@ export function getPixelBicubic(
const dx = fx - x;
const dy = fy - y;
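// Cubic convolution of four samples (a Catmull-Rom style kernel), where
// dx is the fractional offset and ipp..iap are the previous, current,
// next, and after-next samples.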
function cubic(
const cubic = (
dx: number,
ipp: number,
icp: number,
inp: number,
iap: number,
) {
return (
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
);
}
) =>
icp +
0.5 *
(dx * (-ipp + inp) +
dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));
const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
const icc = pixelRGBA(imageData, imageWidth, imageHeight, x, y);
const ipp =
px < 0 || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, py);
: pixelRGBA(imageData, imageWidth, imageHeight, px, py);
const icp =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, py);
px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, x, py);
const inp =
py < 0 || nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, py);
: pixelRGBA(imageData, imageWidth, imageHeight, nx, py);
const iap =
ax >= imageWidth || py < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, py);
: pixelRGBA(imageData, imageWidth, imageHeight, ax, py);
const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
@@ -221,17 +114,15 @@ export function getPixelBicubic(
// const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
const ipc =
px < 0
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, y);
px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, px, y);
const inc =
nx >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, y);
: pixelRGBA(imageData, imageWidth, imageHeight, nx, y);
const iac =
ax >= imageWidth
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, y);
: pixelRGBA(imageData, imageWidth, imageHeight, ax, y);
const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
@@ -241,19 +132,19 @@ export function getPixelBicubic(
const ipn =
px < 0 || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ny);
: pixelRGBA(imageData, imageWidth, imageHeight, px, ny);
const icn =
ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ny);
: pixelRGBA(imageData, imageWidth, imageHeight, x, ny);
const inn =
nx >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
: pixelRGBA(imageData, imageWidth, imageHeight, nx, ny);
const ian =
ax >= imageWidth || ny >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
: pixelRGBA(imageData, imageWidth, imageHeight, ax, ny);
const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
@@ -263,19 +154,19 @@ export function getPixelBicubic(
const ipa =
px < 0 || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, px, ay);
: pixelRGBA(imageData, imageWidth, imageHeight, px, ay);
const ica =
ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, x, ay);
: pixelRGBA(imageData, imageWidth, imageHeight, x, ay);
const ina =
nx >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
: pixelRGBA(imageData, imageWidth, imageHeight, nx, ay);
const iaa =
ax >= imageWidth || ay >= imageHeight
? icc
: readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
: pixelRGBA(imageData, imageWidth, imageHeight, ax, ay);
const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
@@ -290,19 +181,41 @@ export function getPixelBicubic(
return { r: c0, g: c1, b: c2 };
}
/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
export function getPixelBilinear(
const pixelRGBA = (
imageData: Uint8ClampedArray,
width: number,
height: number,
x: number,
y: number,
) => {
if (x < 0 || x >= width || y < 0 || y >= height) {
return { r: 0, g: 0, b: 0, a: 0 };
}
const index = (y * width + x) * 4;
return {
r: imageData[index],
g: imageData[index + 1],
b: imageData[index + 2],
a: imageData[index + 3],
};
};
/**
* Returns the pixel value (RGB) at the given coordinates ({@link fx},
* {@link fy}) using bilinear interpolation.
*/
export function pixelRGBBilinear(
fx: number,
fy: number,
imageData: Uint8ClampedArray,
imageWidth: number,
imageHeight: number,
) {
// Clamp to image boundaries
// Clamp to image boundaries.
fx = clamp(fx, 0, imageWidth - 1);
fy = clamp(fy, 0, imageHeight - 1);
// Get the surrounding coordinates and their weights
// Get the surrounding coordinates and their weights.
const x0 = Math.floor(fx);
const x1 = Math.ceil(fx);
const y0 = Math.floor(fy);
@@ -312,27 +225,26 @@ export function getPixelBilinear(
const dx1 = 1.0 - dx;
const dy1 = 1.0 - dy;
// Get the original pixels
const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
// Get the original pixels.
const pixel1 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y0);
const pixel2 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y0);
const pixel3 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y1);
const pixel4 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y1);
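// Blend the four neighboring pixels, weighting each by the fractional
// distance to the opposite corner.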
function bilinear(val1: number, val2: number, val3: number, val4: number) {
return Math.round(
const bilinear = (val1: number, val2: number, val3: number, val4: number) =>
Math.round(
val1 * dx1 * dy1 +
val2 * dx * dy1 +
val3 * dx1 * dy +
val4 * dx * dy,
);
}
// Interpolate the pixel values
const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
return { r: red, g: green, b: blue };
// Return interpolated pixel colors.
return {
r: bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r),
g: bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g),
b: bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b),
};
}
export function warpAffineFloat32List(
@@ -342,7 +254,7 @@ export function warpAffineFloat32List(
inputData: Float32Array,
inputStartIndex: number,
): void {
// Get the pixel data
// Get the pixel data.
const offscreenCanvas = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
@@ -382,8 +294,8 @@ export function warpAffineFloat32List(
const yOrigin =
a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
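// Map the destination pixel back through the inverse of the affine
// alignment matrix to find its source location in the original image.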
// Get the pixel from interpolation
const pixel = getPixelBicubic(
// Get the pixel RGB using bicubic interpolation.
const { r, g, b } = pixelRGBBicubic(
xOrigin,
yOrigin,
pixelData,
@@ -394,20 +306,26 @@ export function warpAffineFloat32List(
// Set the pixel in the input data
const index = (yTrans * faceSize + xTrans) * 3;
inputData[inputStartIndex + index] =
normalizePixelBetweenMinus1And1(pixel.r);
normalizePixelBetweenMinus1And1(r);
inputData[inputStartIndex + index + 1] =
normalizePixelBetweenMinus1And1(pixel.g);
normalizePixelBetweenMinus1And1(g);
inputData[inputStartIndex + index + 2] =
normalizePixelBetweenMinus1And1(pixel.b);
normalizePixelBetweenMinus1And1(b);
}
}
}
const normalizePixelBetweenMinus1And1 = (pixelValue: number) =>
pixelValue / 127.5 - 1.0;
const unnormalizePixelFromBetweenMinus1And1 = (pixelValue: number) =>
clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
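// e.g. normalization maps pixel value 255 to 1.0, 127.5 to 0.0, and 0 to
// -1.0; unnormalization is its (rounded and clamped) inverse.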
export function createGrayscaleIntMatrixFromNormalized2List(
imageList: Float32Array,
faceNumber: number,
width: number = 112,
height: number = 112,
width: number,
height: number,
): number[][] {
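// Reconstruct a height x width grayscale matrix for the given face from
// the normalized RGB face crops written by warpAffineFloat32List.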
const startIndex = faceNumber * width * height * 3;
return Array.from({ length: height }, (_, y) =>
@@ -435,155 +353,3 @@ export function createGrayscaleIntMatrixFromNormalized2List(
}),
);
}
export function resizeToSquare(img: ImageBitmap, size: number) {
const scale = size / Math.max(img.height, img.width);
const width = scale * img.width;
const height = scale * img.height;
const offscreen = new OffscreenCanvas(size, size);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingQuality = "high";
ctx.drawImage(img, 0, 0, width, height);
const resizedImage = offscreen.transferToImageBitmap();
return { image: resizedImage, width, height };
}
export function transform(
imageBitmap: ImageBitmap,
affineMat: number[][],
outputWidth: number,
outputHeight: number,
) {
const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
const context = offscreen.getContext("2d");
context.imageSmoothingQuality = "high";
context.transform(
affineMat[0][0],
affineMat[1][0],
affineMat[0][1],
affineMat[1][1],
affineMat[0][2],
affineMat[1][2],
);
context.drawImage(imageBitmap, 0, 0);
return offscreen.transferToImageBitmap();
}
export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
const dimensions: Dimensions = {
width: size,
height: size,
};
return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
}
// These utils only work in environments where OffscreenCanvas is available.
export function cropWithRotation(
imageBitmap: ImageBitmap,
cropBox: Box,
rotation?: number,
maxSize?: Dimensions,
minSize?: Dimensions,
) {
const box = cropBox.round();
const outputSize = { width: box.width, height: box.height };
if (maxSize) {
const minScale = Math.min(
maxSize.width / box.width,
maxSize.height / box.height,
);
if (minScale < 1) {
outputSize.width = Math.round(minScale * box.width);
outputSize.height = Math.round(minScale * box.height);
}
}
if (minSize) {
const maxScale = Math.max(
minSize.width / box.width,
minSize.height / box.height,
);
if (maxScale > 1) {
outputSize.width = Math.round(maxScale * box.width);
outputSize.height = Math.round(maxScale * box.height);
}
}
// log.info({ imageBitmap, box, outputSize });
const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
const offscreenCtx = offscreen.getContext("2d");
offscreenCtx.imageSmoothingQuality = "high";
offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
rotation && offscreenCtx.rotate(rotation);
const outputBox = new Box({
x: -outputSize.width / 2,
y: -outputSize.height / 2,
width: outputSize.width,
height: outputSize.height,
});
const enlargedBox = enlargeBox(box, 1.5);
const enlargedOutputBox = enlargeBox(outputBox, 1.5);
offscreenCtx.drawImage(
imageBitmap,
enlargedBox.x,
enlargedBox.y,
enlargedBox.width,
enlargedBox.height,
enlargedOutputBox.x,
enlargedOutputBox.y,
enlargedOutputBox.width,
enlargedOutputBox.height,
);
return offscreen.transferToImageBitmap();
}
export function addPadding(image: ImageBitmap, padding: number) {
const scale = 1 + padding * 2;
const width = scale * image.width;
const height = scale * image.height;
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.imageSmoothingEnabled = false;
ctx.drawImage(
image,
width / 2 - image.width / 2,
height / 2 - image.height / 2,
image.width,
image.height,
);
return offscreen.transferToImageBitmap();
}
export interface BlobOptions {
type?: string;
quality?: number;
}
export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
const offscreen = new OffscreenCanvas(
imageBitmap.width,
imageBitmap.height,
);
offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
return offscreen.convertToBlob({
type: "image/jpeg",
quality: 0.8,
});
}
export async function imageBitmapFromBlob(blob: Blob) {
return createImageBitmap(blob);
}