[web] ML cleanup - Part 5/x (#1762)
commit 996d9ccda5
2 changed files with 305 additions and 464 deletions
@@ -1,3 +1,4 @@
+import { FILE_TYPE } from "@/media/file-type";
 import { openCache } from "@/next/blob-cache";
 import log from "@/next/log";
 import { workerBridge } from "@/next/worker/worker-bridge";
@@ -20,12 +21,10 @@ import type { EnteFile } from "types/file";
 import {
     clamp,
     createGrayscaleIntMatrixFromNormalized2List,
-    cropWithRotation,
-    fetchImageBitmapForContext,
-    getFaceId,
-    getPixelBilinear,
-    imageBitmapToBlob,
-    normalizePixelBetween0And1,
+    fetchImageBitmap,
+    getLocalFileImageBitmap,
+    getThumbnailImageBitmap,
+    pixelRGBBilinear,
     warpAffineFloat32List,
 } from "./image";
 import { transformFaceDetections } from "./transform-box";
@@ -70,6 +69,38 @@ export const indexFaces = async (
     return newMlFile;
 };
 
+const fetchImageBitmapForContext = async (fileContext: MLSyncFileContext) => {
+    if (fileContext.imageBitmap) {
+        return fileContext.imageBitmap;
+    }
+    if (fileContext.localFile) {
+        if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
+            throw new Error("Local file of only image type is supported");
+        }
+        fileContext.imageBitmap = await getLocalFileImageBitmap(
+            fileContext.enteFile,
+            fileContext.localFile,
+        );
+    } else if (
+        [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
+            fileContext.enteFile.metadata.fileType,
+        )
+    ) {
+        fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
+    } else {
+        // TODO-ML(MR): We don't do it on videos, when will we ever come
+        // here?
+        fileContext.imageBitmap = await getThumbnailImageBitmap(
+            fileContext.enteFile,
+        );
+    }
+
+    const { width, height } = fileContext.imageBitmap;
+    fileContext.newMlFile.imageDimensions = { width, height };
+
+    return fileContext.imageBitmap;
+};
+
 const syncFileAnalyzeFaces = async (fileContext: MLSyncFileContext) => {
     const { newMlFile } = fileContext;
     const startTime = Date.now();
@@ -96,7 +127,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
     fileContext.newDetection = true;
     const imageBitmap = await fetchImageBitmapForContext(fileContext);
     const faceDetections = await detectFaces(imageBitmap);
-    // TODO: reenable faces filtering based on width
+    // TODO-ML(MR): reenable faces filtering based on width
     const detectedFaces = faceDetections?.map((detection) => {
         return {
             fileId: fileContext.enteFile.id,
@@ -105,7 +136,7 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
     });
     newMlFile.faces = detectedFaces?.map((detectedFace) => ({
         ...detectedFace,
-        id: getFaceId(detectedFace, newMlFile.imageDimensions),
+        id: makeFaceID(detectedFace, newMlFile.imageDimensions),
     }));
     // ?.filter((f) =>
     //     f.box.width > syncContext.config.faceDetection.minFaceSize
@@ -121,149 +152,104 @@ const syncFileFaceDetections = async (fileContext: MLSyncFileContext) => {
 const detectFaces = async (
     imageBitmap: ImageBitmap,
 ): Promise<Array<FaceDetection>> => {
-    const maxFaceDistancePercent = Math.sqrt(2) / 100;
-    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
-    const preprocessResult = preprocessImageBitmapToFloat32ChannelsFirst(
-        imageBitmap,
-        640,
-        640,
-    );
-    const data = preprocessResult.data;
-    const resized = preprocessResult.newSize;
-    const outputData = await workerBridge.detectFaces(data);
-    const faces = getFacesFromYOLOOutput(outputData as Float32Array, 0.7);
-    const inBox = newBox(0, 0, resized.width, resized.height);
+    const { yoloInput, yoloSize } =
+        convertToYOLOInputFloat32ChannelsFirst(imageBitmap);
+    const yoloOutput = await workerBridge.detectFaces(yoloInput);
+    const faces = faceDetectionsFromYOLOOutput(yoloOutput);
+    const inBox = newBox(0, 0, yoloSize.width, yoloSize.height);
     const toBox = newBox(0, 0, imageBitmap.width, imageBitmap.height);
     const faceDetections = transformFaceDetections(faces, inBox, toBox);
+
+    const maxFaceDistancePercent = Math.sqrt(2) / 100;
+    const maxFaceDistance = imageBitmap.width * maxFaceDistancePercent;
     return removeDuplicateDetections(faceDetections, maxFaceDistance);
 };
 
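Note: `transformFaceDetections` itself is not part of this diff, so the following is only a sketch of the `inBox` to `toBox` mapping it is assumed to perform: boxes detected in the letterboxed YOLO input space get scaled back into original image space. The `scaleBox` helper is hypothetical.

```ts
// Hypothetical sketch, not the actual transformFaceDetections: scale a box
// from inBox space (the region of the 640x640 input covered by the
// letterboxed image) into toBox space (the original image dimensions).
interface RectLike {
    x: number;
    y: number;
    width: number;
    height: number;
}

const scaleBox = (box: RectLike, inBox: RectLike, toBox: RectLike): RectLike => {
    const sx = toBox.width / inBox.width;
    const sy = toBox.height / inBox.height;
    return {
        x: box.x * sx,
        y: box.y * sy,
        width: box.width * sx,
        height: box.height * sy,
    };
};
```

Because `inBox` is the scaled (letterboxed) size rather than the full 640×640, the gray padding region never contributes to the mapped coordinates.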
-const preprocessImageBitmapToFloat32ChannelsFirst = (
-    imageBitmap: ImageBitmap,
-    requiredWidth: number,
-    requiredHeight: number,
-    maintainAspectRatio: boolean = true,
-    normFunction: (pixelValue: number) => number = normalizePixelBetween0And1,
-) => {
+/**
+ * Convert {@link imageBitmap} into the format that the YOLO face detection
+ * model expects.
+ */
+const convertToYOLOInputFloat32ChannelsFirst = (imageBitmap: ImageBitmap) => {
+    const requiredWidth = 640;
+    const requiredHeight = 640;
+
+    const width = imageBitmap.width;
+    const height = imageBitmap.height;
+
-    // Create an OffscreenCanvas and set its size.
-    const offscreenCanvas = new OffscreenCanvas(
-        imageBitmap.width,
-        imageBitmap.height,
-    );
+    const offscreenCanvas = new OffscreenCanvas(width, height);
     const ctx = offscreenCanvas.getContext("2d");
-    ctx.drawImage(imageBitmap, 0, 0, imageBitmap.width, imageBitmap.height);
-    const imageData = ctx.getImageData(
-        0,
-        0,
-        imageBitmap.width,
-        imageBitmap.height,
-    );
+    ctx.drawImage(imageBitmap, 0, 0, width, height);
+    const imageData = ctx.getImageData(0, 0, width, height);
     const pixelData = imageData.data;
 
-    let scaleW = requiredWidth / imageBitmap.width;
-    let scaleH = requiredHeight / imageBitmap.height;
-    if (maintainAspectRatio) {
-        const scale = Math.min(
-            requiredWidth / imageBitmap.width,
-            requiredHeight / imageBitmap.height,
-        );
-        scaleW = scale;
-        scaleH = scale;
-    }
-    const scaledWidth = clamp(
-        Math.round(imageBitmap.width * scaleW),
-        0,
-        requiredWidth,
-    );
-    const scaledHeight = clamp(
-        Math.round(imageBitmap.height * scaleH),
-        0,
-        requiredHeight,
-    );
+    // Maintain aspect ratio.
+    const scale = Math.min(requiredWidth / width, requiredHeight / height);
 
-    const processedImage = new Float32Array(
-        1 * 3 * requiredWidth * requiredHeight,
-    );
+    const scaledWidth = clamp(Math.round(width * scale), 0, requiredWidth);
+    const scaledHeight = clamp(Math.round(height * scale), 0, requiredHeight);
 
-    // Populate the Float32Array with normalized pixel values
-    let pixelIndex = 0;
+    const yoloInput = new Float32Array(1 * 3 * requiredWidth * requiredHeight);
+    const yoloSize = { width: scaledWidth, height: scaledHeight };
+
+    // Populate the Float32Array with normalized pixel values.
+    let pi = 0;
     const channelOffsetGreen = requiredHeight * requiredWidth;
     const channelOffsetBlue = 2 * requiredHeight * requiredWidth;
     for (let h = 0; h < requiredHeight; h++) {
         for (let w = 0; w < requiredWidth; w++) {
-            let pixel: {
-                r: number;
-                g: number;
-                b: number;
-            };
-            if (w >= scaledWidth || h >= scaledHeight) {
-                pixel = { r: 114, g: 114, b: 114 };
-            } else {
-                pixel = getPixelBilinear(
-                    w / scaleW,
-                    h / scaleH,
-                    pixelData,
-                    imageBitmap.width,
-                    imageBitmap.height,
-                );
-            }
-            processedImage[pixelIndex] = normFunction(pixel.r);
-            processedImage[pixelIndex + channelOffsetGreen] = normFunction(
-                pixel.g,
-            );
-            processedImage[pixelIndex + channelOffsetBlue] = normFunction(
-                pixel.b,
-            );
-            pixelIndex++;
+            const { r, g, b } =
+                w >= scaledWidth || h >= scaledHeight
+                    ? { r: 114, g: 114, b: 114 }
+                    : pixelRGBBilinear(
+                          w / scale,
+                          h / scale,
+                          pixelData,
+                          width,
+                          height,
+                      );
+            yoloInput[pi] = r / 255.0;
+            yoloInput[pi + channelOffsetGreen] = g / 255.0;
+            yoloInput[pi + channelOffsetBlue] = b / 255.0;
+            pi++;
         }
     }
 
-    return {
-        data: processedImage,
-        originalSize: {
-            width: imageBitmap.width,
-            height: imageBitmap.height,
-        },
-        newSize: { width: scaledWidth, height: scaledHeight },
-    };
+    return { yoloInput, yoloSize };
 };
 
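To make the letterboxing above concrete, here is the arithmetic traced by hand for a hypothetical 1024×768 input (values not from the diff):

```ts
// Worked example of the aspect-ratio-preserving resize into the 640x640
// YOLO input, for a hypothetical 1024x768 bitmap.
const requiredWidth = 640;
const requiredHeight = 640;
const width = 1024;
const height = 768;

const scale = Math.min(requiredWidth / width, requiredHeight / height);
// Math.min(0.625, 0.8333...) = 0.625

const scaledWidth = Math.round(width * scale); // 640
const scaledHeight = Math.round(height * scale); // 480

// Rows with h >= 480 lie outside the scaled image, so the loop fills them
// with the constant gray (114, 114, 114) padding value.
console.log({ scale, scaledWidth, scaledHeight });
```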
 /**
- * @param rowOutput A Float32Array of shape [25200, 16], where each row
+ * Extract detected faces from the YOLO's output.
+ *
+ * Only detections that exceed a minimum score are returned.
+ *
+ * @param rows A Float32Array of shape [25200, 16], where each row
  * represents a bounding box.
  */
-const getFacesFromYOLOOutput = (
-    rowOutput: Float32Array,
-    minScore: number,
-): Array<FaceDetection> => {
-    const faces: Array<FaceDetection> = [];
+const faceDetectionsFromYOLOOutput = (rows: Float32Array): FaceDetection[] => {
+    const faces: FaceDetection[] = [];
     // Iterate over each row.
-    for (let i = 0; i < rowOutput.length; i += 16) {
-        const score = rowOutput[i + 4];
-        if (score < minScore) {
-            continue;
-        }
-        // The first 4 values represent the bounding box's coordinates:
-        //
-        //     (x1, y1, x2, y2)
-        //
-        const xCenter = rowOutput[i];
-        const yCenter = rowOutput[i + 1];
-        const width = rowOutput[i + 2];
-        const height = rowOutput[i + 3];
+    for (let i = 0; i < rows.length; i += 16) {
+        const score = rows[i + 4];
+        if (score < 0.7) continue;
+
+        const xCenter = rows[i];
+        const yCenter = rows[i + 1];
+        const width = rows[i + 2];
+        const height = rows[i + 3];
         const xMin = xCenter - width / 2.0; // topLeft
         const yMin = yCenter - height / 2.0; // topLeft
 
-        const leftEyeX = rowOutput[i + 5];
-        const leftEyeY = rowOutput[i + 6];
-        const rightEyeX = rowOutput[i + 7];
-        const rightEyeY = rowOutput[i + 8];
-        const noseX = rowOutput[i + 9];
-        const noseY = rowOutput[i + 10];
-        const leftMouthX = rowOutput[i + 11];
-        const leftMouthY = rowOutput[i + 12];
-        const rightMouthX = rowOutput[i + 13];
-        const rightMouthY = rowOutput[i + 14];
+        const leftEyeX = rows[i + 5];
+        const leftEyeY = rows[i + 6];
+        const rightEyeX = rows[i + 7];
+        const rightEyeY = rows[i + 8];
+        const noseX = rows[i + 9];
+        const noseY = rows[i + 10];
+        const leftMouthX = rows[i + 11];
+        const leftMouthY = rows[i + 12];
+        const rightMouthX = rows[i + 13];
+        const rightMouthY = rows[i + 14];
 
         const box = new Box({
             x: xMin,
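For reference, the layout of one 16-float row as the loop above reads it, using a hypothetical row (not real model output):

```ts
// One YOLO row: box center/size, score, then five landmark (x, y) pairs.
const row = new Float32Array([
    320, 240, 100, 120, // xCenter, yCenter, width, height
    0.92, // detection score; rows under 0.7 are skipped
    300, 220, // left eye
    340, 220, // right eye
    320, 245, // nose
    305, 270, // left mouth corner
    335, 270, // right mouth corner
    0, // row[15] is not read by this code
]);

const [xCenter, yCenter, w, h] = row;
const xMin = xCenter - w / 2.0; // 270
const yMin = yCenter - h / 2.0; // 180
console.log({ xMin, yMin, score: row[4] });
```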
@@ -491,6 +477,43 @@ function normalizeLandmarks(
     ) as Array<[number, number]>;
 }
 
+async function extractFaceImagesToFloat32(
+    faceAlignments: Array<FaceAlignment>,
+    faceSize: number,
+    image: ImageBitmap,
+): Promise<Float32Array> {
+    const faceData = new Float32Array(
+        faceAlignments.length * faceSize * faceSize * 3,
+    );
+    for (let i = 0; i < faceAlignments.length; i++) {
+        const alignedFace = faceAlignments[i];
+        const faceDataOffset = i * faceSize * faceSize * 3;
+        warpAffineFloat32List(
+            image,
+            alignedFace,
+            faceSize,
+            faceData,
+            faceDataOffset,
+        );
+    }
+    return faceData;
+}
+
+const makeFaceID = (detectedFace: DetectedFace, imageDims: Dimensions) => {
+    const part = (v: number) => clamp(v, 0.0, 0.999999).toFixed(5).substring(2);
+    const xMin = part(detectedFace.detection.box.x / imageDims.width);
+    const yMin = part(detectedFace.detection.box.y / imageDims.height);
+    const xMax = part(
+        (detectedFace.detection.box.x + detectedFace.detection.box.width) /
+            imageDims.width,
+    );
+    const yMax = part(
+        (detectedFace.detection.box.y + detectedFace.detection.box.height) /
+            imageDims.height,
+    );
+    return [detectedFace.fileId, xMin, yMin, xMax, yMax].join("_");
+};
+
 /**
  * Laplacian blur detection.
  */
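A worked example of the ID format `makeFaceID` produces, for hypothetical values (a 200×300 box at (100, 50) in a 1000×1000 image of file 1234):

```ts
// Each coordinate is normalized to [0, 1), fixed to 5 decimal places, and
// the leading "0." is dropped by substring(2).
const part = (v: number) =>
    Math.min(Math.max(v, 0.0), 0.999999).toFixed(5).substring(2);

const id = [
    1234,
    part(100 / 1000), // "10000"
    part(50 / 1000), // "05000"
    part((100 + 200) / 1000), // "30000"
    part((50 + 300) / 1000), // "35000"
].join("_");

console.log(id); // "1234_10000_05000_30000_35000"
```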
@@ -506,6 +529,8 @@ const detectBlur = (alignedFaces: Float32Array, faces: Face[]): number[] => {
         const faceImage = createGrayscaleIntMatrixFromNormalized2List(
             alignedFaces,
             i,
+            mobileFaceNetFaceSize,
+            mobileFaceNetFaceSize,
         );
         const laplacian = applyLaplacian(faceImage, direction);
         blurValues.push(matrixVariance(laplacian));
@@ -738,6 +763,12 @@ export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
     return blob;
 };
 
+const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
+    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
+    canvas.getContext("2d").drawImage(imageBitmap, 0, 0);
+    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
+};
+
 const getFaceCrop = (
     imageBitmap: ImageBitmap,
     faceDetection: FaceDetection,
@@ -766,24 +797,68 @@ const getFaceCrop = (
     };
 };
 
-async function extractFaceImagesToFloat32(
-    faceAlignments: Array<FaceAlignment>,
-    faceSize: number,
-    image: ImageBitmap,
-): Promise<Float32Array> {
-    const faceData = new Float32Array(
-        faceAlignments.length * faceSize * faceSize * 3,
-    );
-    for (let i = 0; i < faceAlignments.length; i++) {
-        const alignedFace = faceAlignments[i];
-        const faceDataOffset = i * faceSize * faceSize * 3;
-        warpAffineFloat32List(
-            image,
-            alignedFace,
-            faceSize,
-            faceData,
-            faceDataOffset,
-        );
-    }
-    return faceData;
-}
+export function cropWithRotation(
+    imageBitmap: ImageBitmap,
+    cropBox: Box,
+    rotation?: number,
+    maxSize?: Dimensions,
+    minSize?: Dimensions,
+) {
+    const box = cropBox.round();
+
+    const outputSize = { width: box.width, height: box.height };
+    if (maxSize) {
+        const minScale = Math.min(
+            maxSize.width / box.width,
+            maxSize.height / box.height,
+        );
+        if (minScale < 1) {
+            outputSize.width = Math.round(minScale * box.width);
+            outputSize.height = Math.round(minScale * box.height);
+        }
+    }
+
+    if (minSize) {
+        const maxScale = Math.max(
+            minSize.width / box.width,
+            minSize.height / box.height,
+        );
+        if (maxScale > 1) {
+            outputSize.width = Math.round(maxScale * box.width);
+            outputSize.height = Math.round(maxScale * box.height);
+        }
+    }
+
+    // log.info({ imageBitmap, box, outputSize });
+
+    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
+    const offscreenCtx = offscreen.getContext("2d");
+    offscreenCtx.imageSmoothingQuality = "high";
+
+    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
+    rotation && offscreenCtx.rotate(rotation);
+
+    const outputBox = new Box({
+        x: -outputSize.width / 2,
+        y: -outputSize.height / 2,
+        width: outputSize.width,
+        height: outputSize.height,
+    });
+
+    const enlargedBox = enlargeBox(box, 1.5);
+    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
+
+    offscreenCtx.drawImage(
+        imageBitmap,
+        enlargedBox.x,
+        enlargedBox.y,
+        enlargedBox.width,
+        enlargedBox.height,
+        enlargedOutputBox.x,
+        enlargedOutputBox.y,
+        enlargedOutputBox.width,
+        enlargedOutputBox.height,
+    );
+
+    return offscreen.transferToImageBitmap();
+}
@@ -3,93 +3,22 @@ import { decodeLivePhoto } from "@/media/live-photo";
 import log from "@/next/log";
 import { Matrix, inverse } from "ml-matrix";
 import DownloadManager from "services/download";
-import { Box, Dimensions, enlargeBox } from "services/face/geom";
-import {
-    DetectedFace,
-    FaceAlignment,
-    MLSyncFileContext,
-} from "services/face/types";
+import { FaceAlignment } from "services/face/types";
 import { getLocalFiles } from "services/fileService";
 import { EnteFile } from "types/file";
 import { getRenderableImage } from "utils/file";
 
-export const fetchImageBitmapForContext = async (
-    fileContext: MLSyncFileContext,
-) => {
-    if (fileContext.imageBitmap) {
-        return fileContext.imageBitmap;
-    }
-    if (fileContext.localFile) {
-        if (fileContext.enteFile.metadata.fileType !== FILE_TYPE.IMAGE) {
-            throw new Error("Local file of only image type is supported");
-        }
-        fileContext.imageBitmap = await getLocalFileImageBitmap(
-            fileContext.enteFile,
-            fileContext.localFile,
-        );
-    } else if (
-        [FILE_TYPE.IMAGE, FILE_TYPE.LIVE_PHOTO].includes(
-            fileContext.enteFile.metadata.fileType,
-        )
-    ) {
-        fileContext.imageBitmap = await fetchImageBitmap(fileContext.enteFile);
-    } else {
-        // TODO-ML(MR): We don't do it on videos, when will we ever come
-        // here?
-        fileContext.imageBitmap = await getThumbnailImageBitmap(
-            fileContext.enteFile,
-        );
-    }
-
-    const { width, height } = fileContext.imageBitmap;
-    fileContext.newMlFile.imageDimensions = { width, height };
-
-    return fileContext.imageBitmap;
-};
+/**
+ * Clamp {@link value} to between {@link min} and {@link max}, inclusive.
+ */
+export const clamp = (value: number, min: number, max: number) =>
+    Math.min(max, Math.max(min, value));
 
 export async function getLocalFile(fileId: number) {
     const localFiles = await getLocalFiles();
     return localFiles.find((f) => f.id === fileId);
 }
 
-export function getFaceId(detectedFace: DetectedFace, imageDims: Dimensions) {
-    const xMin = clamp(
-        detectedFace.detection.box.x / imageDims.width,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const yMin = clamp(
-        detectedFace.detection.box.y / imageDims.height,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const xMax = clamp(
-        (detectedFace.detection.box.x + detectedFace.detection.box.width) /
-            imageDims.width,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-    const yMax = clamp(
-        (detectedFace.detection.box.y + detectedFace.detection.box.height) /
-            imageDims.height,
-        0.0,
-        0.999999,
-    )
-        .toFixed(5)
-        .substring(2);
-
-    const rawFaceID = `${xMin}_${yMin}_${xMax}_${yMax}`;
-    const faceID = `${detectedFace.fileId}_${rawFaceID}`;
-
-    return faceID;
-}
-
 export const fetchImageBitmap = async (file: EnteFile) =>
     fetchRenderableBlob(file).then(createImageBitmap);
 
@@ -123,49 +52,18 @@ export async function getLocalFileImageBitmap(
     return createImageBitmap(fileBlob);
 }
 
-export function normalizePixelBetween0And1(pixelValue: number) {
-    return pixelValue / 255.0;
-}
-
-export function normalizePixelBetweenMinus1And1(pixelValue: number) {
-    return pixelValue / 127.5 - 1.0;
-}
-
-export function unnormalizePixelFromBetweenMinus1And1(pixelValue: number) {
-    return clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
-}
-
-export function readPixelColor(
-    imageData: Uint8ClampedArray,
-    width: number,
-    height: number,
-    x: number,
-    y: number,
-) {
-    if (x < 0 || x >= width || y < 0 || y >= height) {
-        return { r: 0, g: 0, b: 0, a: 0 };
-    }
-    const index = (y * width + x) * 4;
-    return {
-        r: imageData[index],
-        g: imageData[index + 1],
-        b: imageData[index + 2],
-        a: imageData[index + 3],
-    };
-}
-
-export function clamp(value: number, min: number, max: number) {
-    return Math.min(max, Math.max(min, value));
-}
-
-export function getPixelBicubic(
+/**
+ * Returns the pixel value (RGB) at the given coordinates ({@link fx},
+ * {@link fy}) using bicubic interpolation.
+ */
+export function pixelRGBBicubic(
     fx: number,
     fy: number,
     imageData: Uint8ClampedArray,
     imageWidth: number,
     imageHeight: number,
 ) {
-    // Clamp to image boundaries
+    // Clamp to image boundaries.
     fx = clamp(fx, 0, imageWidth - 1);
     fy = clamp(fy, 0, imageHeight - 1);
 
@@ -180,40 +78,35 @@ export function getPixelBicubic(
     const dx = fx - x;
     const dy = fy - y;
 
-    function cubic(
+    const cubic = (
         dx: number,
         ipp: number,
         icp: number,
         inp: number,
         iap: number,
-    ) {
-        return (
-            icp +
-            0.5 *
-                (dx * (-ipp + inp) +
-                    dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
-                    dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap))
-        );
-    }
+    ) =>
+        icp +
+        0.5 *
+            (dx * (-ipp + inp) +
+                dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
+                dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));
 
-    const icc = readPixelColor(imageData, imageWidth, imageHeight, x, y);
+    const icc = pixelRGBA(imageData, imageWidth, imageHeight, x, y);
 
     const ipp =
         px < 0 || py < 0
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, py);
     const icp =
-        px < 0
-            ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, py);
+        px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, x, py);
     const inp =
         py < 0 || nx >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, py);
     const iap =
         ax >= imageWidth || py < 0
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, py);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, py);
 
     const ip0 = cubic(dx, ipp.r, icp.r, inp.r, iap.r);
     const ip1 = cubic(dx, ipp.g, icp.g, inp.g, iap.g);
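The `cubic` helper above is the Catmull-Rom convolution kernel, so a quick sanity check (not from the diff) is that interpolating at `dx = 0` returns the center sample `icp`, and at `dx = 1` the next sample `inp`:

```ts
const cubic = (dx: number, ipp: number, icp: number, inp: number, iap: number) =>
    icp +
    0.5 *
        (dx * (-ipp + inp) +
            dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
            dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));

console.log(cubic(0, 10, 20, 30, 40)); // 20 (icp)
console.log(cubic(1, 10, 20, 30, 40)); // 30 (inp)
```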
@@ -221,17 +114,15 @@ export function getPixelBicubic(
     // const ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);
 
     const ipc =
-        px < 0
-            ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, y);
+        px < 0 ? icc : pixelRGBA(imageData, imageWidth, imageHeight, px, y);
     const inc =
         nx >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, y);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, y);
     const iac =
         ax >= imageWidth
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, y);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, y);
 
     const ic0 = cubic(dx, ipc.r, icc.r, inc.r, iac.r);
     const ic1 = cubic(dx, ipc.g, icc.g, inc.g, iac.g);
@@ -241,19 +132,19 @@ export function getPixelBicubic(
     const ipn =
         px < 0 || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, ny);
     const icn =
         ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, x, ny);
     const inn =
         nx >= imageWidth || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, ny);
     const ian =
         ax >= imageWidth || ny >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, ny);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, ny);
 
     const in0 = cubic(dx, ipn.r, icn.r, inn.r, ian.r);
     const in1 = cubic(dx, ipn.g, icn.g, inn.g, ian.g);
@@ -263,19 +154,19 @@ export function getPixelBicubic(
     const ipa =
         px < 0 || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, px, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, px, ay);
     const ica =
         ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, x, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, x, ay);
     const ina =
         nx >= imageWidth || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, nx, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, nx, ay);
     const iaa =
         ax >= imageWidth || ay >= imageHeight
             ? icc
-            : readPixelColor(imageData, imageWidth, imageHeight, ax, ay);
+            : pixelRGBA(imageData, imageWidth, imageHeight, ax, ay);
 
     const ia0 = cubic(dx, ipa.r, ica.r, ina.r, iaa.r);
     const ia1 = cubic(dx, ipa.g, ica.g, ina.g, iaa.g);
@@ -290,19 +181,41 @@ export function getPixelBicubic(
     return { r: c0, g: c1, b: c2 };
 }
 
-/// Returns the pixel value (RGB) at the given coordinates using bilinear interpolation.
-export function getPixelBilinear(
+const pixelRGBA = (
+    imageData: Uint8ClampedArray,
+    width: number,
+    height: number,
+    x: number,
+    y: number,
+) => {
+    if (x < 0 || x >= width || y < 0 || y >= height) {
+        return { r: 0, g: 0, b: 0, a: 0 };
+    }
+    const index = (y * width + x) * 4;
+    return {
+        r: imageData[index],
+        g: imageData[index + 1],
+        b: imageData[index + 2],
+        a: imageData[index + 3],
+    };
+};
+
+/**
+ * Returns the pixel value (RGB) at the given coordinates ({@link fx},
+ * {@link fy}) using bilinear interpolation.
+ */
+export function pixelRGBBilinear(
     fx: number,
     fy: number,
     imageData: Uint8ClampedArray,
     imageWidth: number,
     imageHeight: number,
 ) {
-    // Clamp to image boundaries
+    // Clamp to image boundaries.
     fx = clamp(fx, 0, imageWidth - 1);
     fy = clamp(fy, 0, imageHeight - 1);
 
-    // Get the surrounding coordinates and their weights
+    // Get the surrounding coordinates and their weights.
     const x0 = Math.floor(fx);
     const x1 = Math.ceil(fx);
     const y0 = Math.floor(fy);
@@ -312,27 +225,26 @@ export function pixelRGBBilinear(
     const dx1 = 1.0 - dx;
     const dy1 = 1.0 - dy;
 
-    // Get the original pixels
-    const pixel1 = readPixelColor(imageData, imageWidth, imageHeight, x0, y0);
-    const pixel2 = readPixelColor(imageData, imageWidth, imageHeight, x1, y0);
-    const pixel3 = readPixelColor(imageData, imageWidth, imageHeight, x0, y1);
-    const pixel4 = readPixelColor(imageData, imageWidth, imageHeight, x1, y1);
+    // Get the original pixels.
+    const pixel1 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y0);
+    const pixel2 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y0);
+    const pixel3 = pixelRGBA(imageData, imageWidth, imageHeight, x0, y1);
+    const pixel4 = pixelRGBA(imageData, imageWidth, imageHeight, x1, y1);
 
-    function bilinear(val1: number, val2: number, val3: number, val4: number) {
-        return Math.round(
+    const bilinear = (val1: number, val2: number, val3: number, val4: number) =>
+        Math.round(
             val1 * dx1 * dy1 +
                 val2 * dx * dy1 +
                 val3 * dx1 * dy +
                 val4 * dx * dy,
         );
-    }
 
-    // Interpolate the pixel values
-    const red = bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r);
-    const green = bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g);
-    const blue = bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b);
-
-    return { r: red, g: green, b: blue };
+    // Return interpolated pixel colors.
+    return {
+        r: bilinear(pixel1.r, pixel2.r, pixel3.r, pixel4.r),
+        g: bilinear(pixel1.g, pixel2.g, pixel3.g, pixel4.g),
+        b: bilinear(pixel1.b, pixel2.b, pixel3.b, pixel4.b),
+    };
 }
 
 export function warpAffineFloat32List(
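A tiny worked example of the bilinear weighting (hypothetical values, not from the diff): sampling exactly halfway between four pixels averages them equally.

```ts
// At fx = fy = 0.5 between four samples, all four weights are 0.25.
const dx = 0.5,
    dy = 0.5,
    dx1 = 0.5,
    dy1 = 0.5;
const bilinear = (v1: number, v2: number, v3: number, v4: number) =>
    Math.round(v1 * dx1 * dy1 + v2 * dx * dy1 + v3 * dx1 * dy + v4 * dx * dy);

console.log(bilinear(0, 100, 200, 100)); // 100
```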
@@ -342,7 +254,7 @@ export function warpAffineFloat32List(
     inputData: Float32Array,
     inputStartIndex: number,
 ): void {
-    // Get the pixel data
+    // Get the pixel data.
     const offscreenCanvas = new OffscreenCanvas(
         imageBitmap.width,
         imageBitmap.height,
@@ -382,8 +294,8 @@ export function warpAffineFloat32List(
             const yOrigin =
                 a10Prime * (xTrans - b00) + a11Prime * (yTrans - b10);
 
-            // Get the pixel from interpolation
-            const pixel = getPixelBicubic(
+            // Get the pixel RGB using bicubic interpolation.
+            const { r, g, b } = pixelRGBBicubic(
                 xOrigin,
                 yOrigin,
                 pixelData,
@@ -394,20 +306,26 @@ export function warpAffineFloat32List(
             // Set the pixel in the input data
             const index = (yTrans * faceSize + xTrans) * 3;
             inputData[inputStartIndex + index] =
-                normalizePixelBetweenMinus1And1(pixel.r);
+                normalizePixelBetweenMinus1And1(r);
             inputData[inputStartIndex + index + 1] =
-                normalizePixelBetweenMinus1And1(pixel.g);
+                normalizePixelBetweenMinus1And1(g);
             inputData[inputStartIndex + index + 2] =
-                normalizePixelBetweenMinus1And1(pixel.b);
+                normalizePixelBetweenMinus1And1(b);
         }
     }
 }
 
+const normalizePixelBetweenMinus1And1 = (pixelValue: number) =>
+    pixelValue / 127.5 - 1.0;
+
+const unnormalizePixelFromBetweenMinus1And1 = (pixelValue: number) =>
+    clamp(Math.round((pixelValue + 1.0) * 127.5), 0, 255);
+
 export function createGrayscaleIntMatrixFromNormalized2List(
     imageList: Float32Array,
     faceNumber: number,
-    width: number = 112,
-    height: number = 112,
+    width: number,
+    height: number,
 ): number[][] {
     const startIndex = faceNumber * width * height * 3;
     return Array.from({ length: height }, (_, y) =>
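The two normalization helpers moved here round-trip 8-bit pixel values through the [-1, 1] range used for the face crop inputs; a quick check (not from the diff):

```ts
const normalize = (v: number) => v / 127.5 - 1.0;
const unnormalize = (v: number) =>
    Math.min(255, Math.max(0, Math.round((v + 1.0) * 127.5)));

console.log(normalize(0)); // -1
console.log(normalize(255)); // 1
console.log(unnormalize(normalize(200))); // 200
```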
@@ -435,155 +353,3 @@ export function createGrayscaleIntMatrixFromNormalized2List(
         }),
     );
 }
-
-export function resizeToSquare(img: ImageBitmap, size: number) {
-    const scale = size / Math.max(img.height, img.width);
-    const width = scale * img.width;
-    const height = scale * img.height;
-    const offscreen = new OffscreenCanvas(size, size);
-    const ctx = offscreen.getContext("2d");
-    ctx.imageSmoothingQuality = "high";
-    ctx.drawImage(img, 0, 0, width, height);
-    const resizedImage = offscreen.transferToImageBitmap();
-    return { image: resizedImage, width, height };
-}
-
-export function transform(
-    imageBitmap: ImageBitmap,
-    affineMat: number[][],
-    outputWidth: number,
-    outputHeight: number,
-) {
-    const offscreen = new OffscreenCanvas(outputWidth, outputHeight);
-    const context = offscreen.getContext("2d");
-    context.imageSmoothingQuality = "high";
-
-    context.transform(
-        affineMat[0][0],
-        affineMat[1][0],
-        affineMat[0][1],
-        affineMat[1][1],
-        affineMat[0][2],
-        affineMat[1][2],
-    );
-
-    context.drawImage(imageBitmap, 0, 0);
-    return offscreen.transferToImageBitmap();
-}
-
-export function crop(imageBitmap: ImageBitmap, cropBox: Box, size: number) {
-    const dimensions: Dimensions = {
-        width: size,
-        height: size,
-    };
-
-    return cropWithRotation(imageBitmap, cropBox, 0, dimensions, dimensions);
-}
-
-// these utils only work in env where OffscreenCanvas is available
-
-export function cropWithRotation(
-    imageBitmap: ImageBitmap,
-    cropBox: Box,
-    rotation?: number,
-    maxSize?: Dimensions,
-    minSize?: Dimensions,
-) {
-    const box = cropBox.round();
-
-    const outputSize = { width: box.width, height: box.height };
-    if (maxSize) {
-        const minScale = Math.min(
-            maxSize.width / box.width,
-            maxSize.height / box.height,
-        );
-        if (minScale < 1) {
-            outputSize.width = Math.round(minScale * box.width);
-            outputSize.height = Math.round(minScale * box.height);
-        }
-    }
-
-    if (minSize) {
-        const maxScale = Math.max(
-            minSize.width / box.width,
-            minSize.height / box.height,
-        );
-        if (maxScale > 1) {
-            outputSize.width = Math.round(maxScale * box.width);
-            outputSize.height = Math.round(maxScale * box.height);
-        }
-    }
-
-    // log.info({ imageBitmap, box, outputSize });
-
-    const offscreen = new OffscreenCanvas(outputSize.width, outputSize.height);
-    const offscreenCtx = offscreen.getContext("2d");
-    offscreenCtx.imageSmoothingQuality = "high";
-
-    offscreenCtx.translate(outputSize.width / 2, outputSize.height / 2);
-    rotation && offscreenCtx.rotate(rotation);
-
-    const outputBox = new Box({
-        x: -outputSize.width / 2,
-        y: -outputSize.height / 2,
-        width: outputSize.width,
-        height: outputSize.height,
-    });
-
-    const enlargedBox = enlargeBox(box, 1.5);
-    const enlargedOutputBox = enlargeBox(outputBox, 1.5);
-
-    offscreenCtx.drawImage(
-        imageBitmap,
-        enlargedBox.x,
-        enlargedBox.y,
-        enlargedBox.width,
-        enlargedBox.height,
-        enlargedOutputBox.x,
-        enlargedOutputBox.y,
-        enlargedOutputBox.width,
-        enlargedOutputBox.height,
-    );
-
-    return offscreen.transferToImageBitmap();
-}
-
-export function addPadding(image: ImageBitmap, padding: number) {
-    const scale = 1 + padding * 2;
-    const width = scale * image.width;
-    const height = scale * image.height;
-    const offscreen = new OffscreenCanvas(width, height);
-    const ctx = offscreen.getContext("2d");
-    ctx.imageSmoothingEnabled = false;
-    ctx.drawImage(
-        image,
-        width / 2 - image.width / 2,
-        height / 2 - image.height / 2,
-        image.width,
-        image.height,
-    );
-
-    return offscreen.transferToImageBitmap();
-}
-
-export interface BlobOptions {
-    type?: string;
-    quality?: number;
-}
-
-export async function imageBitmapToBlob(imageBitmap: ImageBitmap) {
-    const offscreen = new OffscreenCanvas(
-        imageBitmap.width,
-        imageBitmap.height,
-    );
-    offscreen.getContext("2d").drawImage(imageBitmap, 0, 0);
-
-    return offscreen.convertToBlob({
-        type: "image/jpeg",
-        quality: 0.8,
-    });
-}
-
-export async function imageBitmapFromBlob(blob: Blob) {
-    return createImageBitmap(blob);
-}