Align faces with bilinear interpolation in dart

This commit is contained in:
laurenspriem 2024-03-11 18:50:00 +05:30
parent ebc69b645e
commit 5b3519ea38
2 changed files with 175 additions and 198 deletions

View file

@ -286,8 +286,8 @@ class ImageMlIsolate {
_resetInactivityTimer();
} else {
_logger.info(
'Clustering Isolate has been inactive for ${_inactivityDuration.inSeconds} seconds with no tasks running. Killing isolate.',
);
'Clustering Isolate has been inactive for ${_inactivityDuration.inSeconds} seconds with no tasks running. Killing isolate.',
);
dispose();
}
});

View file

@ -167,6 +167,34 @@ List<List<int>> createGrayscaleIntMatrixFromImage(
);
}
List<List<int>> createGrayscaleIntMatrixFromNormalized2List(
Float32List imageList,
int startIndex, {
int width = 112,
int height = 112,
}) {
return List.generate(
height,
(y) => List.generate(
width,
(x) {
// 0.299 Red + 0.587 Green + 0.114 Blue
final pixelIndex = startIndex + 3 * (y * width + x);
return (0.299 * unnormalizePixelRange2(imageList[pixelIndex]) +
0.587 * unnormalizePixelRange2(imageList[pixelIndex + 1]) +
0.114 * unnormalizePixelRange2(imageList[pixelIndex + 2]))
.round()
.clamp(0, 255);
// return unnormalizePixelRange2(
// (0.299 * imageList[pixelIndex] +
// 0.587 * imageList[pixelIndex + 1] +
// 0.114 * imageList[pixelIndex + 2]),
// ).round().clamp(0, 255);
},
),
);
}
Float32List createFloat32ListFromImageChannelsFirst(
Image image,
ByteData byteDataRgba, {
@ -239,6 +267,13 @@ double normalizePixelRange2(num pixelValue) {
return (pixelValue / 127.5) - 1;
}
/// Function unnormalizes the pixel value to be in range [0, 255].
///
/// It assumes that the pixel value is originally in range [-1, 1]
int unnormalizePixelRange2(double pixelValue) {
return ((pixelValue + 1) * 127.5).round().clamp(0, 255);
}
/// Function normalizes the pixel value to be in range [0, 1].
///
/// It assumes that the pixel value is originally in range [0, 255]
@ -816,8 +851,9 @@ Future<
return (alignedImages, alignmentResults, isBlurs, blurValues, originalSize);
}
@Deprecated("Old image manipulation that used canvas, causing issues on iOS")
Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
preprocessToMobileFaceNetFloat32List(
preprocessToMobileFaceNetFloat32ListCanvas(
String imagePath,
List<FaceDetectionRelative> relativeFaces, {
int width = 112,
@ -905,145 +941,113 @@ Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
);
}
/// Function to warp an image [imageData] with an affine transformation using the estimated [transformationMatrix].
///
/// Returns the warped image in the specified width and height, in [Uint8List] RGBA format.
Future<Uint8List> warpAffineToUint8List(
Image inputImage,
ByteData imgByteDataRgba,
List<List<double>> transformationMatrix, {
required int width,
required int height,
Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
preprocessToMobileFaceNetFloat32List(
String imagePath,
List<FaceDetectionRelative> relativeFaces, {
int width = 112,
int height = 112,
}) async {
final Uint8List outputList = Uint8List(4 * width * height);
final Uint8List imageData = await File(imagePath).readAsBytes();
final stopwatch = Stopwatch()..start();
final Image image = await decodeImageFromData(imageData);
final imageByteData = await getByteDataFromImage(image);
stopwatch.stop();
log("Face Alignment decoding ui image took: ${stopwatch.elapsedMilliseconds} ms");
final Size originalSize =
Size(image.width.toDouble(), image.height.toDouble());
if (width != 112 || height != 112) {
throw Exception(
'Width and height must be 112, other transformations are not supported yet.',
);
}
final A = Matrix.fromList([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
]);
final aInverse = A.inverse();
// final aInverseMinus = aInverse * -1;
final B = Vector.fromList(
[transformationMatrix[0][2], transformationMatrix[1][2]],
final List<FaceDetectionAbsolute> absoluteFaces =
relativeToAbsoluteDetections(
relativeDetections: relativeFaces,
imageWidth: image.width,
imageHeight: image.height,
);
final b00 = B[0];
final b10 = B[1];
final a00Prime = aInverse[0][0];
final a01Prime = aInverse[0][1];
final a10Prime = aInverse[1][0];
final a11Prime = aInverse[1][1];
for (int yTrans = 0; yTrans < height; ++yTrans) {
for (int xTrans = 0; xTrans < width; ++xTrans) {
// Perform inverse affine transformation (original implementation, intuitive but slow)
// final X = aInverse * (Vector.fromList([xTrans, yTrans]) - B);
// final X = aInverseMinus * (B - [xTrans, yTrans]);
// final xList = X.asFlattenedList;
// num xOrigin = xList[0];
// num yOrigin = xList[1];
final List<List<List<double>>> faceLandmarks =
absoluteFaces.map((face) => face.allKeypoints).toList();
// Perform inverse affine transformation (fast implementation, less intuitive)
num xOrigin = (xTrans - b00) * a00Prime + (yTrans - b10) * a01Prime;
num yOrigin = (xTrans - b00) * a10Prime + (yTrans - b10) * a11Prime;
final alignedImagesFloat32List =
Float32List(3 * width * height * faceLandmarks.length);
final alignmentResults = <AlignmentResult>[];
final isBlurs = <bool>[];
final blurValues = <double>[];
// Clamp to image boundaries
xOrigin = xOrigin.clamp(0, inputImage.width - 1);
yOrigin = yOrigin.clamp(0, inputImage.height - 1);
// Bilinear interpolation
final int x0 = xOrigin.floor();
final int x1 = xOrigin.ceil();
final int y0 = yOrigin.floor();
final int y1 = yOrigin.ceil();
// Get the original pixels
final Color pixel1 = readPixelColor(inputImage, imgByteDataRgba, x0, y0);
final Color pixel2 = readPixelColor(inputImage, imgByteDataRgba, x1, y0);
final Color pixel3 = readPixelColor(inputImage, imgByteDataRgba, x0, y1);
final Color pixel4 = readPixelColor(inputImage, imgByteDataRgba, x1, y1);
// Calculate the weights for each pixel
final fx = xOrigin - x0;
final fy = yOrigin - y0;
final fx1 = 1.0 - fx;
final fy1 = 1.0 - fy;
// Calculate the weighted sum of pixels
final int r = bilinearInterpolation(
pixel1.red,
pixel2.red,
pixel3.red,
pixel4.red,
fx,
fy,
fx1,
fy1,
);
final int g = bilinearInterpolation(
pixel1.green,
pixel2.green,
pixel3.green,
pixel4.green,
fx,
fy,
fx1,
fy1,
);
final int b = bilinearInterpolation(
pixel1.blue,
pixel2.blue,
pixel3.blue,
pixel4.blue,
fx,
fy,
fx1,
fy1,
);
// Set the new pixel
outputList[4 * (yTrans * width + xTrans)] = r;
outputList[4 * (yTrans * width + xTrans) + 1] = g;
outputList[4 * (yTrans * width + xTrans) + 2] = b;
outputList[4 * (yTrans * width + xTrans) + 3] = 255;
int alignedImageIndex = 0;
for (final faceLandmark in faceLandmarks) {
final (alignmentResult, correctlyEstimated) =
SimilarityTransform.instance.estimate(faceLandmark);
if (!correctlyEstimated) {
alignedImageIndex += 3 * width * height;
alignmentResults.add(AlignmentResult.empty());
continue;
}
}
alignmentResults.add(alignmentResult);
return outputList;
warpAffineFloat32List(
image,
imageByteData,
alignmentResult.affineMatrix,
alignedImagesFloat32List,
alignedImageIndex,
);
final blurDetectionStopwatch = Stopwatch()..start();
final faceGrayMatrix = createGrayscaleIntMatrixFromNormalized2List(
alignedImagesFloat32List,
alignedImageIndex,
);
alignedImageIndex += 3 * width * height;
final grayscalems = blurDetectionStopwatch.elapsedMilliseconds;
log('creating grayscale matrix took $grayscalems ms');
final (isBlur, blurValue) = await BlurDetectionService.instance
.predictIsBlurGrayLaplacian(faceGrayMatrix);
final blurms = blurDetectionStopwatch.elapsedMilliseconds - grayscalems;
log('blur detection took $blurms ms');
log(
'total blur detection took ${blurDetectionStopwatch.elapsedMilliseconds} ms',
);
blurDetectionStopwatch.stop();
isBlurs.add(isBlur);
blurValues.add(blurValue);
}
return (
alignedImagesFloat32List,
alignmentResults,
isBlurs,
blurValues,
originalSize
);
}
/// Function to warp an image [imageData] with an affine transformation using the estimated [transformationMatrix].
///
/// Returns a [Num3DInputMatrix], potentially normalized (RGB) and ready to be used as input for a ML model.
Future<Double3DInputMatrix> warpAffineToMatrix(
void warpAffineFloat32List(
Image inputImage,
ByteData imgByteDataRgba,
List<List<double>> transformationMatrix, {
required int width,
required int height,
bool normalize = true,
}) async {
final List<List<List<double>>> outputMatrix = List.generate(
height,
(y) => List.generate(
width,
(_) => List.filled(3, 0.0),
),
);
final double Function(num) pixelValue =
normalize ? normalizePixelRange2 : (num value) => value.toDouble();
List<List<double>> affineMatrix,
Float32List outputList,
int startIndex, {
int width = 112,
int height = 112,
}) {
if (width != 112 || height != 112) {
throw Exception(
'Width and height must be 112, other transformations are not supported yet.',
);
}
final transformationMatrix = affineMatrix
.map(
(row) => row.map((e) {
if (e != 1.0) {
return e * 112;
} else {
return 1.0;
}
}).toList(),
)
.toList();
final A = Matrix.fromList([
[transformationMatrix[0][0], transformationMatrix[0][1]],
[transformationMatrix[1][0], transformationMatrix[1][1]],
@ -1070,73 +1074,21 @@ Future<Double3DInputMatrix> warpAffineToMatrix(
// num yOrigin = xList[1];
// Perform inverse affine transformation (fast implementation, less intuitive)
num xOrigin = (xTrans - b00) * a00Prime + (yTrans - b10) * a01Prime;
num yOrigin = (xTrans - b00) * a10Prime + (yTrans - b10) * a11Prime;
final num xOrigin = (xTrans - b00) * a00Prime + (yTrans - b10) * a01Prime;
final num yOrigin = (xTrans - b00) * a10Prime + (yTrans - b10) * a11Prime;
// Clamp to image boundaries
xOrigin = xOrigin.clamp(0, inputImage.width - 1);
yOrigin = yOrigin.clamp(0, inputImage.height - 1);
// Bilinear interpolation
final int x0 = xOrigin.floor();
final int x1 = xOrigin.ceil();
final int y0 = yOrigin.floor();
final int y1 = yOrigin.ceil();
// Get the original pixels
final Color pixel1 = readPixelColor(inputImage, imgByteDataRgba, x0, y0);
final Color pixel2 = readPixelColor(inputImage, imgByteDataRgba, x1, y0);
final Color pixel3 = readPixelColor(inputImage, imgByteDataRgba, x0, y1);
final Color pixel4 = readPixelColor(inputImage, imgByteDataRgba, x1, y1);
// Calculate the weights for each pixel
final fx = xOrigin - x0;
final fy = yOrigin - y0;
final fx1 = 1.0 - fx;
final fy1 = 1.0 - fy;
// Calculate the weighted sum of pixels
final int r = bilinearInterpolation(
pixel1.red,
pixel2.red,
pixel3.red,
pixel4.red,
fx,
fy,
fx1,
fy1,
);
final int g = bilinearInterpolation(
pixel1.green,
pixel2.green,
pixel3.green,
pixel4.green,
fx,
fy,
fx1,
fy1,
);
final int b = bilinearInterpolation(
pixel1.blue,
pixel2.blue,
pixel3.blue,
pixel4.blue,
fx,
fy,
fx1,
fy1,
);
final Color pixel =
getPixelBilinear(xOrigin, yOrigin, inputImage, imgByteDataRgba);
// Set the new pixel
outputMatrix[yTrans][xTrans] = [
pixelValue(r),
pixelValue(g),
pixelValue(b),
];
outputList[startIndex + 3 * (yTrans * width + xTrans)] =
normalizePixelRange2(pixel.red);
outputList[startIndex + 3 * (yTrans * width + xTrans) + 1] =
normalizePixelRange2(pixel.green);
outputList[startIndex + 3 * (yTrans * width + xTrans) + 2] =
normalizePixelRange2(pixel.blue);
}
}
return outputMatrix;
}
/// Generates a face thumbnail from [imageData] and a [faceDetection].
@ -1230,18 +1182,43 @@ Future<Uint8List> cropAndPadFaceData(
return await encodeImageToUint8List(facePadded);
}
int bilinearInterpolation(
num val1,
num val2,
num val3,
num val4,
num fx,
num fy,
num fx1,
num fy1,
) {
return (val1 * fx1 * fy1 + val2 * fx * fy1 + val3 * fx1 * fy + val4 * fx * fy)
.round();
Color getPixelBilinear(num fx, num fy, Image image, ByteData byteDataRgba) {
// Clamp to image boundaries
fx = fx.clamp(0, image.width - 1);
fy = fy.clamp(0, image.height - 1);
// Get the surrounding coordinates and their weights
final int x0 = fx.floor();
final int x1 = fx.ceil();
final int y0 = fy.floor();
final int y1 = fy.ceil();
final dx = fx - x0;
final dy = fy - y0;
final dx1 = 1.0 - dx;
final dy1 = 1.0 - dy;
// Get the original pixels
final Color pixel1 = readPixelColor(image, byteDataRgba, x0, y0);
final Color pixel2 = readPixelColor(image, byteDataRgba, x1, y0);
final Color pixel3 = readPixelColor(image, byteDataRgba, x0, y1);
final Color pixel4 = readPixelColor(image, byteDataRgba, x1, y1);
int bilinear(
num val1,
num val2,
num val3,
num val4,
) =>
(val1 * dx1 * dy1 + val2 * dx * dy1 + val3 * dx1 * dy + val4 * dx * dy)
.round();
// Calculate the weighted sum of pixels
final int r = bilinear(pixel1.red, pixel2.red, pixel3.red, pixel4.red);
final int g =
bilinear(pixel1.green, pixel2.green, pixel3.green, pixel4.green);
final int b = bilinear(pixel1.blue, pixel2.blue, pixel3.blue, pixel4.blue);
return Color.fromRGBO(r, g, b, 1.0);
}
List<double> getAlignedFaceBox(AlignmentResult alignment) {