diff --git a/mobile/lib/face/model/dimension.dart b/mobile/lib/face/model/dimension.dart new file mode 100644 index 000000000..d4ae7a3bc --- /dev/null +++ b/mobile/lib/face/model/dimension.dart @@ -0,0 +1,34 @@ +class Dimensions { + final int width; + final int height; + + const Dimensions({required this.width, required this.height}); + + @override + bool operator ==(Object other) { + if (identical(this, other)) return true; + return other is Dimensions && other.width == width && other.height == height; + } + + @override + int get hashCode => Object.hash(width, height); + + @override + String toString() { + return 'Dimensions(width: $width, height: $height)'; + } + + Map toJson() { + return { + 'width': width, + 'height': height, + }; + } + + factory Dimensions.fromJson(Map json) { + return Dimensions( + width: json['width'] as int, + height: json['height'] as int, + ); + } +} diff --git a/mobile/lib/services/machine_learning/face_ml/face_detection/detection.dart b/mobile/lib/services/machine_learning/face_ml/face_detection/detection.dart index b1f7a9bcd..7aa088141 100644 --- a/mobile/lib/services/machine_learning/face_ml/face_detection/detection.dart +++ b/mobile/lib/services/machine_learning/face_ml/face_detection/detection.dart @@ -1,5 +1,6 @@ import 'dart:math' show sqrt, pow; -import 'dart:ui' show Size; + +import "package:photos/face/model/dimension.dart"; abstract class Detection { final double score; @@ -179,8 +180,8 @@ class FaceDetectionRelative extends Detection { } void correctForMaintainedAspectRatio( - Size originalSize, - Size newSize, + Dimensions originalSize, + Dimensions newSize, ) { // Return if both are the same size, meaning no scaling was done on both width and height if (originalSize == newSize) { diff --git a/mobile/lib/services/machine_learning/face_ml/face_detection/yolov5face/onnx_face_detection.dart b/mobile/lib/services/machine_learning/face_ml/face_detection/yolov5face/onnx_face_detection.dart index a2138fe7a..83ff20d3d 100644 --- a/mobile/lib/services/machine_learning/face_ml/face_detection/yolov5face/onnx_face_detection.dart +++ b/mobile/lib/services/machine_learning/face_ml/face_detection/yolov5face/onnx_face_detection.dart @@ -9,6 +9,7 @@ import 
"package:computer/computer.dart"; import 'package:flutter/material.dart'; import 'package:logging/logging.dart'; import 'package:onnxruntime/onnxruntime.dart'; +import "package:photos/face/model/dimension.dart"; import 'package:photos/services/machine_learning/face_ml/face_detection/detection.dart'; import 'package:photos/services/machine_learning/face_ml/face_detection/naive_non_max_suppression.dart'; import 'package:photos/services/machine_learning/face_ml/face_detection/yolov5face/yolo_face_detection_exceptions.dart'; @@ -143,7 +144,7 @@ class YoloOnnxFaceDetection { case FaceDetectionOperation.yoloInferenceAndPostProcessing: final inputImageList = args['inputImageList'] as Float32List; final inputShape = args['inputShape'] as List; - final newSize = args['newSize'] as Size; + final newSize = args['newSize'] as Dimensions; final sessionAddress = args['sessionAddress'] as int; final timeSentToIsolate = args['timeNow'] as DateTime; final delaySentToIsolate = @@ -249,7 +250,7 @@ class YoloOnnxFaceDetection { } /// Detects faces in the given image data. - Future<(List, Size)> predict( + Future<(List, Dimensions)> predict( Uint8List imageData, ) async { assert(isInitialized); @@ -314,7 +315,7 @@ class YoloOnnxFaceDetection { } /// Detects faces in the given image data. - static Future<(List, Size)> predictSync( + static Future<(List, Dimensions)> predictSync( ui.Image image, ByteData imageByteData, int sessionAddress, @@ -384,7 +385,7 @@ class YoloOnnxFaceDetection { } /// Detects faces in the given image data. 
- Future<(List, Size)> predictInIsolate( + Future<(List, Dimensions)> predictInIsolate( Uint8List imageData, ) async { await ensureSpawnedIsolate(); @@ -446,7 +447,7 @@ class YoloOnnxFaceDetection { return (relativeDetections, originalSize); } - Future<(List, Size)> predictInComputer( + Future<(List, Dimensions)> predictInComputer( String imagePath, ) async { assert(isInitialized); @@ -524,7 +525,7 @@ class YoloOnnxFaceDetection { final stopwatchDecoding = Stopwatch()..start(); final List inputImageDataLists = []; - final List<(Size, Size)> originalAndNewSizeList = []; + final List<(Dimensions, Dimensions)> originalAndNewSizeList = []; int concatenatedImageInputsLength = 0; for (final imageData in imageDataList) { final (inputImageList, originalSize, newSize) = @@ -624,9 +625,9 @@ class YoloOnnxFaceDetection { // Account for the fact that the aspect ratio was maintained for (final faceDetection in relativeDetections) { faceDetection.correctForMaintainedAspectRatio( - Size( - kInputWidth.toDouble(), - kInputHeight.toDouble(), + const Dimensions( + width: kInputWidth, + height: kInputHeight, ), originalAndNewSizeList[imageOutputToUse].$2, ); @@ -653,7 +654,7 @@ class YoloOnnxFaceDetection { static List _yoloPostProcessOutputs( List? 
outputs, - Size newSize, + Dimensions newSize, ) { // // Get output tensors final nestedResults = @@ -684,9 +685,9 @@ class YoloOnnxFaceDetection { // Account for the fact that the aspect ratio was maintained for (final faceDetection in relativeDetections) { faceDetection.correctForMaintainedAspectRatio( - Size( - kInputWidth.toDouble(), - kInputHeight.toDouble(), + const Dimensions( + width: kInputWidth, + height: kInputHeight, ), newSize, ); @@ -735,7 +736,7 @@ class YoloOnnxFaceDetection { ) async { final inputImageList = args['inputImageList'] as Float32List; final inputShape = args['inputShape'] as List; - final newSize = args['newSize'] as Size; + final newSize = args['newSize'] as Dimensions; final sessionAddress = args['sessionAddress'] as int; final timeSentToIsolate = args['timeNow'] as DateTime; final delaySentToIsolate = diff --git a/mobile/lib/services/machine_learning/face_ml/face_ml_result.dart b/mobile/lib/services/machine_learning/face_ml/face_ml_result.dart index 892ce8483..5ad0c4eee 100644 --- a/mobile/lib/services/machine_learning/face_ml/face_ml_result.dart +++ b/mobile/lib/services/machine_learning/face_ml/face_ml_result.dart @@ -1,7 +1,8 @@ import "dart:convert" show jsonEncode, jsonDecode; -import "package:flutter/material.dart" show Size, debugPrint, immutable; +import "package:flutter/material.dart" show debugPrint, immutable; import "package:logging/logging.dart"; +import "package:photos/face/model/dimension.dart"; import "package:photos/models/file/file.dart"; import 'package:photos/models/ml/ml_typedefs.dart'; import "package:photos/models/ml/ml_versions.dart"; @@ -284,8 +285,7 @@ class FaceMlResult { final List faces; - final Size? faceDetectionImageSize; - final Size? 
faceAlignmentImageSize; + final Dimensions decodedImageSize; final int mlVersion; final bool errorOccured; @@ -319,8 +319,7 @@ class FaceMlResult { required this.mlVersion, required this.errorOccured, required this.onlyThumbnailUsed, - required this.faceDetectionImageSize, - this.faceAlignmentImageSize, + required this.decodedImageSize, }); Map _toJson() => { @@ -329,16 +328,10 @@ class FaceMlResult { 'mlVersion': mlVersion, 'errorOccured': errorOccured, 'onlyThumbnailUsed': onlyThumbnailUsed, - if (faceDetectionImageSize != null) - 'faceDetectionImageSize': { - 'width': faceDetectionImageSize!.width, - 'height': faceDetectionImageSize!.height, - }, - if (faceAlignmentImageSize != null) - 'faceAlignmentImageSize': { - 'width': faceAlignmentImageSize!.width, - 'height': faceAlignmentImageSize!.height, - }, + 'decodedImageSize': { + 'width': decodedImageSize.width, + 'height': decodedImageSize.height, + }, }; String toJsonString() => jsonEncode(_toJson()); @@ -352,18 +345,19 @@ class FaceMlResult { mlVersion: json['mlVersion'], errorOccured: json['errorOccured'] ?? false, onlyThumbnailUsed: json['onlyThumbnailUsed'] ?? false, - faceDetectionImageSize: json['faceDetectionImageSize'] == null - ? null - : Size( - json['faceDetectionImageSize']['width'], - json['faceDetectionImageSize']['height'], - ), - faceAlignmentImageSize: json['faceAlignmentImageSize'] == null - ? null - : Size( - json['faceAlignmentImageSize']['width'], - json['faceAlignmentImageSize']['height'], - ), + decodedImageSize: json['decodedImageSize'] != null + ? Dimensions( + width: json['decodedImageSize']['width'], + height: json['decodedImageSize']['height'], + ) + : json['faceDetectionImageSize'] == null + ? const Dimensions(width: -1, height: -1) + : Dimensions( + width: (json['faceDetectionImageSize']['width'] as double) + .truncate(), + height: (json['faceDetectionImageSize']['height'] as double) + .truncate(), + ), ); } @@ -400,8 +394,7 @@ class FaceMlResultBuilder { List faces = []; - Size? 
faceDetectionImageSize; - Size? faceAlignmentImageSize; + Dimensions decodedImageSize; int mlVersion; bool errorOccured; @@ -412,6 +405,7 @@ class FaceMlResultBuilder { this.mlVersion = faceMlVersion, this.errorOccured = false, this.onlyThumbnailUsed = false, + this.decodedImageSize = const Dimensions(width: -1, height: -1), }); FaceMlResultBuilder.fromEnteFile( @@ -419,6 +413,7 @@ class FaceMlResultBuilder { this.mlVersion = faceMlVersion, this.errorOccured = false, this.onlyThumbnailUsed = false, + this.decodedImageSize = const Dimensions(width: -1, height: -1), }) : fileId = file.uploadedFileID ?? -1; FaceMlResultBuilder.fromEnteFileID( @@ -426,13 +421,14 @@ class FaceMlResultBuilder { this.mlVersion = faceMlVersion, this.errorOccured = false, this.onlyThumbnailUsed = false, + this.decodedImageSize = const Dimensions(width: -1, height: -1), }) : fileId = fileID; void addNewlyDetectedFaces( List faceDetections, - Size originalSize, + Dimensions originalSize, ) { - faceDetectionImageSize = originalSize; + decodedImageSize = originalSize; for (var i = 0; i < faceDetections.length; i++) { faces.add( FaceResultBuilder.fromFaceDetection( @@ -446,7 +442,6 @@ class FaceMlResultBuilder { void addAlignmentResults( List alignmentResults, List blurValues, - Size imageSizeUsedForAlignment, ) { if (alignmentResults.length != faces.length) { throw Exception( @@ -458,7 +453,6 @@ class FaceMlResultBuilder { faces[i].alignment = alignmentResults[i]; faces[i].blurValue = blurValues[i]; } - faceAlignmentImageSize = imageSizeUsedForAlignment; } void addEmbeddingsToExistingFaces( @@ -485,8 +479,7 @@ class FaceMlResultBuilder { mlVersion: mlVersion, errorOccured: errorOccured, onlyThumbnailUsed: onlyThumbnailUsed, - faceDetectionImageSize: faceDetectionImageSize, - faceAlignmentImageSize: faceAlignmentImageSize, + decodedImageSize: decodedImageSize, ); } diff --git a/mobile/lib/services/machine_learning/face_ml/face_ml_service.dart 
b/mobile/lib/services/machine_learning/face_ml/face_ml_service.dart index afbb5b3ee..b50140f2d 100644 --- a/mobile/lib/services/machine_learning/face_ml/face_ml_service.dart +++ b/mobile/lib/services/machine_learning/face_ml/face_ml_service.dart @@ -661,13 +661,13 @@ class FaceMlService { ), ); } else { - if (result.faceDetectionImageSize == null || - result.faceAlignmentImageSize == null) { - _logger.severe( - "faceDetectionImageSize or faceDetectionImageSize is null for image with " - "ID: ${enteFile.uploadedFileID}"); + if (result.decodedImageSize.width == -1 || + result.decodedImageSize.height == -1) { + _logger + .severe("decodedImageSize is not stored correctly for image with " + "ID: ${enteFile.uploadedFileID}"); _logger.info( - "Using aligned image size for image with ID: ${enteFile.uploadedFileID}. This size is ${result.faceAlignmentImageSize!.width}x${result.faceAlignmentImageSize!.height} compared to size of ${enteFile.width}x${enteFile.height} in the metadata", + "Using aligned image size for image with ID: ${enteFile.uploadedFileID}. This size is ${result.decodedImageSize.width}x${result.decodedImageSize.height} compared to size of ${enteFile.width}x${enteFile.height} in the metadata", ); } for (int i = 0; i < result.faces.length; ++i) { @@ -697,8 +697,8 @@ class FaceMlService { detection, faceRes.blurValue, fileInfo: FileInfo( - imageHeight: result.faceDetectionImageSize!.height.truncate(), - imageWidth: result.faceDetectionImageSize!.width.truncate(), + imageHeight: result.decodedImageSize.height, + imageWidth: result.decodedImageSize.width, ), ), ); @@ -714,8 +714,8 @@ class FaceMlService { result.mlVersion, error: result.errorOccured ? 
true : null, ), - height: result.faceDetectionImageSize!.height.truncate(), - width: result.faceDetectionImageSize!.width.truncate(), + height: result.decodedImageSize.height, + width: result.decodedImageSize.width, ), ); await FaceMLDataDB.instance.bulkInsertFaces(faces); @@ -1093,7 +1093,7 @@ class FaceMlService { FaceMlResultBuilder? resultBuilder, }) async { try { - final (alignedFaces, alignmentResults, _, blurValues, originalImageSize) = + final (alignedFaces, alignmentResults, _, blurValues, _) = await ImageMlIsolate.instance .preprocessMobileFaceNetOnnx(imagePath, faces); @@ -1101,7 +1101,6 @@ class FaceMlService { resultBuilder.addAlignmentResults( alignmentResults, blurValues, - originalImageSize, ); } @@ -1128,7 +1127,7 @@ class FaceMlService { }) async { try { final stopwatch = Stopwatch()..start(); - final (alignedFaces, alignmentResults, _, blurValues, originalImageSize) = + final (alignedFaces, alignmentResults, _, blurValues, _) = await preprocessToMobileFaceNetFloat32List( image, imageByteData, @@ -1143,7 +1142,6 @@ class FaceMlService { resultBuilder.addAlignmentResults( alignmentResults, blurValues, - originalImageSize, ); } diff --git a/mobile/lib/utils/image_ml_isolate.dart b/mobile/lib/utils/image_ml_isolate.dart index 157615d8e..a771dd2ca 100644 --- a/mobile/lib/utils/image_ml_isolate.dart +++ b/mobile/lib/utils/image_ml_isolate.dart @@ -8,6 +8,7 @@ import "package:flutter/rendering.dart"; import 'package:flutter_isolate/flutter_isolate.dart'; import "package:logging/logging.dart"; import "package:photos/face/model/box.dart"; +import "package:photos/face/model/dimension.dart"; import 'package:photos/models/ml/ml_typedefs.dart'; import 'package:photos/services/machine_learning/face_ml/face_alignment/alignment_result.dart'; import 'package:photos/services/machine_learning/face_ml/face_detection/detection.dart'; @@ -343,7 +344,7 @@ class ImageMlIsolate { @Deprecated( "Old method, not needed since we now run the whole ML pipeline for faces in a 
single isolate", ) - Future<(Float32List, Size, Size)> preprocessImageYoloOnnx( + Future<(Float32List, Dimensions, Dimensions)> preprocessImageYoloOnnx( Uint8List imageData, { required bool normalize, required int requiredWidth, @@ -365,13 +366,13 @@ ), ); final inputs = results['inputs'] as Float32List; - final originalSize = Size( - results['originalWidth'] as double, - results['originalHeight'] as double, + final originalSize = Dimensions( + width: results['originalWidth'] as int, + height: results['originalHeight'] as int, ); - final newSize = Size( - results['newWidth'] as double, - results['newHeight'] as double, + final newSize = Dimensions( + width: results['newWidth'] as int, + height: results['newHeight'] as int, ); return (inputs, originalSize, newSize); } diff --git a/mobile/lib/utils/image_ml_util.dart b/mobile/lib/utils/image_ml_util.dart index 1ba29df6b..0705237ba 100644 --- a/mobile/lib/utils/image_ml_util.dart +++ b/mobile/lib/utils/image_ml_util.dart @@ -17,6 +17,7 @@ import "dart:ui"; import 'package:flutter/painting.dart' as paint show decodeImageFromList; import 'package:ml_linalg/linalg.dart'; import "package:photos/face/model/box.dart"; +import "package:photos/face/model/dimension.dart"; import 'package:photos/models/ml/ml_typedefs.dart'; import 'package:photos/services/machine_learning/face_ml/face_alignment/alignment_result.dart'; import 'package:photos/services/machine_learning/face_ml/face_alignment/similarity_transform.dart'; @@ -716,7 +717,8 @@ Future<(Num3DInputMatrix, Size, Size)> preprocessImageToMatrix( return (imageMatrix, originalSize, newSize); } -Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst( +Future<(Float32List, Dimensions, Dimensions)> + preprocessImageToFloat32ChannelsFirst( Image image, ByteData imgByteData, { required int normalization, @@ -730,7 +732,7 @@ Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst( : normalization == 1 ? 
normalizePixelRange1 : normalizePixelNoRange; - final originalSize = Size(image.width.toDouble(), image.height.toDouble()); + final originalSize = Dimensions(width: image.width, height: image.height); if (image.width == requiredWidth && image.height == requiredHeight) { return ( @@ -784,7 +786,7 @@ Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst( return ( processedBytes, originalSize, - Size(scaledWidth.toDouble(), scaledHeight.toDouble()) + Dimensions(width: scaledWidth, height: scaledHeight) ); }