[mob] Cleaner handling of decoded image dimensions
parent eeedf8b3c2
commit 4cb15268e9

7 changed files with 97 additions and 76 deletions

mobile/lib/face/model/dimension.dart (new file, 25 additions)

@@ -0,0 +1,25 @@
+class Dimensions {
+  final int width;
+  final int height;
+
+  const Dimensions({required this.width, required this.height});
+
+  @override
+  String toString() {
+    return 'Dimensions(width: $width, height: $height)';
+  }
+
+  Map<String, int> toJson() {
+    return {
+      'width': width,
+      'height': height,
+    };
+  }
+
+  factory Dimensions.fromJson(Map<String, dynamic> json) {
+    return Dimensions(
+      width: json['width'] as int,
+      height: json['height'] as int,
+    );
+  }
+}
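
The new class gives pixel dimensions an integer value type of their own instead of reusing Flutter's double-based Size. A quick round-trip sketch of the JSON helpers above (hypothetical usage, not part of the diff):

  const size = Dimensions(width: 4032, height: 3024);
  final json = size.toJson();                 // {'width': 4032, 'height': 3024}
  final restored = Dimensions.fromJson(json); // Dimensions(width: 4032, height: 3024)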

@@ -1,5 +1,6 @@
 import 'dart:math' show sqrt, pow;
 import 'dart:ui' show Size;
+import "package:photos/face/model/dimension.dart";
 
 abstract class Detection {
   final double score;

@@ -179,8 +180,8 @@ class FaceDetectionRelative extends Detection {
   }
 
   void correctForMaintainedAspectRatio(
-    Size originalSize,
-    Size newSize,
+    Dimensions originalSize,
+    Dimensions newSize,
   ) {
     // Return if both are the same size, meaning no scaling was done on both width and height
     if (originalSize == newSize) {
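
Worth noting: Dimensions as defined above does not override operator ==, so the originalSize == newSize guard compares identity; it matches canonicalized const instances but not two equal instances constructed at runtime. If value equality were intended, a minimal sketch (an assumption, not part of this commit) would add to the class:

  @override
  bool operator ==(Object other) =>
      identical(this, other) ||
      other is Dimensions && other.width == width && other.height == height;

  @override
  int get hashCode => Object.hash(width, height);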

@@ -9,6 +9,7 @@ import "package:computer/computer.dart";
 import 'package:flutter/material.dart';
 import 'package:logging/logging.dart';
 import 'package:onnxruntime/onnxruntime.dart';
+import "package:photos/face/model/dimension.dart";
 import 'package:photos/services/machine_learning/face_ml/face_detection/detection.dart';
 import 'package:photos/services/machine_learning/face_ml/face_detection/naive_non_max_suppression.dart';
 import 'package:photos/services/machine_learning/face_ml/face_detection/yolov5face/yolo_face_detection_exceptions.dart';

@@ -143,7 +144,7 @@ class YoloOnnxFaceDetection {
       case FaceDetectionOperation.yoloInferenceAndPostProcessing:
         final inputImageList = args['inputImageList'] as Float32List;
         final inputShape = args['inputShape'] as List<int>;
-        final newSize = args['newSize'] as Size;
+        final newSize = args['newSize'] as Dimensions;
         final sessionAddress = args['sessionAddress'] as int;
         final timeSentToIsolate = args['timeNow'] as DateTime;
         final delaySentToIsolate =

@@ -249,7 +250,7 @@ class YoloOnnxFaceDetection {
   }
 
   /// Detects faces in the given image data.
-  Future<(List<FaceDetectionRelative>, Size)> predict(
+  Future<(List<FaceDetectionRelative>, Dimensions)> predict(
     Uint8List imageData,
   ) async {
     assert(isInitialized);
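
The return type is a Dart 3 record pairing the detections with the decoded image's dimensions; a caller would destructure it like this (hypothetical variable names):

  final (detections, originalSize) = await faceDetection.predict(imageData);
  print('${detections.length} faces in ${originalSize.width}x${originalSize.height}');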

@@ -314,7 +315,7 @@ class YoloOnnxFaceDetection {
   }
 
   /// Detects faces in the given image data.
-  static Future<(List<FaceDetectionRelative>, Size)> predictSync(
+  static Future<(List<FaceDetectionRelative>, Dimensions)> predictSync(
     ui.Image image,
     ByteData imageByteData,
     int sessionAddress,

@@ -384,7 +385,7 @@ class YoloOnnxFaceDetection {
   }
 
   /// Detects faces in the given image data.
-  Future<(List<FaceDetectionRelative>, Size)> predictInIsolate(
+  Future<(List<FaceDetectionRelative>, Dimensions)> predictInIsolate(
     Uint8List imageData,
   ) async {
     await ensureSpawnedIsolate();

@@ -446,7 +447,7 @@ class YoloOnnxFaceDetection {
     return (relativeDetections, originalSize);
   }
 
-  Future<(List<FaceDetectionRelative>, Size)> predictInComputer(
+  Future<(List<FaceDetectionRelative>, Dimensions)> predictInComputer(
     String imagePath,
   ) async {
     assert(isInitialized);

@@ -524,7 +525,7 @@ class YoloOnnxFaceDetection {
 
     final stopwatchDecoding = Stopwatch()..start();
     final List<Float32List> inputImageDataLists = [];
-    final List<(Size, Size)> originalAndNewSizeList = [];
+    final List<(Dimensions, Dimensions)> originalAndNewSizeList = [];
     int concatenatedImageInputsLength = 0;
     for (final imageData in imageDataList) {
       final (inputImageList, originalSize, newSize) =

@@ -624,9 +625,9 @@ class YoloOnnxFaceDetection {
       // Account for the fact that the aspect ratio was maintained
       for (final faceDetection in relativeDetections) {
         faceDetection.correctForMaintainedAspectRatio(
-          Size(
-            kInputWidth.toDouble(),
-            kInputHeight.toDouble(),
+          const Dimensions(
+            width: kInputWidth,
+            height: kInputHeight,
           ),
           originalAndNewSizeList[imageOutputToUse].$2,
         );
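
The .$2 above is record syntax for the second positional field, i.e. the post-resize newSize half of each (Dimensions, Dimensions) pair collected earlier. A standalone sketch with made-up values:

  const pair = (
    Dimensions(width: 4032, height: 3024), // originalSize
    Dimensions(width: 448, height: 336),   // newSize after the resize
  );
  assert(pair.$2.width == 448);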

@@ -653,7 +654,7 @@ class YoloOnnxFaceDetection {
 
   static List<FaceDetectionRelative> _yoloPostProcessOutputs(
     List<OrtValue?>? outputs,
-    Size newSize,
+    Dimensions newSize,
   ) {
     // // Get output tensors
     final nestedResults =

@@ -684,9 +685,9 @@ class YoloOnnxFaceDetection {
       // Account for the fact that the aspect ratio was maintained
       for (final faceDetection in relativeDetections) {
         faceDetection.correctForMaintainedAspectRatio(
-          Size(
-            kInputWidth.toDouble(),
-            kInputHeight.toDouble(),
+          const Dimensions(
+            width: kInputWidth,
+            height: kInputHeight,
           ),
           newSize,
         );

@@ -735,7 +736,7 @@ class YoloOnnxFaceDetection {
   ) async {
     final inputImageList = args['inputImageList'] as Float32List;
     final inputShape = args['inputShape'] as List<int>;
-    final newSize = args['newSize'] as Size;
+    final newSize = args['newSize'] as Dimensions;
     final sessionAddress = args['sessionAddress'] as int;
     final timeSentToIsolate = args['timeNow'] as DateTime;
     final delaySentToIsolate =

@@ -1,7 +1,8 @@
 import "dart:convert" show jsonEncode, jsonDecode;
 
-import "package:flutter/material.dart" show Size, debugPrint, immutable;
+import "package:flutter/material.dart" show debugPrint, immutable;
 import "package:logging/logging.dart";
+import "package:photos/face/model/dimension.dart";
 import "package:photos/models/file/file.dart";
 import 'package:photos/models/ml/ml_typedefs.dart';
 import "package:photos/models/ml/ml_versions.dart";

@@ -284,8 +285,7 @@ class FaceMlResult {
 
   final List<FaceResult> faces;
 
-  final Size? faceDetectionImageSize;
-  final Size? faceAlignmentImageSize;
+  final Dimensions decodedImageSize;
 
   final int mlVersion;
   final bool errorOccured;

@@ -319,8 +319,7 @@ class FaceMlResult {
     required this.mlVersion,
     required this.errorOccured,
     required this.onlyThumbnailUsed,
-    required this.faceDetectionImageSize,
-    this.faceAlignmentImageSize,
+    required this.decodedImageSize,
   });
 
   Map<String, dynamic> _toJson() => {

@@ -329,16 +328,10 @@ class FaceMlResult {
         'mlVersion': mlVersion,
         'errorOccured': errorOccured,
         'onlyThumbnailUsed': onlyThumbnailUsed,
-        if (faceDetectionImageSize != null)
-          'faceDetectionImageSize': {
-            'width': faceDetectionImageSize!.width,
-            'height': faceDetectionImageSize!.height,
-          },
-        if (faceAlignmentImageSize != null)
-          'faceAlignmentImageSize': {
-            'width': faceAlignmentImageSize!.width,
-            'height': faceAlignmentImageSize!.height,
-          },
+        'decodedImageSize': {
+          'width': decodedImageSize.width,
+          'height': decodedImageSize.height,
+        },
       };
 
   String toJsonString() => jsonEncode(_toJson());

@@ -352,18 +345,19 @@ class FaceMlResult {
       mlVersion: json['mlVersion'],
       errorOccured: json['errorOccured'] ?? false,
       onlyThumbnailUsed: json['onlyThumbnailUsed'] ?? false,
-      faceDetectionImageSize: json['faceDetectionImageSize'] == null
-          ? null
-          : Size(
-              json['faceDetectionImageSize']['width'],
-              json['faceDetectionImageSize']['height'],
-            ),
-      faceAlignmentImageSize: json['faceAlignmentImageSize'] == null
-          ? null
-          : Size(
-              json['faceAlignmentImageSize']['width'],
-              json['faceAlignmentImageSize']['height'],
-            ),
+      decodedImageSize: json['decodedImageSize'] != null
+          ? Dimensions(
+              width: json['decodedImageSize']['width'],
+              height: json['decodedImageSize']['height'],
+            )
+          : json['faceDetectionImageSize'] == null
+              ? const Dimensions(width: -1, height: -1)
+              : Dimensions(
+                  width: (json['faceDetectionImageSize']['width'] as double)
+                      .truncate(),
+                  height: (json['faceDetectionImageSize']['height'] as double)
+                      .truncate(),
+                ),
     );
   }
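
The nested conditional gives older stored results a migration path: prefer the new decodedImageSize key, fall back to the legacy double-valued faceDetectionImageSize (truncated to ints), and otherwise record the size as unknown with a -1 sentinel. The three cases, with made-up payloads:

  // 1. New format, used as-is:
  //    {"decodedImageSize": {"width": 4032, "height": 3024}}
  // 2. Legacy format, doubles truncated:
  //    {"faceDetectionImageSize": {"width": 4032.0, "height": 3024.0}}
  //    -> Dimensions(width: 4032, height: 3024)
  // 3. Neither key present:
  //    -> Dimensions(width: -1, height: -1)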
@@ -400,8 +394,7 @@ class FaceMlResultBuilder {
 
   List<FaceResultBuilder> faces = <FaceResultBuilder>[];
 
-  Size? faceDetectionImageSize;
-  Size? faceAlignmentImageSize;
+  Dimensions decodedImageSize;
 
   int mlVersion;
   bool errorOccured;

@@ -412,6 +405,7 @@ class FaceMlResultBuilder {
     this.mlVersion = faceMlVersion,
     this.errorOccured = false,
     this.onlyThumbnailUsed = false,
+    this.decodedImageSize = const Dimensions(width: -1, height: -1),
   });
 
   FaceMlResultBuilder.fromEnteFile(

@@ -419,6 +413,7 @@ class FaceMlResultBuilder {
     this.mlVersion = faceMlVersion,
     this.errorOccured = false,
     this.onlyThumbnailUsed = false,
+    this.decodedImageSize = const Dimensions(width: -1, height: -1),
   }) : fileId = file.uploadedFileID ?? -1;
 
   FaceMlResultBuilder.fromEnteFileID(

@@ -426,13 +421,14 @@ class FaceMlResultBuilder {
     this.mlVersion = faceMlVersion,
     this.errorOccured = false,
     this.onlyThumbnailUsed = false,
+    this.decodedImageSize = const Dimensions(width: -1, height: -1),
   }) : fileId = fileID;
 
   void addNewlyDetectedFaces(
     List<FaceDetectionRelative> faceDetections,
-    Size originalSize,
+    Dimensions originalSize,
   ) {
-    faceDetectionImageSize = originalSize;
+    decodedImageSize = originalSize;
     for (var i = 0; i < faceDetections.length; i++) {
       faces.add(
         FaceResultBuilder.fromFaceDetection(

@@ -446,7 +442,6 @@ class FaceMlResultBuilder {
   void addAlignmentResults(
     List<AlignmentResult> alignmentResults,
     List<double> blurValues,
-    Size imageSizeUsedForAlignment,
   ) {
     if (alignmentResults.length != faces.length) {
       throw Exception(

@@ -458,7 +453,6 @@ class FaceMlResultBuilder {
       faces[i].alignment = alignmentResults[i];
       faces[i].blurValue = blurValues[i];
     }
-    faceAlignmentImageSize = imageSizeUsedForAlignment;
   }
 
   void addEmbeddingsToExistingFaces(

@@ -485,8 +479,7 @@ class FaceMlResultBuilder {
       mlVersion: mlVersion,
       errorOccured: errorOccured,
       onlyThumbnailUsed: onlyThumbnailUsed,
-      faceDetectionImageSize: faceDetectionImageSize,
-      faceAlignmentImageSize: faceAlignmentImageSize,
+      decodedImageSize: decodedImageSize,
     );
   }

@@ -661,13 +661,13 @@ class FaceMlService {
           ),
         );
       } else {
-        if (result.faceDetectionImageSize == null ||
-            result.faceAlignmentImageSize == null) {
-          _logger.severe(
-              "faceDetectionImageSize or faceDetectionImageSize is null for image with "
-              "ID: ${enteFile.uploadedFileID}");
+        if (result.decodedImageSize.width == -1 ||
+            result.decodedImageSize.height == -1) {
+          _logger
+              .severe("decodedImageSize is not stored correctly for image with "
+                  "ID: ${enteFile.uploadedFileID}");
           _logger.info(
-            "Using aligned image size for image with ID: ${enteFile.uploadedFileID}. This size is ${result.faceAlignmentImageSize!.width}x${result.faceAlignmentImageSize!.height} compared to size of ${enteFile.width}x${enteFile.height} in the metadata",
+            "Using decoded image size for image with ID: ${enteFile.uploadedFileID}. This size is ${result.decodedImageSize.width}x${result.decodedImageSize.height} compared to size of ${enteFile.width}x${enteFile.height} in the metadata",
           );
         }
         for (int i = 0; i < result.faces.length; ++i) {
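
The -1 sentinel check recurs wherever decodedImageSize may be missing; a small convenience (hypothetical, not in this commit) could centralize it:

  extension DimensionsSentinel on Dimensions {
    // True when the size was never recorded (the -1/-1 placeholder).
    bool get isUnknown => width == -1 || height == -1;
  }

with the guard above then reading result.decodedImageSize.isUnknown.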

@@ -697,8 +697,8 @@ class FaceMlService {
             detection,
             faceRes.blurValue,
             fileInfo: FileInfo(
-              imageHeight: result.faceDetectionImageSize!.height.truncate(),
-              imageWidth: result.faceDetectionImageSize!.width.truncate(),
+              imageHeight: result.decodedImageSize.height,
+              imageWidth: result.decodedImageSize.width,
             ),
           ),
         );

@@ -714,8 +714,8 @@ class FaceMlService {
           result.mlVersion,
           error: result.errorOccured ? true : null,
         ),
-        height: result.faceDetectionImageSize!.height.truncate(),
-        width: result.faceDetectionImageSize!.width.truncate(),
+        height: result.decodedImageSize.height,
+        width: result.decodedImageSize.width,
       ),
     );
     await FaceMLDataDB.instance.bulkInsertFaces(faces);

@@ -1093,7 +1093,7 @@ class FaceMlService {
     FaceMlResultBuilder? resultBuilder,
   }) async {
     try {
-      final (alignedFaces, alignmentResults, _, blurValues, originalImageSize) =
+      final (alignedFaces, alignmentResults, _, blurValues, _) =
           await ImageMlIsolate.instance
               .preprocessMobileFaceNetOnnx(imagePath, faces);
 

@@ -1101,7 +1101,6 @@ class FaceMlService {
       resultBuilder.addAlignmentResults(
         alignmentResults,
         blurValues,
-        originalImageSize,
       );
     }
 

@@ -1128,7 +1127,7 @@ class FaceMlService {
   }) async {
     try {
       final stopwatch = Stopwatch()..start();
-      final (alignedFaces, alignmentResults, _, blurValues, originalImageSize) =
+      final (alignedFaces, alignmentResults, _, blurValues, _) =
           await preprocessToMobileFaceNetFloat32List(
         image,
         imageByteData,

@@ -1143,7 +1142,6 @@ class FaceMlService {
       resultBuilder.addAlignmentResults(
         alignmentResults,
         blurValues,
-        originalImageSize,
       );
     }
 
@@ -8,6 +8,7 @@ import "package:flutter/rendering.dart";
 import 'package:flutter_isolate/flutter_isolate.dart';
 import "package:logging/logging.dart";
 import "package:photos/face/model/box.dart";
+import "package:photos/face/model/dimension.dart";
 import 'package:photos/models/ml/ml_typedefs.dart';
 import 'package:photos/services/machine_learning/face_ml/face_alignment/alignment_result.dart';
 import 'package:photos/services/machine_learning/face_ml/face_detection/detection.dart';

@@ -343,7 +344,7 @@ class ImageMlIsolate {
   @Deprecated(
     "Old method, not needed since we now run the whole ML pipeline for faces in a single isolate",
   )
-  Future<(Float32List, Size, Size)> preprocessImageYoloOnnx(
+  Future<(Float32List, Dimensions, Dimensions)> preprocessImageYoloOnnx(
     Uint8List imageData, {
     required bool normalize,
     required int requiredWidth,

@@ -365,13 +366,13 @@ class ImageMlIsolate {
       ),
     );
     final inputs = results['inputs'] as Float32List;
-    final originalSize = Size(
-      results['originalWidth'] as double,
-      results['originalHeight'] as double,
+    final originalSize = Dimensions(
+      width: results['originalWidth'] as int,
+      height: results['originalHeight'] as int,
     );
-    final newSize = Size(
-      results['newWidth'] as double,
-      results['newHeight'] as double,
+    final newSize = Dimensions(
+      width: results['newWidth'] as int,
+      height: results['newHeight'] as int,
     );
     return (inputs, originalSize, newSize);
   }

@@ -17,6 +17,7 @@ import "dart:ui";
 import 'package:flutter/painting.dart' as paint show decodeImageFromList;
 import 'package:ml_linalg/linalg.dart';
 import "package:photos/face/model/box.dart";
+import "package:photos/face/model/dimension.dart";
 import 'package:photos/models/ml/ml_typedefs.dart';
 import 'package:photos/services/machine_learning/face_ml/face_alignment/alignment_result.dart';
 import 'package:photos/services/machine_learning/face_ml/face_alignment/similarity_transform.dart';

@@ -716,7 +717,8 @@ Future<(Num3DInputMatrix, Size, Size)> preprocessImageToMatrix(
   return (imageMatrix, originalSize, newSize);
 }
 
-Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst(
+Future<(Float32List, Dimensions, Dimensions)>
+    preprocessImageToFloat32ChannelsFirst(
   Image image,
   ByteData imgByteData, {
   required int normalization,

@@ -730,7 +732,7 @@ Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst(
       : normalization == 1
           ? normalizePixelRange1
           : normalizePixelNoRange;
-  final originalSize = Size(image.width.toDouble(), image.height.toDouble());
+  final originalSize = Dimensions(width: image.width, height: image.height);
 
   if (image.width == requiredWidth && image.height == requiredHeight) {
     return (

@@ -784,7 +786,7 @@ Future<(Float32List, Size, Size)> preprocessImageToFloat32ChannelsFirst(
   return (
     processedBytes,
     originalSize,
-    Size(scaledWidth.toDouble(), scaledHeight.toDouble())
+    Dimensions(width: scaledWidth, height: scaledHeight)
   );
 }
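
scaledWidth and scaledHeight are computed earlier in this function and are not part of the hunk; a typical aspect-ratio-preserving computation of them would look like this (an assumption about the elided code, not a quote of it):

  import 'dart:math' show min;

  final scale = min(requiredWidth / image.width, requiredHeight / image.height);
  final scaledWidth = (image.width * scale).round();
  final scaledHeight = (image.height * scale).round();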