diff --git a/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart b/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart index 5f88f2e4d..06167d20d 100644 --- a/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart +++ b/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart @@ -432,23 +432,142 @@ class ClusterFeedbackService { Future debugLogClusterBlurValues( int clusterID, { int? clusterSize, + bool logClusterSummary = false, + bool logBlurValues = false, }) async { - final List blurValues = await FaceMLDataDB.instance - .getBlurValuesForCluster(clusterID) - .then((value) => value.toList()); - - // Round the blur values to integers - final blurValuesIntegers = - blurValues.map((value) => value.round()).toList(); - - // Sort the blur values in ascending order - blurValuesIntegers.sort(); - - // Log the sorted blur values + if (!kDebugMode) return; + // Logging the clusterID _logger.info( - "Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers", + "Debug logging for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}", ); + const int biggestClusterID = 1714971868664806; + + // Logging the cluster summary for the cluster + if (logClusterSummary) { + final summaryMap = await FaceMLDataDB.instance.getClusterToClusterSummary( + [clusterID, biggestClusterID], + ); + final summary = summaryMap[clusterID]; + if (summary != null) { + _logger.info( + "Cluster summary for cluster $clusterID says the amount of faces is: ${summary.$2}", + ); + } + + final biggestClusterSummary = summaryMap[biggestClusterID]; + final clusterSummary = summaryMap[clusterID]; + if (biggestClusterSummary != null && clusterSummary != null) { + _logger.info( + "Cluster summary for biggest cluster $biggestClusterID says the size is: ${biggestClusterSummary.$2}", + ); + _logger.info( + "Cluster summary for current cluster $clusterID says the size is: ${clusterSummary.$2}", + ); + + // Mean distance + final biggestMean = Vector.fromList( + EVector.fromBuffer(biggestClusterSummary.$1).values, + dtype: DType.float32, + ); + final currentMean = Vector.fromList( + EVector.fromBuffer(clusterSummary.$1).values, + dtype: DType.float32, + ); + final bigClustersMeanDistance = + cosineDistanceSIMD(biggestMean, currentMean); + _logger.info( + "Mean distance between biggest cluster and current cluster: $bigClustersMeanDistance", + ); + _logger.info( + 'Element differences between the two means are ${biggestMean - currentMean}', + ); + final currentL2Norm = currentMean.norm(); + _logger.info( + 'L2 norm of current mean: $currentL2Norm', + ); + final trueDistance = + biggestMean.distanceTo(currentMean, distance: Distance.cosine); + _logger.info('True distance between the two means: $trueDistance'); + + // Median distance + const sampleSize = 100; + final Iterable biggestEmbeddings = await FaceMLDataDB + .instance + .getFaceEmbeddingsForCluster(biggestClusterID); + final List biggestSampledEmbeddingsProto = + _randomSampleWithoutReplacement( + biggestEmbeddings, + sampleSize, + ); + final List biggestSampledEmbeddings = + biggestSampledEmbeddingsProto + .map( + (embedding) => Vector.fromList( + EVector.fromBuffer(embedding).values, + dtype: DType.float32, + ), + ) + .toList(growable: false); + + final Iterable currentEmbeddings = + await FaceMLDataDB.instance.getFaceEmbeddingsForCluster(clusterID); + final List currentSampledEmbeddingsProto = + _randomSampleWithoutReplacement( + currentEmbeddings, + sampleSize, + ); + final List currentSampledEmbeddings = + currentSampledEmbeddingsProto + .map( + (embedding) => Vector.fromList( + EVector.fromBuffer(embedding).values, + dtype: DType.float32, + ), + ) + .toList(growable: false); + + // Calculate distances and find the median + final List distances = []; + final List trueDistances = []; + for (final biggestEmbedding in biggestSampledEmbeddings) { + for (final currentEmbedding in currentSampledEmbeddings) { + distances + .add(cosineDistanceSIMD(biggestEmbedding, currentEmbedding)); + trueDistances.add( + biggestEmbedding.distanceTo( + currentEmbedding, + distance: Distance.cosine, + ), + ); + } + } + distances.sort(); + trueDistances.sort(); + final double medianDistance = distances[distances.length ~/ 2]; + final double trueMedianDistance = + trueDistances[trueDistances.length ~/ 2]; + _logger.info( + "Median distance between biggest cluster and current cluster: $medianDistance (using sample of $sampleSize)", + ); + _logger.info( + 'True distance median between the two embeddings: $trueMedianDistance', + ); + } + } + + // Logging the blur values for the cluster + if (logBlurValues) { + final List blurValues = await FaceMLDataDB.instance + .getBlurValuesForCluster(clusterID) + .then((value) => value.toList()); + final blurValuesIntegers = + blurValues.map((value) => value.round()).toList(); + blurValuesIntegers.sort(); + _logger.info( + "Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers", + ); + } return; }