[mob][photos] More debug logging on clusters

This commit is contained in:
laurenspriem 2024-05-06 16:45:27 +05:30
parent 3c24345b68
commit 71b572917e

View file

@ -432,23 +432,142 @@ class ClusterFeedbackService {
Future<void> debugLogClusterBlurValues(
int clusterID, {
int? clusterSize,
bool logClusterSummary = false,
bool logBlurValues = false,
}) async {
final List<double> blurValues = await FaceMLDataDB.instance
.getBlurValuesForCluster(clusterID)
.then((value) => value.toList());
// Round the blur values to integers
final blurValuesIntegers =
blurValues.map((value) => value.round()).toList();
// Sort the blur values in ascending order
blurValuesIntegers.sort();
// Log the sorted blur values
if (!kDebugMode) return;
// Logging the clusterID
_logger.info(
"Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers",
"Debug logging for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}",
);
const int biggestClusterID = 1714971868664806;
// Logging the cluster summary for the cluster
if (logClusterSummary) {
final summaryMap = await FaceMLDataDB.instance.getClusterToClusterSummary(
[clusterID, biggestClusterID],
);
final summary = summaryMap[clusterID];
if (summary != null) {
_logger.info(
"Cluster summary for cluster $clusterID says the amount of faces is: ${summary.$2}",
);
}
final biggestClusterSummary = summaryMap[biggestClusterID];
final clusterSummary = summaryMap[clusterID];
if (biggestClusterSummary != null && clusterSummary != null) {
_logger.info(
"Cluster summary for biggest cluster $biggestClusterID says the size is: ${biggestClusterSummary.$2}",
);
_logger.info(
"Cluster summary for current cluster $clusterID says the size is: ${clusterSummary.$2}",
);
// Mean distance
final biggestMean = Vector.fromList(
EVector.fromBuffer(biggestClusterSummary.$1).values,
dtype: DType.float32,
);
final currentMean = Vector.fromList(
EVector.fromBuffer(clusterSummary.$1).values,
dtype: DType.float32,
);
final bigClustersMeanDistance =
cosineDistanceSIMD(biggestMean, currentMean);
_logger.info(
"Mean distance between biggest cluster and current cluster: $bigClustersMeanDistance",
);
_logger.info(
'Element differences between the two means are ${biggestMean - currentMean}',
);
final currentL2Norm = currentMean.norm();
_logger.info(
'L2 norm of current mean: $currentL2Norm',
);
final trueDistance =
biggestMean.distanceTo(currentMean, distance: Distance.cosine);
_logger.info('True distance between the two means: $trueDistance');
// Median distance
const sampleSize = 100;
final Iterable<Uint8List> biggestEmbeddings = await FaceMLDataDB
.instance
.getFaceEmbeddingsForCluster(biggestClusterID);
final List<Uint8List> biggestSampledEmbeddingsProto =
_randomSampleWithoutReplacement(
biggestEmbeddings,
sampleSize,
);
final List<Vector> biggestSampledEmbeddings =
biggestSampledEmbeddingsProto
.map(
(embedding) => Vector.fromList(
EVector.fromBuffer(embedding).values,
dtype: DType.float32,
),
)
.toList(growable: false);
final Iterable<Uint8List> currentEmbeddings =
await FaceMLDataDB.instance.getFaceEmbeddingsForCluster(clusterID);
final List<Uint8List> currentSampledEmbeddingsProto =
_randomSampleWithoutReplacement(
currentEmbeddings,
sampleSize,
);
final List<Vector> currentSampledEmbeddings =
currentSampledEmbeddingsProto
.map(
(embedding) => Vector.fromList(
EVector.fromBuffer(embedding).values,
dtype: DType.float32,
),
)
.toList(growable: false);
// Calculate distances and find the median
final List<double> distances = [];
final List<double> trueDistances = [];
for (final biggestEmbedding in biggestSampledEmbeddings) {
for (final currentEmbedding in currentSampledEmbeddings) {
distances
.add(cosineDistanceSIMD(biggestEmbedding, currentEmbedding));
trueDistances.add(
biggestEmbedding.distanceTo(
currentEmbedding,
distance: Distance.cosine,
),
);
}
}
distances.sort();
trueDistances.sort();
final double medianDistance = distances[distances.length ~/ 2];
final double trueMedianDistance =
trueDistances[trueDistances.length ~/ 2];
_logger.info(
"Median distance between biggest cluster and current cluster: $medianDistance (using sample of $sampleSize)",
);
_logger.info(
'True distance median between the two embeddings: $trueMedianDistance',
);
}
}
// Logging the blur values for the cluster
if (logBlurValues) {
final List<double> blurValues = await FaceMLDataDB.instance
.getBlurValuesForCluster(clusterID)
.then((value) => value.toList());
final blurValuesIntegers =
blurValues.map((value) => value.round()).toList();
blurValuesIntegers.sort();
_logger.info(
"Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers",
);
}
return;
}