Deduplication: Refactor + Dedupe by hash and size (instead of just size) (#1172)
This commit is contained in:
commit
e7e520f9f7
3 changed files with 45 additions and 80 deletions
|
@ -10,7 +10,6 @@ import 'package:photos/models/ente_file.dart';
|
|||
import 'package:photos/models/file_type.dart';
|
||||
import 'package:photos/models/location/location.dart';
|
||||
import "package:photos/models/metadata/file_magic.dart";
|
||||
|
||||
import 'package:photos/services/feature_flag_service.dart';
|
||||
import 'package:photos/utils/date_time_util.dart';
|
||||
import 'package:photos/utils/exif_util.dart';
|
||||
|
@ -159,11 +158,11 @@ class File extends EnteFile {
|
|||
// handle past live photos upload from web client
|
||||
if (hash == null &&
|
||||
fileType == FileType.livePhoto &&
|
||||
metadata.containsKey('imgHash') &&
|
||||
metadata.containsKey('vidHash')) {
|
||||
metadata.containsKey('imageHash') &&
|
||||
metadata.containsKey('videoHash')) {
|
||||
// convert to imgHash:vidHash
|
||||
hash =
|
||||
'${metadata['imgHash']}$kLivePhotoHashSeparator${metadata['vidHash']}';
|
||||
'${metadata['imageHash']}$kLivePhotoHashSeparator${metadata['videoHash']}';
|
||||
}
|
||||
metadataVersion = metadata["version"] ?? 0;
|
||||
}
|
||||
|
|
|
@ -66,79 +66,33 @@ class DeduplicationService {
|
|||
}
|
||||
}
|
||||
|
||||
List<DuplicateFiles> clubDuplicatesByTime(List<DuplicateFiles> dupes) {
|
||||
final result = <DuplicateFiles>[];
|
||||
for (final dupe in dupes) {
|
||||
final files = <File>[];
|
||||
final Map<int, int> creationTimeCounter = {};
|
||||
int mostFrequentCreationTime = 0, mostFrequentCreationTimeCount = 0;
|
||||
// Counts the frequency of creationTimes within the supposed duplicates
|
||||
for (final file in dupe.files) {
|
||||
if (creationTimeCounter.containsKey(file.creationTime!)) {
|
||||
creationTimeCounter[file.creationTime!] =
|
||||
creationTimeCounter[file.creationTime!]! + 1;
|
||||
} else {
|
||||
creationTimeCounter[file.creationTime!] = 0;
|
||||
List<DuplicateFiles> clubDuplicates(
|
||||
List<DuplicateFiles> dupesBySize, {
|
||||
required String? Function(File) clubbingKey,
|
||||
}) {
|
||||
final dupesBySizeAndClubKey = <DuplicateFiles>[];
|
||||
for (final sizeBasedDupe in dupesBySize) {
|
||||
final Map<String, List<File>> clubKeyToFilesMap = {};
|
||||
for (final file in sizeBasedDupe.files) {
|
||||
final String? clubKey = clubbingKey(file);
|
||||
if (clubKey == null || clubKey.isEmpty) {
|
||||
continue;
|
||||
}
|
||||
if (creationTimeCounter[file.creationTime]! >
|
||||
mostFrequentCreationTimeCount) {
|
||||
mostFrequentCreationTimeCount =
|
||||
creationTimeCounter[file.creationTime]!;
|
||||
mostFrequentCreationTime = file.creationTime!;
|
||||
if (!clubKeyToFilesMap.containsKey(clubKey)) {
|
||||
clubKeyToFilesMap[clubKey] = <File>[];
|
||||
}
|
||||
files.add(file);
|
||||
clubKeyToFilesMap[clubKey]!.add(file);
|
||||
}
|
||||
// Ignores those files that were not created within the most common creationTime
|
||||
final incorrectDuplicates = <File>{};
|
||||
for (final file in files) {
|
||||
if (file.creationTime != mostFrequentCreationTime) {
|
||||
incorrectDuplicates.add(file);
|
||||
for (final clubbingKey in clubKeyToFilesMap.keys) {
|
||||
final clubbedFiles = clubKeyToFilesMap[clubbingKey]!;
|
||||
if (clubbedFiles.length > 1) {
|
||||
dupesBySizeAndClubKey.add(
|
||||
DuplicateFiles(clubbedFiles, sizeBasedDupe.size),
|
||||
);
|
||||
}
|
||||
}
|
||||
files.removeWhere((file) => incorrectDuplicates.contains(file));
|
||||
if (files.length > 1) {
|
||||
result.add(DuplicateFiles(files, dupe.size));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
List<DuplicateFiles> clubDuplicatesByName(List<DuplicateFiles> dupes) {
|
||||
final result = <DuplicateFiles>[];
|
||||
for (final dupe in dupes) {
|
||||
final files = <File>[];
|
||||
final Map<String, int> fileNameCounter = {};
|
||||
String mostFrequentFileName = "";
|
||||
int mostFrequentFileNameCount = 0;
|
||||
// Counts the frequency of file names within the supposed duplicates
|
||||
for (final file in dupe.files) {
|
||||
if (fileNameCounter.containsKey(file.displayName)) {
|
||||
fileNameCounter[file.displayName] =
|
||||
fileNameCounter[file.displayName]! + 1;
|
||||
} else {
|
||||
fileNameCounter[file.displayName] = 0;
|
||||
}
|
||||
if (fileNameCounter[file.displayName]! >
|
||||
mostFrequentFileNameCount) {
|
||||
mostFrequentFileNameCount =
|
||||
fileNameCounter[file.displayName]!;
|
||||
mostFrequentFileName = file.displayName;
|
||||
}
|
||||
files.add(file);
|
||||
}
|
||||
// Ignores those files whose name differs from the most common file name
|
||||
final incorrectDuplicates = <File>{};
|
||||
for (final file in files) {
|
||||
if (file.displayName != mostFrequentFileName) {
|
||||
incorrectDuplicates.add(file);
|
||||
}
|
||||
}
|
||||
files.removeWhere((file) => incorrectDuplicates.contains(file));
|
||||
if (files.length > 1) {
|
||||
result.add(DuplicateFiles(files, dupe.size));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
return dupesBySizeAndClubKey;
|
||||
}
|
||||
|
||||
Future<DuplicateFilesResponse> _fetchDuplicateFileIDs() async {
|
||||
|
|
|
@ -48,7 +48,7 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
|||
final Set<File> _selectedFiles = <File>{};
|
||||
final Map<int?, int> _fileSizeMap = {};
|
||||
late List<DuplicateFiles> _duplicates;
|
||||
bool _shouldClubByCaptureTime = true;
|
||||
bool _shouldClubByCaptureTime = false;
|
||||
bool _shouldClubByFileName = false;
|
||||
bool toastShown = false;
|
||||
|
||||
|
@ -56,8 +56,10 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
|||
|
||||
@override
|
||||
void initState() {
|
||||
_duplicates =
|
||||
DeduplicationService.instance.clubDuplicatesByTime(widget.duplicates);
|
||||
_duplicates = DeduplicationService.instance.clubDuplicates(
|
||||
widget.duplicates,
|
||||
clubbingKey: (File f) => f.hash,
|
||||
);
|
||||
_selectAllFilesButFirst();
|
||||
|
||||
super.initState();
|
||||
|
@ -228,6 +230,9 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
|||
value: _shouldClubByFileName,
|
||||
onChanged: (value) {
|
||||
_shouldClubByFileName = value!;
|
||||
if (_shouldClubByFileName) {
|
||||
_shouldClubByCaptureTime = false;
|
||||
}
|
||||
_resetEntriesAndSelection();
|
||||
setState(() {});
|
||||
},
|
||||
|
@ -237,6 +242,9 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
|||
value: _shouldClubByCaptureTime,
|
||||
onChanged: (value) {
|
||||
_shouldClubByCaptureTime = value!;
|
||||
if (_shouldClubByCaptureTime) {
|
||||
_shouldClubByFileName = false;
|
||||
}
|
||||
_resetEntriesAndSelection();
|
||||
setState(() {});
|
||||
},
|
||||
|
@ -258,14 +266,18 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
|||
|
||||
void _resetEntriesAndSelection() {
|
||||
_duplicates = widget.duplicates;
|
||||
late String? Function(File) clubbingKeyFn;
|
||||
if (_shouldClubByCaptureTime) {
|
||||
_duplicates =
|
||||
DeduplicationService.instance.clubDuplicatesByTime(_duplicates);
|
||||
}
|
||||
if (_shouldClubByFileName) {
|
||||
_duplicates =
|
||||
DeduplicationService.instance.clubDuplicatesByName(_duplicates);
|
||||
clubbingKeyFn = (File f) => f.creationTime?.toString() ?? '';
|
||||
} else if (_shouldClubByFileName) {
|
||||
clubbingKeyFn = (File f) => f.displayName;
|
||||
} else {
|
||||
clubbingKeyFn = (File f) => f.hash;
|
||||
}
|
||||
_duplicates = DeduplicationService.instance.clubDuplicates(
|
||||
_duplicates,
|
||||
clubbingKey: clubbingKeyFn,
|
||||
);
|
||||
_selectAllFilesButFirst();
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue