Deduplication: Refactor + Dedupe by hash and size (instead of just size) (#1172)
This commit is contained in:
commit
e7e520f9f7
3 changed files with 45 additions and 80 deletions
|
@ -10,7 +10,6 @@ import 'package:photos/models/ente_file.dart';
|
||||||
import 'package:photos/models/file_type.dart';
|
import 'package:photos/models/file_type.dart';
|
||||||
import 'package:photos/models/location/location.dart';
|
import 'package:photos/models/location/location.dart';
|
||||||
import "package:photos/models/metadata/file_magic.dart";
|
import "package:photos/models/metadata/file_magic.dart";
|
||||||
|
|
||||||
import 'package:photos/services/feature_flag_service.dart';
|
import 'package:photos/services/feature_flag_service.dart';
|
||||||
import 'package:photos/utils/date_time_util.dart';
|
import 'package:photos/utils/date_time_util.dart';
|
||||||
import 'package:photos/utils/exif_util.dart';
|
import 'package:photos/utils/exif_util.dart';
|
||||||
|
@ -159,11 +158,11 @@ class File extends EnteFile {
|
||||||
// handle past live photos upload from web client
|
// handle past live photos upload from web client
|
||||||
if (hash == null &&
|
if (hash == null &&
|
||||||
fileType == FileType.livePhoto &&
|
fileType == FileType.livePhoto &&
|
||||||
metadata.containsKey('imgHash') &&
|
metadata.containsKey('imageHash') &&
|
||||||
metadata.containsKey('vidHash')) {
|
metadata.containsKey('videoHash')) {
|
||||||
// convert to imgHash:vidHash
|
// convert to imgHash:vidHash
|
||||||
hash =
|
hash =
|
||||||
'${metadata['imgHash']}$kLivePhotoHashSeparator${metadata['vidHash']}';
|
'${metadata['imageHash']}$kLivePhotoHashSeparator${metadata['videoHash']}';
|
||||||
}
|
}
|
||||||
metadataVersion = metadata["version"] ?? 0;
|
metadataVersion = metadata["version"] ?? 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,79 +66,33 @@ class DeduplicationService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<DuplicateFiles> clubDuplicatesByTime(List<DuplicateFiles> dupes) {
|
List<DuplicateFiles> clubDuplicates(
|
||||||
final result = <DuplicateFiles>[];
|
List<DuplicateFiles> dupesBySize, {
|
||||||
for (final dupe in dupes) {
|
required String? Function(File) clubbingKey,
|
||||||
final files = <File>[];
|
}) {
|
||||||
final Map<int, int> creationTimeCounter = {};
|
final dupesBySizeAndClubKey = <DuplicateFiles>[];
|
||||||
int mostFrequentCreationTime = 0, mostFrequentCreationTimeCount = 0;
|
for (final sizeBasedDupe in dupesBySize) {
|
||||||
// Counts the frequency of creationTimes within the supposed duplicates
|
final Map<String, List<File>> clubKeyToFilesMap = {};
|
||||||
for (final file in dupe.files) {
|
for (final file in sizeBasedDupe.files) {
|
||||||
if (creationTimeCounter.containsKey(file.creationTime!)) {
|
final String? clubKey = clubbingKey(file);
|
||||||
creationTimeCounter[file.creationTime!] =
|
if (clubKey == null || clubKey.isEmpty) {
|
||||||
creationTimeCounter[file.creationTime!]! + 1;
|
continue;
|
||||||
} else {
|
|
||||||
creationTimeCounter[file.creationTime!] = 0;
|
|
||||||
}
|
}
|
||||||
if (creationTimeCounter[file.creationTime]! >
|
if (!clubKeyToFilesMap.containsKey(clubKey)) {
|
||||||
mostFrequentCreationTimeCount) {
|
clubKeyToFilesMap[clubKey] = <File>[];
|
||||||
mostFrequentCreationTimeCount =
|
|
||||||
creationTimeCounter[file.creationTime]!;
|
|
||||||
mostFrequentCreationTime = file.creationTime!;
|
|
||||||
}
|
}
|
||||||
files.add(file);
|
clubKeyToFilesMap[clubKey]!.add(file);
|
||||||
}
|
}
|
||||||
// Ignores those files that were not created within the most common creationTime
|
for (final clubbingKey in clubKeyToFilesMap.keys) {
|
||||||
final incorrectDuplicates = <File>{};
|
final clubbedFiles = clubKeyToFilesMap[clubbingKey]!;
|
||||||
for (final file in files) {
|
if (clubbedFiles.length > 1) {
|
||||||
if (file.creationTime != mostFrequentCreationTime) {
|
dupesBySizeAndClubKey.add(
|
||||||
incorrectDuplicates.add(file);
|
DuplicateFiles(clubbedFiles, sizeBasedDupe.size),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
files.removeWhere((file) => incorrectDuplicates.contains(file));
|
|
||||||
if (files.length > 1) {
|
|
||||||
result.add(DuplicateFiles(files, dupe.size));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return result;
|
return dupesBySizeAndClubKey;
|
||||||
}
|
|
||||||
|
|
||||||
List<DuplicateFiles> clubDuplicatesByName(List<DuplicateFiles> dupes) {
|
|
||||||
final result = <DuplicateFiles>[];
|
|
||||||
for (final dupe in dupes) {
|
|
||||||
final files = <File>[];
|
|
||||||
final Map<String, int> fileNameCounter = {};
|
|
||||||
String mostFrequentFileName = "";
|
|
||||||
int mostFrequentFileNameCount = 0;
|
|
||||||
// Counts the frequency of creationTimes within the supposed duplicates
|
|
||||||
for (final file in dupe.files) {
|
|
||||||
if (fileNameCounter.containsKey(file.displayName)) {
|
|
||||||
fileNameCounter[file.displayName] =
|
|
||||||
fileNameCounter[file.displayName]! + 1;
|
|
||||||
} else {
|
|
||||||
fileNameCounter[file.displayName] = 0;
|
|
||||||
}
|
|
||||||
if (fileNameCounter[file.displayName]! >
|
|
||||||
mostFrequentFileNameCount) {
|
|
||||||
mostFrequentFileNameCount =
|
|
||||||
fileNameCounter[file.displayName]!;
|
|
||||||
mostFrequentFileName = file.displayName;
|
|
||||||
}
|
|
||||||
files.add(file);
|
|
||||||
}
|
|
||||||
// Ignores those files that were not created within the most common creationTime
|
|
||||||
final incorrectDuplicates = <File>{};
|
|
||||||
for (final file in files) {
|
|
||||||
if (file.displayName != mostFrequentFileName) {
|
|
||||||
incorrectDuplicates.add(file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
files.removeWhere((file) => incorrectDuplicates.contains(file));
|
|
||||||
if (files.length > 1) {
|
|
||||||
result.add(DuplicateFiles(files, dupe.size));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<DuplicateFilesResponse> _fetchDuplicateFileIDs() async {
|
Future<DuplicateFilesResponse> _fetchDuplicateFileIDs() async {
|
||||||
|
|
|
@ -48,7 +48,7 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
||||||
final Set<File> _selectedFiles = <File>{};
|
final Set<File> _selectedFiles = <File>{};
|
||||||
final Map<int?, int> _fileSizeMap = {};
|
final Map<int?, int> _fileSizeMap = {};
|
||||||
late List<DuplicateFiles> _duplicates;
|
late List<DuplicateFiles> _duplicates;
|
||||||
bool _shouldClubByCaptureTime = true;
|
bool _shouldClubByCaptureTime = false;
|
||||||
bool _shouldClubByFileName = false;
|
bool _shouldClubByFileName = false;
|
||||||
bool toastShown = false;
|
bool toastShown = false;
|
||||||
|
|
||||||
|
@ -56,8 +56,10 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
||||||
|
|
||||||
@override
|
@override
|
||||||
void initState() {
|
void initState() {
|
||||||
_duplicates =
|
_duplicates = DeduplicationService.instance.clubDuplicates(
|
||||||
DeduplicationService.instance.clubDuplicatesByTime(widget.duplicates);
|
widget.duplicates,
|
||||||
|
clubbingKey: (File f) => f.hash,
|
||||||
|
);
|
||||||
_selectAllFilesButFirst();
|
_selectAllFilesButFirst();
|
||||||
|
|
||||||
super.initState();
|
super.initState();
|
||||||
|
@ -228,6 +230,9 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
||||||
value: _shouldClubByFileName,
|
value: _shouldClubByFileName,
|
||||||
onChanged: (value) {
|
onChanged: (value) {
|
||||||
_shouldClubByFileName = value!;
|
_shouldClubByFileName = value!;
|
||||||
|
if (_shouldClubByFileName) {
|
||||||
|
_shouldClubByCaptureTime = false;
|
||||||
|
}
|
||||||
_resetEntriesAndSelection();
|
_resetEntriesAndSelection();
|
||||||
setState(() {});
|
setState(() {});
|
||||||
},
|
},
|
||||||
|
@ -237,6 +242,9 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
||||||
value: _shouldClubByCaptureTime,
|
value: _shouldClubByCaptureTime,
|
||||||
onChanged: (value) {
|
onChanged: (value) {
|
||||||
_shouldClubByCaptureTime = value!;
|
_shouldClubByCaptureTime = value!;
|
||||||
|
if (_shouldClubByCaptureTime) {
|
||||||
|
_shouldClubByFileName = false;
|
||||||
|
}
|
||||||
_resetEntriesAndSelection();
|
_resetEntriesAndSelection();
|
||||||
setState(() {});
|
setState(() {});
|
||||||
},
|
},
|
||||||
|
@ -258,14 +266,18 @@ class _DeduplicatePageState extends State<DeduplicatePage> {
|
||||||
|
|
||||||
void _resetEntriesAndSelection() {
|
void _resetEntriesAndSelection() {
|
||||||
_duplicates = widget.duplicates;
|
_duplicates = widget.duplicates;
|
||||||
|
late String? Function(File) clubbingKeyFn;
|
||||||
if (_shouldClubByCaptureTime) {
|
if (_shouldClubByCaptureTime) {
|
||||||
_duplicates =
|
clubbingKeyFn = (File f) => f.creationTime?.toString() ?? '';
|
||||||
DeduplicationService.instance.clubDuplicatesByTime(_duplicates);
|
} else if (_shouldClubByFileName) {
|
||||||
}
|
clubbingKeyFn = (File f) => f.displayName;
|
||||||
if (_shouldClubByFileName) {
|
} else {
|
||||||
_duplicates =
|
clubbingKeyFn = (File f) => f.hash;
|
||||||
DeduplicationService.instance.clubDuplicatesByName(_duplicates);
|
|
||||||
}
|
}
|
||||||
|
_duplicates = DeduplicationService.instance.clubDuplicates(
|
||||||
|
_duplicates,
|
||||||
|
clubbingKey: clubbingKeyFn,
|
||||||
|
);
|
||||||
_selectAllFilesButFirst();
|
_selectAllFilesButFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue