From 56eb7bf0fc025de13e4ded836a1421a2e94e25c8 Mon Sep 17 00:00:00 2001 From: Jonathan Jogenfors Date: Wed, 11 Oct 2023 00:59:13 +0200 Subject: [PATCH] fix(server): improve library scan queuing performance (#4418) * fix: inline mark asset as offline * fix: improve log message * chore: lint * fix: offline asset algorithm * fix: use set comparison to check what to import * fix: only mark new offline files as offline * fix: compare the correct array * fix: set default library concurrency to 5 * fix: remove one db call when scanning new files * chore: remove unused import --------- Co-authored-by: Jason Rasmussen --- server/src/domain/job/job.constants.ts | 2 - server/src/domain/job/job.interface.ts | 4 -- .../domain/library/library.service.spec.ts | 33 +++---------- server/src/domain/library/library.service.ts | 48 ++++++------------- .../src/domain/repositories/job.repository.ts | 2 - .../system-config/system-config.core.ts | 2 +- .../system-config.service.spec.ts | 2 +- .../repositories/filesystem.provider.spec.ts | 2 - server/src/microservices/app.service.ts | 1 - server/test/e2e/library.e2e-spec.ts | 6 +-- 10 files changed, 25 insertions(+), 77 deletions(-) diff --git a/server/src/domain/job/job.constants.ts b/server/src/domain/job/job.constants.ts index dbf8c2224..ec1307cba 100644 --- a/server/src/domain/job/job.constants.ts +++ b/server/src/domain/job/job.constants.ts @@ -69,7 +69,6 @@ export enum JobName { LIBRARY_SCAN = 'library-refresh', LIBRARY_SCAN_ASSET = 'library-refresh-asset', LIBRARY_REMOVE_OFFLINE = 'library-remove-offline', - LIBRARY_MARK_ASSET_OFFLINE = 'library-mark-asset-offline', LIBRARY_DELETE = 'library-delete', LIBRARY_QUEUE_SCAN_ALL = 'library-queue-all-refresh', LIBRARY_QUEUE_CLEANUP = 'library-queue-cleanup', @@ -172,7 +171,6 @@ export const JOBS_TO_QUEUE: Record = { // Library managment [JobName.LIBRARY_SCAN_ASSET]: QueueName.LIBRARY, - [JobName.LIBRARY_MARK_ASSET_OFFLINE]: QueueName.LIBRARY, [JobName.LIBRARY_SCAN]: QueueName.LIBRARY, [JobName.LIBRARY_DELETE]: QueueName.LIBRARY, [JobName.LIBRARY_REMOVE_OFFLINE]: QueueName.LIBRARY, diff --git a/server/src/domain/job/job.interface.ts b/server/src/domain/job/job.interface.ts index 7825ccc78..033dfdac4 100644 --- a/server/src/domain/job/job.interface.ts +++ b/server/src/domain/job/job.interface.ts @@ -16,10 +16,6 @@ export interface IAssetDeletionJob extends IEntityJob { fromExternal?: boolean; } -export interface IOfflineLibraryFileJob extends IEntityJob { - assetPath: string; -} - export interface ILibraryFileJob extends IEntityJob { ownerId: string; assetPath: string; diff --git a/server/src/domain/library/library.service.spec.ts b/server/src/domain/library/library.service.spec.ts index 4591fb0b8..b13675a35 100644 --- a/server/src/domain/library/library.service.spec.ts +++ b/server/src/domain/library/library.service.spec.ts @@ -16,7 +16,7 @@ import { userStub, } from '@test'; import { Stats } from 'fs'; -import { ILibraryFileJob, ILibraryRefreshJob, IOfflineLibraryFileJob, JobName } from '../job'; +import { ILibraryFileJob, ILibraryRefreshJob, JobName } from '../job'; import { IAssetRepository, ICryptoRepository, @@ -126,14 +126,11 @@ describe(LibraryService.name, () => { await sut.handleQueueAssetRefresh(mockLibraryJob); - expect(jobMock.queue.mock.calls).toEqual([ + expect(assetMock.updateAll.mock.calls).toEqual([ [ + [assetStub.external.id], { - name: JobName.LIBRARY_MARK_ASSET_OFFLINE, - data: { - id: libraryStub.externalLibrary1.id, - assetPath: '/data/user1/photo.jpg', - }, + isOffline: true, }, ], ]); @@ -150,7 +147,7 @@ describe(LibraryService.name, () => { userMock.get.mockResolvedValue(userStub.user1); - expect(sut.handleQueueAssetRefresh(mockLibraryJob)).resolves.toBe(false); + await expect(sut.handleQueueAssetRefresh(mockLibraryJob)).resolves.toBe(false); }); it('should not scan upload libraries', async () => { @@ -162,7 +159,7 @@ describe(LibraryService.name, () => { libraryMock.get.mockResolvedValue(libraryStub.uploadLibrary1); - expect(sut.handleQueueAssetRefresh(mockLibraryJob)).resolves.toBe(false); + await expect(sut.handleQueueAssetRefresh(mockLibraryJob)).resolves.toBe(false); }); }); @@ -545,24 +542,6 @@ describe(LibraryService.name, () => { }); }); - describe('handleOfflineAsset', () => { - it('should mark an asset as offline', async () => { - const offlineJob: IOfflineLibraryFileJob = { - id: libraryStub.externalLibrary1.id, - assetPath: '/data/user1/photo.jpg', - }; - - assetMock.getByLibraryIdAndOriginalPath.mockResolvedValue(assetStub.image); - - await expect(sut.handleOfflineAsset(offlineJob)).resolves.toBe(true); - - expect(assetMock.save).toHaveBeenCalledWith({ - id: assetStub.image.id, - isOffline: true, - }); - }); - }); - describe('delete', () => { it('should delete a library', async () => { assetMock.getByLibraryIdAndOriginalPath.mockResolvedValue(assetStub.image); diff --git a/server/src/domain/library/library.service.ts b/server/src/domain/library/library.service.ts index 08a128d10..a59663bf7 100644 --- a/server/src/domain/library/library.service.ts +++ b/server/src/domain/library/library.service.ts @@ -8,15 +8,8 @@ import { AccessCore, Permission } from '../access'; import { AuthUserDto } from '../auth'; import { mimeTypes } from '../domain.constant'; import { usePagination } from '../domain.util'; -import { - IBaseJob, - IEntityJob, - ILibraryFileJob, - ILibraryRefreshJob, - IOfflineLibraryFileJob, - JOBS_ASSET_PAGINATION_SIZE, - JobName, -} from '../job'; +import { IBaseJob, IEntityJob, ILibraryFileJob, ILibraryRefreshJob, JOBS_ASSET_PAGINATION_SIZE, JobName } from '../job'; + import { IAccessRepository, IAssetRepository, @@ -371,28 +364,26 @@ export class LibraryService { this.logger.debug(`Found ${crawledAssetPaths.length} assets when crawling import paths ${library.importPaths}`); const assetsInLibrary = await this.assetRepository.getByLibraryId([job.id]); - const offlineAssets = assetsInLibrary.filter((asset) => !crawledAssetPaths.includes(asset.originalPath)); - this.logger.debug(`${offlineAssets.length} assets in library are not present on disk and will be marked offline`); + const onlineFiles = new Set(crawledAssetPaths); + const offlineAssetIds = assetsInLibrary + .filter((asset) => !onlineFiles.has(asset.originalPath)) + .filter((asset) => !asset.isOffline) + .map((asset) => asset.id); + this.logger.debug(`Marking ${offlineAssetIds.length} assets as offline`); - for (const offlineAsset of offlineAssets) { - const offlineJobData: IOfflineLibraryFileJob = { - id: job.id, - assetPath: offlineAsset.originalPath, - }; - - await this.jobRepository.queue({ name: JobName.LIBRARY_MARK_ASSET_OFFLINE, data: offlineJobData }); - } + await this.assetRepository.updateAll(offlineAssetIds, { isOffline: true }); if (crawledAssetPaths.length > 0) { let filteredPaths: string[] = []; if (job.refreshAllFiles || job.refreshModifiedFiles) { filteredPaths = crawledAssetPaths; } else { - const existingPaths = await this.repository.getOnlineAssetPaths(job.id); - this.logger.debug(`Found ${existingPaths.length} existing asset(s) in library ${job.id}`); + const onlinePathsInLibrary = new Set( + assetsInLibrary.filter((asset) => !asset.isOffline).map((asset) => asset.originalPath), + ); + filteredPaths = crawledAssetPaths.filter((assetPath) => !onlinePathsInLibrary.has(assetPath)); - filteredPaths = crawledAssetPaths.filter((assetPath) => !existingPaths.includes(assetPath)); - this.logger.debug(`After db comparison, ${filteredPaths.length} asset(s) remain to be imported`); + this.logger.debug(`Will import ${filteredPaths.length} new asset(s)`); } for (const assetPath of filteredPaths) { @@ -412,17 +403,6 @@ export class LibraryService { return true; } - async handleOfflineAsset(job: IOfflineLibraryFileJob): Promise { - const existingAssetEntity = await this.assetRepository.getByLibraryIdAndOriginalPath(job.id, job.assetPath); - - if (existingAssetEntity) { - this.logger.verbose(`Marking asset as offline: ${job.assetPath}`); - await this.assetRepository.save({ id: existingAssetEntity.id, isOffline: true }); - } - - return true; - } - private async findOrFail(id: string) { const library = await this.repository.get(id); if (!library) { diff --git a/server/src/domain/repositories/job.repository.ts b/server/src/domain/repositories/job.repository.ts index 0ba2b7c1a..0f571dc99 100644 --- a/server/src/domain/repositories/job.repository.ts +++ b/server/src/domain/repositories/job.repository.ts @@ -9,7 +9,6 @@ import { IEntityJob, ILibraryFileJob, ILibraryRefreshJob, - IOfflineLibraryFileJob, } from '../job/job.interface'; export interface JobCounts { @@ -88,7 +87,6 @@ export type JobItem = // Library Managment | { name: JobName.LIBRARY_SCAN_ASSET; data: ILibraryFileJob } - | { name: JobName.LIBRARY_MARK_ASSET_OFFLINE; data: IOfflineLibraryFileJob } | { name: JobName.LIBRARY_SCAN; data: ILibraryRefreshJob } | { name: JobName.LIBRARY_REMOVE_OFFLINE; data: IEntityJob } | { name: JobName.LIBRARY_DELETE; data: IEntityJob } diff --git a/server/src/domain/system-config/system-config.core.ts b/server/src/domain/system-config/system-config.core.ts index 428e29e50..9a9ea969e 100644 --- a/server/src/domain/system-config/system-config.core.ts +++ b/server/src/domain/system-config/system-config.core.ts @@ -52,7 +52,7 @@ export const defaults = Object.freeze({ [QueueName.RECOGNIZE_FACES]: { concurrency: 2 }, [QueueName.SEARCH]: { concurrency: 5 }, [QueueName.SIDECAR]: { concurrency: 5 }, - [QueueName.LIBRARY]: { concurrency: 1 }, + [QueueName.LIBRARY]: { concurrency: 5 }, [QueueName.STORAGE_TEMPLATE_MIGRATION]: { concurrency: 5 }, [QueueName.MIGRATION]: { concurrency: 5 }, [QueueName.THUMBNAIL_GENERATION]: { concurrency: 5 }, diff --git a/server/src/domain/system-config/system-config.service.spec.ts b/server/src/domain/system-config/system-config.service.spec.ts index f82b38b47..ecdec41fd 100644 --- a/server/src/domain/system-config/system-config.service.spec.ts +++ b/server/src/domain/system-config/system-config.service.spec.ts @@ -33,7 +33,7 @@ const updatedConfig = Object.freeze({ [QueueName.RECOGNIZE_FACES]: { concurrency: 2 }, [QueueName.SEARCH]: { concurrency: 5 }, [QueueName.SIDECAR]: { concurrency: 5 }, - [QueueName.LIBRARY]: { concurrency: 1 }, + [QueueName.LIBRARY]: { concurrency: 5 }, [QueueName.STORAGE_TEMPLATE_MIGRATION]: { concurrency: 5 }, [QueueName.MIGRATION]: { concurrency: 5 }, [QueueName.THUMBNAIL_GENERATION]: { concurrency: 5 }, diff --git a/server/src/infra/repositories/filesystem.provider.spec.ts b/server/src/infra/repositories/filesystem.provider.spec.ts index d8f6077bf..5103e9508 100644 --- a/server/src/infra/repositories/filesystem.provider.spec.ts +++ b/server/src/infra/repositories/filesystem.provider.spec.ts @@ -182,8 +182,6 @@ const tests: Test[] = [ describe(FilesystemProvider.name, () => { const sut = new FilesystemProvider(); - console.log(process.cwd()); - afterEach(() => { mockfs.restore(); }); diff --git a/server/src/microservices/app.service.ts b/server/src/microservices/app.service.ts index 248d5a7fb..0282d2c77 100644 --- a/server/src/microservices/app.service.ts +++ b/server/src/microservices/app.service.ts @@ -83,7 +83,6 @@ export class AppService { [JobName.SIDECAR_DISCOVERY]: (data) => this.metadataService.handleSidecarDiscovery(data), [JobName.SIDECAR_SYNC]: () => this.metadataService.handleSidecarSync(), [JobName.LIBRARY_SCAN_ASSET]: (data) => this.libraryService.handleAssetRefresh(data), - [JobName.LIBRARY_MARK_ASSET_OFFLINE]: (data) => this.libraryService.handleOfflineAsset(data), [JobName.LIBRARY_SCAN]: (data) => this.libraryService.handleQueueAssetRefresh(data), [JobName.LIBRARY_DELETE]: (data) => this.libraryService.handleDeleteLibrary(data), [JobName.LIBRARY_REMOVE_OFFLINE]: (data) => this.libraryService.handleOfflineRemoval(data), diff --git a/server/test/e2e/library.e2e-spec.ts b/server/test/e2e/library.e2e-spec.ts index b7aa2a109..742e6b7fe 100644 --- a/server/test/e2e/library.e2e-spec.ts +++ b/server/test/e2e/library.e2e-spec.ts @@ -41,7 +41,7 @@ describe(`${LibraryController.name} (e2e)`, () => { beforeEach(async () => { await db.reset(); - restoreTempFolder(); + await restoreTempFolder(); await api.authApi.adminSignUp(server); admin = await api.authApi.adminLogin(server); }); @@ -49,7 +49,7 @@ describe(`${LibraryController.name} (e2e)`, () => { afterAll(async () => { await db.disconnect(); await app.close(); - restoreTempFolder(); + await restoreTempFolder(); }); describe('GET /library', () => { @@ -407,7 +407,7 @@ describe(`${LibraryController.name} (e2e)`, () => { ); }); - it('should delete an extnernal library with assets', async () => { + it('should delete an external library with assets', async () => { const library = await api.libraryApi.create(server, admin.accessToken, { type: LibraryType.EXTERNAL, importPaths: [`${IMMICH_TEST_ASSET_PATH}/albums/nature`],