瀏覽代碼

refactor(server): filesystem crawl (#4395)

Co-authored-by: Jonathan Jogenfors <jonathan@jogenfors.se>
Jason Rasmussen 1 年之前
父節點
當前提交
9033e7f179
共有 2 個文件被更改，包括 203 次插入和 210 次刪除
  1. 192 196
      server/src/infra/repositories/filesystem.provider.spec.ts
  2. 11 14
      server/src/infra/repositories/filesystem.provider.ts

+ 192 - 196
server/src/infra/repositories/filesystem.provider.spec.ts

@@ -2,208 +2,204 @@ import { CrawlOptionsDto } from '@app/domain';
 import mockfs from 'mock-fs';
 import { FilesystemProvider } from './filesystem.provider';
 
-describe(FilesystemProvider.name, () => {
-  const sut: FilesystemProvider = new FilesystemProvider();
-
-  describe('crawl', () => {
-    it('should return empty wnen crawling an empty path list', async () => {
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = [];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths).toHaveLength(0);
-    });
-
-    it('should crawl a single path', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-      });
-
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
-    });
-
-    it('should exclude by file extension', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/image.tif': '',
-      });
-
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      options.exclusionPatterns = ['**/*.tif'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
-    });
-
-    it('should exclude by file extension without case sensitivity', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/image.tif': '',
-      });
-
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      options.exclusionPatterns = ['**/*.TIF'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
-    });
-
-    it('should exclude by folder', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/raw/image.jpg': '',
-        '/photos/raw2/image.jpg': '',
-        '/photos/folder/raw/image.jpg': '',
-        '/photos/crawl/image.jpg': '',
-      });
-
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      options.exclusionPatterns = ['**/raw/**'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg', '/photos/raw2/image.jpg', '/photos/crawl/image.jpg'].sort());
-    });
+interface Test {
+  test: string;
+  options: CrawlOptionsDto;
+  files: Record<string, boolean>;
+}
+
+const cwd = process.cwd();
+
+const tests: Test[] = [
+  {
+    test: 'should return empty when crawling an empty path list',
+    options: {
+      pathsToCrawl: [],
+    },
+    files: {},
+  },
+  {
+    test: 'should crawl a single path',
+    options: {
+      pathsToCrawl: ['/photos/'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+    },
+  },
+  {
+    test: 'should exclude by file extension',
+    options: {
+      pathsToCrawl: ['/photos/'],
+      exclusionPatterns: ['**/*.tif'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/image.tif': false,
+    },
+  },
+  {
+    test: 'should exclude by file extension without case sensitivity',
+    options: {
+      pathsToCrawl: ['/photos/'],
+      exclusionPatterns: ['**/*.TIF'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/image.tif': false,
+    },
+  },
+  {
+    test: 'should exclude by folder',
+    options: {
+      pathsToCrawl: ['/photos/'],
+      exclusionPatterns: ['**/raw/**'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/raw/image.jpg': false,
+      '/photos/raw2/image.jpg': true,
+      '/photos/folder/raw/image.jpg': false,
+      '/photos/crawl/image.jpg': true,
+    },
+  },
+  {
+    test: 'should crawl multiple paths',
+    options: {
+      pathsToCrawl: ['/photos/', '/images/', '/albums/'],
+    },
+    files: {
+      '/photos/image1.jpg': true,
+      '/images/image2.jpg': true,
+      '/albums/image3.jpg': true,
+    },
+  },
+  {
+    test: 'should support globbing paths',
+    options: {
+      pathsToCrawl: ['/photos*'],
+    },
+    files: {
+      '/photos1/image1.jpg': true,
+      '/photos2/image2.jpg': true,
+      '/images/image3.jpg': false,
+    },
+  },
+  {
+    test: 'should crawl a single path without trailing slash',
+    options: {
+      pathsToCrawl: ['/photos'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+    },
+  },
+  {
+    test: 'should crawl a single path',
+    options: {
+      pathsToCrawl: ['/photos/'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/subfolder/image1.jpg': true,
+      '/photos/subfolder/image2.jpg': true,
+      '/image1.jpg': false,
+    },
+  },
+  {
+    test: 'should filter file extensions',
+    options: {
+      pathsToCrawl: ['/photos/'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/image.txt': false,
+      '/photos/1': false,
+    },
+  },
+  {
+    test: 'should include photo and video extensions',
+    options: {
+      pathsToCrawl: ['/photos/', '/videos/'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/image.jpeg': true,
+      '/photos/image.heic': true,
+      '/photos/image.heif': true,
+      '/photos/image.png': true,
+      '/photos/image.gif': true,
+      '/photos/image.tif': true,
+      '/photos/image.tiff': true,
+      '/photos/image.webp': true,
+      '/photos/image.dng': true,
+      '/photos/image.nef': true,
+      '/videos/video.mp4': true,
+      '/videos/video.mov': true,
+      '/videos/video.webm': true,
+    },
+  },
+  {
+    test: 'should check file extensions without case sensitivity',
+    options: {
+      pathsToCrawl: ['/photos/'],
+    },
+    files: {
+      '/photos/image.jpg': true,
+      '/photos/image.Jpg': true,
+      '/photos/image.jpG': true,
+      '/photos/image.JPG': true,
+      '/photos/image.jpEg': true,
+      '/photos/image.TIFF': true,
+      '/photos/image.tif': true,
+      '/photos/image.dng': true,
+      '/photos/image.NEF': true,
+    },
+  },
+  {
+    test: 'should normalize the path',
+    options: {
+      pathsToCrawl: ['/photos/1/../2'],
+    },
+    files: {
+      '/photos/1/image.jpg': false,
+      '/photos/2/image.jpg': true,
+    },
+  },
+  {
+    test: 'should return absolute paths',
+    options: {
+      pathsToCrawl: ['photos'],
+    },
+    files: {
+      [`${cwd}/photos/1.jpg`]: true,
+      [`${cwd}/photos/2.jpg`]: true,
+      [`/photos/3.jpg`]: false,
+    },
+  },
+];
 
-    it('should crawl multiple paths', async () => {
-      mockfs({
-        '/photos/image1.jpg': '',
-        '/images/image2.jpg': '',
-        '/albums/image3.jpg': '',
-      });
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/', '/images/', '/albums/'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image1.jpg', '/images/image2.jpg', '/albums/image3.jpg'].sort());
-    });
-
-    it('should support globbing paths', async () => {
-      mockfs({
-        '/photos1/image1.jpg': '',
-        '/photos2/image2.jpg': '',
-        '/images/image3.jpg': '',
-      });
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos*'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos1/image1.jpg', '/photos2/image2.jpg'].sort());
-    });
-
-    it('should crawl a single path without trailing slash', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-      });
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
-    });
-
-    // TODO: test for hidden paths (not yet implemented)
-
-    it('should crawl a single path', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/subfolder/image1.jpg': '',
-        '/photos/subfolder/image2.jpg': '',
-        '/image1.jpg': '',
-      });
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(
-        ['/photos/image.jpg', '/photos/subfolder/image1.jpg', '/photos/subfolder/image2.jpg'].sort(),
-      );
-    });
+describe(FilesystemProvider.name, () => {
+  const sut = new FilesystemProvider();
 
-    it('should filter file extensions', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/image.txt': '',
-        '/photos/1': '',
-      });
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
-    });
+  console.log(process.cwd());
 
-    it('should include photo and video extensions', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/image.jpeg': '',
-        '/photos/image.heic': '',
-        '/photos/image.heif': '',
-        '/photos/image.png': '',
-        '/photos/image.gif': '',
-        '/photos/image.tif': '',
-        '/photos/image.tiff': '',
-        '/photos/image.webp': '',
-        '/photos/image.dng': '',
-        '/photos/image.nef': '',
-        '/videos/video.mp4': '',
-        '/videos/video.mov': '',
-        '/videos/video.webm': '',
-      });
+  afterEach(() => {
+    mockfs.restore();
+  });
 
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/', '/videos/'];
-      const paths: string[] = await sut.crawl(options);
+  describe('crawl', () => {
+    for (const { test, options, files } of tests) {
+      it(test, async () => {
+        mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
 
-      expect(paths.sort()).toEqual(
-        [
-          '/photos/image.jpg',
-          '/photos/image.jpeg',
-          '/photos/image.heic',
-          '/photos/image.heif',
-          '/photos/image.png',
-          '/photos/image.gif',
-          '/photos/image.tif',
-          '/photos/image.tiff',
-          '/photos/image.webp',
-          '/photos/image.dng',
-          '/photos/image.nef',
-          '/videos/video.mp4',
-          '/videos/video.mov',
-          '/videos/video.webm',
-        ].sort(),
-      );
-    });
+        const actual = await sut.crawl(options);
+        const expected = Object.entries(files)
+          .filter((entry) => entry[1])
+          .map(([file]) => file);
 
-    it('should check file extensions without case sensitivity', async () => {
-      mockfs({
-        '/photos/image.jpg': '',
-        '/photos/image.Jpg': '',
-        '/photos/image.jpG': '',
-        '/photos/image.JPG': '',
-        '/photos/image.jpEg': '',
-        '/photos/image.TIFF': '',
-        '/photos/image.tif': '',
-        '/photos/image.dng': '',
-        '/photos/image.NEF': '',
+        expect(actual.sort()).toEqual(expected.sort());
       });
-
-      const options = new CrawlOptionsDto();
-      options.pathsToCrawl = ['/photos/'];
-      const paths: string[] = await sut.crawl(options);
-      expect(paths.sort()).toEqual(
-        [
-          '/photos/image.jpg',
-          '/photos/image.Jpg',
-          '/photos/image.jpG',
-          '/photos/image.JPG',
-          '/photos/image.jpEg',
-          '/photos/image.TIFF',
-          '/photos/image.tif',
-          '/photos/image.dng',
-          '/photos/image.NEF',
-        ].sort(),
-      );
-    });
-
-    afterEach(() => {
-      mockfs.restore();
-    });
+    }
   });
 });

+ 11 - 14
server/src/infra/repositories/filesystem.provider.ts

@@ -111,24 +111,21 @@ export class FilesystemProvider implements IStorageRepository {
     };
   }
 
-  async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
-    const pathsToCrawl = crawlOptions.pathsToCrawl;
-
-    let paths: string;
+  crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
+    const { pathsToCrawl, exclusionPatterns } = crawlOptions;
     if (!pathsToCrawl) {
-      // No paths to crawl, return empty list
-      return [];
-    } else if (pathsToCrawl.length === 1) {
-      paths = pathsToCrawl[0];
-    } else {
-      paths = '{' + pathsToCrawl.join(',') + '}';
+      return Promise.resolve([]);
     }
 
-    paths = paths + '/**/*{' + mimeTypes.getSupportedFileExtensions().join(',') + '}';
+    const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
+    const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
 
-    return (await glob(paths, { nocase: true, nodir: true, ignore: crawlOptions.exclusionPatterns })).map((assetPath) =>
-      path.normalize(assetPath),
-    );
+    return glob(`${base}/**/${extensions}`, {
+      absolute: true,
+      nocase: true,
+      nodir: true,
+      ignore: exclusionPatterns,
+    });
   }
 
   readdir = readdir;