瀏覽代碼

feat(server): Fix exif data parsing (#1326)

* Trying to get exifdata working with different lib.

* Got the new library working.

* Addressing PR comments.

* Removed unused vars and moved the eslint-disable comment to its proper place.

* Fix time-utils to use the exiftool-vendored lib.

Fixed also one test, as that would be valid.

* Using filename for timestamp as well if possible.

* Add new tests for time-utils.

* Remember to gracefully terminate the exiftool instance when not needed.

* eslint ignore...

* Apparently Dockerfile changes were not pushed.

* feat(dockerfile): Tweak the Server Dockerfile

* feat(server): getTimestampFromFilename should return string or undefined.

* feat(server): If we don't have exifData or timestamp from filename, raise an error.

* Apparently test was already right, but my local system disagrees.

* More utilities for parsing and fix the timestampFromFilename.

It was returning an incorrect date because the regex is not well suited to this: files named `IMG_0115.HEIC` get parsed incorrectly by it.

* feat(server/docker): Install perl as it seems to be required.

* feat(server): remember to include exposureTime and focalLength in new exif data.

* feat(server): Remove the parsing from filename as requested.

* feat(server): Import exiftool differently in time-utils.

* feat(server): Error handling when there is no exifData.

* feat(server): Fixes for the error handling when there is no exifData.

* feat(server): Remember to include modifyDate despite no exif.

* feat(server): Remember to include model of Camera.

* feat(server): Fixing up Exiftool usage.

Including proper logging for it, which had to be done in a wrapped fashion because ExifTool expects all the logging levels, which the NestJS logger doesn't implement.

* feat(server): Do not use a wrapper for ExifTool logging.

* fix merge conflicts in metadata-extractor
Skyler Mäntysaari 2 年之前
父節點
當前提交
dff10e89fe

+ 2 - 2
server/Dockerfile

@@ -2,7 +2,7 @@ FROM node:16-alpine3.14 as builder
 
 WORKDIR /usr/src/app
 
-RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg
+RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg exiftool perl
 
 COPY package.json package-lock.json ./
 
@@ -21,7 +21,7 @@ FROM node:16-alpine3.14
 
 WORKDIR /usr/src/app
 
-RUN apk add --no-cache libheif vips ffmpeg
+RUN apk add --no-cache libheif vips ffmpeg exiftool perl
 
 COPY --from=prod /usr/src/app/node_modules ./node_modules
 COPY --from=prod /usr/src/app/dist ./dist

+ 38 - 75
server/apps/microservices/src/processors/metadata-extraction.processor.ts

@@ -1,8 +1,8 @@
 import { AssetEntity, ExifEntity } from '@app/infra';
 import {
   IExifExtractionProcessor,
-  IVideoLengthExtractionProcessor,
   IReverseGeocodingProcessor,
+  IVideoLengthExtractionProcessor,
   QueueName,
   JobName,
 } from '@app/job';
@@ -11,16 +11,15 @@ import { Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
 import { InjectRepository } from '@nestjs/typeorm';
 import { Job } from 'bull';
-import exifr from 'exifr';
 import ffmpeg from 'fluent-ffmpeg';
 import path from 'path';
 import sharp from 'sharp';
 import { Repository } from 'typeorm/repository/Repository';
 import geocoder, { InitOptions } from 'local-reverse-geocoder';
 import { getName } from 'i18n-iso-countries';
-import { find } from 'geo-tz';
-import * as luxon from 'luxon';
 import fs from 'node:fs';
+import { ExifDateTime, ExifTool } from 'exiftool-vendored';
+import { timeUtils } from '@app/common';
 
 function geocoderInit(init: InitOptions) {
   return new Promise<void>(function (resolve) {
@@ -75,7 +74,6 @@ export type GeoData = {
 export class MetadataExtractionProcessor {
   private logger = new Logger(MetadataExtractionProcessor.name);
   private isGeocodeInitialized = false;
-
   constructor(
     @InjectRepository(AssetEntity)
     private assetRepository: Repository<AssetEntity>,
@@ -102,7 +100,7 @@ export class MetadataExtractionProcessor {
           configService.get('REVERSE_GEOCODING_DUMP_DIRECTORY') || process.cwd() + '/.reverse-geocoding-dump/',
       }).then(() => {
         this.isGeocodeInitialized = true;
-        Logger.log('Reverse Geocoding Initialised');
+        this.logger.log('Reverse Geocoding Initialised');
       });
     }
   }
@@ -142,84 +140,48 @@ export class MetadataExtractionProcessor {
   async extractExifInfo(job: Job<IExifExtractionProcessor>) {
     try {
       const { asset, fileName }: { asset: AssetEntity; fileName: string } = job.data;
-      const exifData = await exifr.parse(asset.originalPath, {
-        tiff: true,
-        ifd0: true as any,
-        ifd1: true,
-        exif: true,
-        gps: true,
-        interop: true,
-        xmp: true,
-        icc: true,
-        iptc: true,
-        jfif: true,
-        ihdr: true,
+      const exiftool = new ExifTool();
+      const exifData = await exiftool.read(asset.originalPath).catch((e) => {
+        this.logger.warn(`The exifData parsing failed due to: ${e} on file ${asset.originalPath}`);
       });
 
-      if (!exifData) {
-        throw new Error(`can not parse exif data from file ${asset.originalPath}`);
-      }
-
-      const createdAt = new Date(exifData.DateTimeOriginal || exifData.CreateDate || new Date(asset.createdAt));
+      const exifToDate = (exifDate: string | ExifDateTime | undefined) =>
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        exifDate ? new Date(exifDate.toString()!) : null;
 
+      let createdAt = exifToDate(asset.createdAt);
+      const newExif = new ExifEntity();
+      if (exifData) {
+        createdAt = exifToDate(exifData.DateTimeOriginal ?? exifData.CreateDate ?? asset.createdAt);
+        const modifyDate = exifToDate(exifData.ModifyDate);
+        newExif.make = exifData['Make'] || null;
+        newExif.model = exifData['Model'] || null;
+        newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
+        newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
+        newExif.exposureTime = (await timeUtils.parseStringToNumber(exifData['ExposureTime'])) || null;
+        newExif.orientation = exifData['Orientation']?.toString() || null;
+        newExif.dateTimeOriginal = createdAt;
+        newExif.modifyDate = modifyDate || null;
+        newExif.lensModel = exifData['LensModel'] || null;
+        newExif.fNumber = exifData['FNumber'] || null;
+        newExif.focalLength = (await timeUtils.parseStringToNumber(exifData['FocalLength'])) || null;
+        newExif.iso = exifData['ISO'] || null;
+        newExif.latitude = exifData['GPSLatitude'] || null;
+        newExif.longitude = exifData['GPSLongitude'] || null;
+      } else {
+        newExif.dateTimeOriginal = createdAt;
+        newExif.modifyDate = exifToDate(asset.modifiedAt);
+      }
       const fileStats = fs.statSync(asset.originalPath);
       const fileSizeInBytes = fileStats.size;
-
-      const newExif = new ExifEntity();
       newExif.assetId = asset.id;
-      newExif.make = exifData['Make'] || null;
-      newExif.model = exifData['Model'] || null;
       newExif.imageName = path.parse(fileName).name || null;
-      newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
-      newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
       newExif.fileSizeInByte = fileSizeInBytes || null;
-      newExif.orientation = exifData['Orientation'] || null;
-      newExif.dateTimeOriginal = createdAt;
-      newExif.modifyDate = exifData['ModifyDate'] || null;
-      newExif.lensModel = exifData['LensModel'] || null;
-      newExif.fNumber = exifData['FNumber'] || null;
-      newExif.focalLength = exifData['FocalLength'] || null;
-      newExif.iso = exifData['ISO'] || null;
-      newExif.exposureTime = exifData['ExposureTime'] || null;
-      newExif.latitude = exifData['latitude'] || null;
-      newExif.longitude = exifData['longitude'] || null;
 
-      /**
-       * Correctly store UTC time based on timezone
-       * The timestamp being extracted from EXIF is based on the timezone
-       * of the container. We need to correct it to UTC time based on the
-       * timezone of the location.
-       *
-       * The timezone of the location can be exracted from the lat/lon
-       * GPS coordinates.
-       *
-       * Any assets that doesn't have this information will used the
-       * createdAt timestamp of the asset instead.
-       *
-       * The updated/corrected timestamp will be used to update the
-       * createdAt timestamp in the asset table. So that the information
-       * is consistent across the database.
-       *  */
-      if (newExif.longitude && newExif.latitude) {
-        const tz = find(newExif.latitude, newExif.longitude)[0];
-        const localTimeWithTimezone = createdAt.toISOString();
-
-        if (localTimeWithTimezone.length == 24) {
-          // Remove the last character
-          const localTimeWithoutTimezone = localTimeWithTimezone.slice(0, -1);
-          const correctUTCTime = luxon.DateTime.fromISO(localTimeWithoutTimezone, { zone: tz }).toUTC().toISO();
-          newExif.dateTimeOriginal = new Date(correctUTCTime);
-          await this.assetRepository.save({
-            id: asset.id,
-            createdAt: correctUTCTime,
-          });
-        }
-      } else {
-        await this.assetRepository.save({
-          id: asset.id,
-          createdAt: createdAt.toISOString(),
-        });
-      }
+      await this.assetRepository.save({
+        id: asset.id,
+        createdAt: createdAt?.toISOString(),
+      });
 
       /**
        * Reverse Geocoding
@@ -255,6 +217,7 @@ export class MetadataExtractionProcessor {
       }
 
       await this.exifRepository.save(newExif);
+      await exiftool.end();
     } catch (error: any) {
       this.logger.error(`Error extracting EXIF ${error}`, error?.stack);
     }

+ 22 - 16
server/libs/common/src/utils/time-utils.ts

@@ -1,6 +1,12 @@
-import exifr from 'exifr';
+// Importing from 'exiftool-vendored.pl' directly is needed because resolving
+// the vendored exiftool fails in tests otherwise. As that package is not meant
+// to be a direct requirement of the project, the @ts-ignore below is required.
+// eslint-disable-next-line @typescript-eslint/ban-ts-comment
+// @ts-ignore
+import { exiftool } from 'exiftool-vendored.pl';
 
 function createTimeUtils() {
+  const floatRegex = /[+-]?([0-9]*[.])?[0-9]+/;
   const checkValidTimestamp = (timestamp: string): boolean => {
     const parsedTimestamp = Date.parse(timestamp);
 
@@ -19,22 +25,12 @@ function createTimeUtils() {
 
   const getTimestampFromExif = async (originalPath: string): Promise<string> => {
     try {
-      const exifData = await exifr.parse(originalPath, {
-        tiff: true,
-        ifd0: true as any,
-        ifd1: true,
-        exif: true,
-        gps: true,
-        interop: true,
-        xmp: true,
-        icc: true,
-        iptc: true,
-        jfif: true,
-        ihdr: true,
-      });
+      const exifData = await exiftool.read(originalPath);
 
       if (exifData && exifData['DateTimeOriginal']) {
-        return exifData['DateTimeOriginal'];
+        await exiftool.end();
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        return exifData['DateTimeOriginal'].toString()!;
       } else {
         return new Date().toISOString();
       }
@@ -42,7 +38,17 @@ function createTimeUtils() {
       return new Date().toISOString();
     }
   };
-  return { checkValidTimestamp, getTimestampFromExif };
+
+  const parseStringToNumber = async (original: string | undefined): Promise<number | null> => {
+    const match = original?.match(floatRegex)?.[0];
+    if (match) {
+      return parseFloat(match);
+    } else {
+      return null;
+    }
+  };
+
+  return { checkValidTimestamp, getTimestampFromExif, parseStringToNumber };
 }
 
 export const timeUtils = createTimeUtils();

+ 101 - 17
server/package-lock.json

@@ -32,7 +32,7 @@
         "cookie-parser": "^1.4.6",
         "diskusage": "^1.1.3",
         "dotenv": "^14.2.0",
-        "exifr": "^7.1.3",
+        "exiftool-vendored": "^19.0.0",
         "fdir": "^5.3.0",
         "fluent-ffmpeg": "^2.1.2",
         "geo-tz": "^7.0.2",
@@ -2237,6 +2237,11 @@
       "integrity": "sha512-uZtkfKblCEQtZKBF6EBXVZeQNl82yqtDQdv+eck8u7tdPxjLu2/lp5/uPW+um2tpuxINHWy3GhiccY7QgEaVHQ==",
       "dev": true
     },
+    "node_modules/@photostructure/tz-lookup": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/@photostructure/tz-lookup/-/tz-lookup-7.0.0.tgz",
+      "integrity": "sha512-pTRsZz7Sn4yAtItC7I4+0segDHosMyOtJgAXg+xvDOolT0Xz4IFWqBV33OMCWoaNd3oQb60wbWhLeCQgJCyZAA=="
+    },
     "node_modules/@redis/bloom": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.1.0.tgz",
@@ -2724,10 +2729,9 @@
       "dev": true
     },
     "node_modules/@types/luxon": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-2.3.2.tgz",
-      "integrity": "sha512-WOehptuhKIXukSUUkRgGbj2c997Uv/iUgYgII8U7XLJqq9W2oF0kQ6frEznRQbdurioz+L/cdaIm4GutTQfgmA==",
-      "dev": true
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz",
+      "integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA=="
     },
     "node_modules/@types/mime": {
       "version": "1.3.2",
@@ -3775,6 +3779,14 @@
         "node": "^4.5.0 || >= 5.9"
       }
     },
+    "node_modules/batch-cluster": {
+      "version": "11.0.0",
+      "resolved": "https://registry.npmjs.org/batch-cluster/-/batch-cluster-11.0.0.tgz",
+      "integrity": "sha512-8iwqa+rKTaakOHkqdcXDT5L5117pa+FoP8/yAKpNdL44ZnC4V2NEA/sIg0ZO0O9NkpdjLk0A3efRFM5nVizqHw==",
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/bcrypt": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/bcrypt/-/bcrypt-5.0.1.tgz",
@@ -5629,10 +5641,39 @@
         "url": "https://github.com/sindresorhus/execa?sponsor=1"
       }
     },
-    "node_modules/exifr": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/exifr/-/exifr-7.1.3.tgz",
-      "integrity": "sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw=="
+    "node_modules/exiftool-vendored": {
+      "version": "19.0.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored/-/exiftool-vendored-19.0.0.tgz",
+      "integrity": "sha512-Zes7TZrYWxts92mbF2Gs3drtWZucm4qsaeYaE6A+OOqmeD9UGaGisqIbyh9MilJrLi+ZHzWEJZtDj37QFf6xsA==",
+      "dependencies": {
+        "@photostructure/tz-lookup": "^7.0.0",
+        "@types/luxon": "^3.2.0",
+        "batch-cluster": "^11.0.0",
+        "he": "^1.2.0",
+        "luxon": "^3.2.1"
+      },
+      "optionalDependencies": {
+        "exiftool-vendored.exe": "12.54.0",
+        "exiftool-vendored.pl": "12.54.0"
+      }
+    },
+    "node_modules/exiftool-vendored.exe": {
+      "version": "12.54.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored.exe/-/exiftool-vendored.exe-12.54.0.tgz",
+      "integrity": "sha512-Dc4W6e0NtQfYuJIYK4piHfDJnd2jvA04e0aaq9R3Q1oO34KC5e+L1D2C7lFuZXqPQLYC1x3GYc/GVv5e+SkkrQ==",
+      "optional": true,
+      "os": [
+        "win32"
+      ]
+    },
+    "node_modules/exiftool-vendored.pl": {
+      "version": "12.54.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored.pl/-/exiftool-vendored.pl-12.54.0.tgz",
+      "integrity": "sha512-RBBowsYcM6EvbWoBkg2dOqHpH3WIzN7bIzHc+o+LquqCTo3doZwECClD/6PNHVSMQsl2Z0fEf75sNq2msooMSg==",
+      "optional": true,
+      "os": [
+        "!win32"
+      ]
     },
     "node_modules/exit": {
       "version": "0.1.2",
@@ -6481,6 +6522,14 @@
       "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
       "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk="
     },
+    "node_modules/he": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
+      "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
+      "bin": {
+        "he": "bin/he"
+      }
+    },
     "node_modules/hexoid": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",
@@ -13218,6 +13267,11 @@
         }
       }
     },
+    "@photostructure/tz-lookup": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/@photostructure/tz-lookup/-/tz-lookup-7.0.0.tgz",
+      "integrity": "sha512-pTRsZz7Sn4yAtItC7I4+0segDHosMyOtJgAXg+xvDOolT0Xz4IFWqBV33OMCWoaNd3oQb60wbWhLeCQgJCyZAA=="
+    },
     "@redis/bloom": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.1.0.tgz",
@@ -13676,10 +13730,9 @@
       "dev": true
     },
     "@types/luxon": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-2.3.2.tgz",
-      "integrity": "sha512-WOehptuhKIXukSUUkRgGbj2c997Uv/iUgYgII8U7XLJqq9W2oF0kQ6frEznRQbdurioz+L/cdaIm4GutTQfgmA==",
-      "dev": true
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz",
+      "integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA=="
     },
     "@types/mime": {
       "version": "1.3.2",
@@ -14528,6 +14581,11 @@
       "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz",
       "integrity": "sha512-lGe34o6EHj9y3Kts9R4ZYs/Gr+6N7MCaMlIFA3F1R2O5/m7K06AxfSeO5530PEERE6/WyEg3lsuyw4GHlPZHog=="
     },
+    "batch-cluster": {
+      "version": "11.0.0",
+      "resolved": "https://registry.npmjs.org/batch-cluster/-/batch-cluster-11.0.0.tgz",
+      "integrity": "sha512-8iwqa+rKTaakOHkqdcXDT5L5117pa+FoP8/yAKpNdL44ZnC4V2NEA/sIg0ZO0O9NkpdjLk0A3efRFM5nVizqHw=="
+    },
     "bcrypt": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/bcrypt/-/bcrypt-5.0.1.tgz",
@@ -15929,10 +15987,31 @@
         "strip-final-newline": "^2.0.0"
       }
     },
-    "exifr": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/exifr/-/exifr-7.1.3.tgz",
-      "integrity": "sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw=="
+    "exiftool-vendored": {
+      "version": "19.0.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored/-/exiftool-vendored-19.0.0.tgz",
+      "integrity": "sha512-Zes7TZrYWxts92mbF2Gs3drtWZucm4qsaeYaE6A+OOqmeD9UGaGisqIbyh9MilJrLi+ZHzWEJZtDj37QFf6xsA==",
+      "requires": {
+        "@photostructure/tz-lookup": "^7.0.0",
+        "@types/luxon": "^3.2.0",
+        "batch-cluster": "^11.0.0",
+        "exiftool-vendored.exe": "12.54.0",
+        "exiftool-vendored.pl": "12.54.0",
+        "he": "^1.2.0",
+        "luxon": "^3.2.1"
+      }
+    },
+    "exiftool-vendored.exe": {
+      "version": "12.54.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored.exe/-/exiftool-vendored.exe-12.54.0.tgz",
+      "integrity": "sha512-Dc4W6e0NtQfYuJIYK4piHfDJnd2jvA04e0aaq9R3Q1oO34KC5e+L1D2C7lFuZXqPQLYC1x3GYc/GVv5e+SkkrQ==",
+      "optional": true
+    },
+    "exiftool-vendored.pl": {
+      "version": "12.54.0",
+      "resolved": "https://registry.npmjs.org/exiftool-vendored.pl/-/exiftool-vendored.pl-12.54.0.tgz",
+      "integrity": "sha512-RBBowsYcM6EvbWoBkg2dOqHpH3WIzN7bIzHc+o+LquqCTo3doZwECClD/6PNHVSMQsl2Z0fEf75sNq2msooMSg==",
+      "optional": true
     },
     "exit": {
       "version": "0.1.2",
@@ -16577,6 +16656,11 @@
       "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
       "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk="
     },
+    "he": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
+      "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw=="
+    },
     "hexoid": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",

+ 1 - 1
server/package.json

@@ -57,7 +57,7 @@
     "cookie-parser": "^1.4.6",
     "diskusage": "^1.1.3",
     "dotenv": "^14.2.0",
-    "exifr": "^7.1.3",
+    "exiftool-vendored": "^19.0.0",
     "fdir": "^5.3.0",
     "fluent-ffmpeg": "^2.1.2",
     "geo-tz": "^7.0.2",