소스 검색

feat(all): transcoding improvements (#2171)

* test: rename some fixtures and add test for vertical video conversion

* feat: transcode video asset when audio or container don't match target

* chore: add niceness to the ffmpeg command to allow other processes to be prioritised

* chore: change video conversion queue to one concurrency

* feat: add transcode disabled preset to completely turn off transcoding

* linter

* Change log level and remove unused await

* oops, forgot to save

* better logging

---------

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
Zack Pollard 2 년 전
부모
커밋
a5a6bebf0b

+ 3 - 0
mobile/openapi/lib/model/system_config_f_fmpeg_dto.dart

@@ -165,12 +165,14 @@ class SystemConfigFFmpegDtoTranscodeEnum {
   static const all = SystemConfigFFmpegDtoTranscodeEnum._(r'all');
   static const optimal = SystemConfigFFmpegDtoTranscodeEnum._(r'optimal');
   static const required_ = SystemConfigFFmpegDtoTranscodeEnum._(r'required');
+  static const disabled = SystemConfigFFmpegDtoTranscodeEnum._(r'disabled');
 
   /// List of all possible values in this [enum][SystemConfigFFmpegDtoTranscodeEnum].
   static const values = <SystemConfigFFmpegDtoTranscodeEnum>[
     all,
     optimal,
     required_,
+    disabled,
   ];
 
   static SystemConfigFFmpegDtoTranscodeEnum? fromJson(dynamic value) => SystemConfigFFmpegDtoTranscodeEnumTypeTransformer().decode(value);
@@ -212,6 +214,7 @@ class SystemConfigFFmpegDtoTranscodeEnumTypeTransformer {
         case r'all': return SystemConfigFFmpegDtoTranscodeEnum.all;
         case r'optimal': return SystemConfigFFmpegDtoTranscodeEnum.optimal;
         case r'required': return SystemConfigFFmpegDtoTranscodeEnum.required_;
+        case r'disabled': return SystemConfigFFmpegDtoTranscodeEnum.disabled;
         default:
           if (!allowNull) {
             throw ArgumentError('Unknown enum value to decode: $data');

+ 1 - 1
server/apps/microservices/src/processors.ts

@@ -163,7 +163,7 @@ export class VideoTranscodeProcessor {
     await this.mediaService.handleQueueVideoConversion(job.data);
   }
 
-  @Process({ name: JobName.VIDEO_CONVERSION, concurrency: 2 })
+  @Process({ name: JobName.VIDEO_CONVERSION, concurrency: 1 })
   async onVideoConversion(job: Job<IAssetJob>) {
     await this.mediaService.handleVideoConversion(job.data);
   }

+ 2 - 1
server/immich-openapi-specs.json

@@ -4679,7 +4679,8 @@
             "enum": [
               "all",
               "optimal",
-              "required"
+              "required",
+              "disabled"
             ]
           }
         },

+ 14 - 1
server/libs/domain/src/media/media.repository.ts

@@ -14,8 +14,21 @@ export interface VideoStreamInfo {
   frameCount: number;
 }
 
+export interface AudioStreamInfo {
+  codecName?: string;
+  codecType?: string;
+}
+
+export interface VideoFormat {
+  formatName?: string;
+  formatLongName?: string;
+  duration: number;
+}
+
 export interface VideoInfo {
-  streams: VideoStreamInfo[];
+  format: VideoFormat;
+  videoStreams: VideoStreamInfo[];
+  audioStreams: AudioStreamInfo[];
 }
 
 export interface IMediaRepository {

+ 38 - 5
server/libs/domain/src/media/media.service.spec.ts

@@ -222,7 +222,7 @@ describe(MediaService.name, () => {
     });
 
     it('should transcode the longest stream', async () => {
-      mediaMock.probe.mockResolvedValue(probeStub.multiple);
+      mediaMock.probe.mockResolvedValue(probeStub.multipleVideoStreams);
 
       await sut.handleVideoConversion({ asset: assetEntityStub.video });
 
@@ -237,7 +237,7 @@ describe(MediaService.name, () => {
     });
 
     it('should skip a video without any streams', async () => {
-      mediaMock.probe.mockResolvedValue(probeStub.empty);
+      mediaMock.probe.mockResolvedValue(probeStub.noVideoStreams);
       await sut.handleVideoConversion({ asset: assetEntityStub.video });
       expect(mediaMock.transcode).not.toHaveBeenCalled();
     });
@@ -249,7 +249,7 @@ describe(MediaService.name, () => {
     });
 
     it('should transcode when set to all', async () => {
-      mediaMock.probe.mockResolvedValue(probeStub.multiple);
+      mediaMock.probe.mockResolvedValue(probeStub.multipleVideoStreams);
       configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'all' }]);
       await sut.handleVideoConversion({ asset: assetEntityStub.video });
       expect(mediaMock.transcode).toHaveBeenCalledWith(
@@ -260,7 +260,40 @@ describe(MediaService.name, () => {
     });
 
     it('should transcode when optimal and too big', async () => {
-      mediaMock.probe.mockResolvedValue(probeStub.tooBig);
+      mediaMock.probe.mockResolvedValue(probeStub.videoStream2160p);
+      configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'optimal' }]);
+      await sut.handleVideoConversion({ asset: assetEntityStub.video });
+      expect(mediaMock.transcode).toHaveBeenCalledWith(
+        '/original/path.ext',
+        'upload/encoded-video/user-id/asset-id.mp4',
+        ['-crf 23', '-preset ultrafast', '-vcodec h264', '-acodec aac', '-movflags faststart', '-vf scale=-2:720'],
+      );
+    });
+
+    it('should transcode with alternate scaling video is vertical', async () => {
+      mediaMock.probe.mockResolvedValue(probeStub.videoStreamVertical2160p);
+      configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'optimal' }]);
+      await sut.handleVideoConversion({ asset: assetEntityStub.video });
+      expect(mediaMock.transcode).toHaveBeenCalledWith(
+        '/original/path.ext',
+        'upload/encoded-video/user-id/asset-id.mp4',
+        ['-crf 23', '-preset ultrafast', '-vcodec h264', '-acodec aac', '-movflags faststart', '-vf scale=720:-2'],
+      );
+    });
+
+    it('should transcode when audio doesnt match target', async () => {
+      mediaMock.probe.mockResolvedValue(probeStub.audioStreamMp3);
+      configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'optimal' }]);
+      await sut.handleVideoConversion({ asset: assetEntityStub.video });
+      expect(mediaMock.transcode).toHaveBeenCalledWith(
+        '/original/path.ext',
+        'upload/encoded-video/user-id/asset-id.mp4',
+        ['-crf 23', '-preset ultrafast', '-vcodec h264', '-acodec aac', '-movflags faststart', '-vf scale=-2:720'],
+      );
+    });
+
+    it('should transcode when container doesnt match target', async () => {
+      mediaMock.probe.mockResolvedValue(probeStub.matroskaContainer);
       configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'optimal' }]);
       await sut.handleVideoConversion({ asset: assetEntityStub.video });
       expect(mediaMock.transcode).toHaveBeenCalledWith(
@@ -271,7 +304,7 @@ describe(MediaService.name, () => {
     });
 
     it('should not transcode an invalid transcode value', async () => {
-      mediaMock.probe.mockResolvedValue(probeStub.tooBig);
+      mediaMock.probe.mockResolvedValue(probeStub.videoStream2160p);
       configMock.load.mockResolvedValue([{ key: SystemConfigKey.FFMPEG_TRANSCODE, value: 'invalid' }]);
       await sut.handleVideoConversion({ asset: assetEntityStub.video });
       expect(mediaMock.transcode).not.toHaveBeenCalled();

+ 37 - 19
server/libs/domain/src/media/media.service.ts

@@ -7,7 +7,7 @@ import { IAssetJob, IBaseJob, IJobRepository, JobName } from '../job';
 import { IStorageRepository, StorageCore, StorageFolder } from '../storage';
 import { ISystemConfigRepository, SystemConfigFFmpegDto } from '../system-config';
 import { SystemConfigCore } from '../system-config/system-config.core';
-import { IMediaRepository, VideoStreamInfo } from './media.repository';
+import { AudioStreamInfo, IMediaRepository, VideoStreamInfo } from './media.repository';
 
 @Injectable()
 export class MediaService {
@@ -127,23 +127,27 @@ export class MediaService {
       const output = join(outputFolder, `${asset.id}.mp4`);
       this.storageRepository.mkdirSync(outputFolder);
 
-      const { streams } = await this.mediaRepository.probe(input);
-      const stream = await this.getLongestStream(streams);
-      if (!stream) {
+      const { videoStreams, audioStreams, format } = await this.mediaRepository.probe(input);
+      const mainVideoStream = this.getMainVideoStream(videoStreams);
+      const mainAudioStream = this.getMainAudioStream(audioStreams);
+      const containerExtension = format.formatName;
+      if (!mainVideoStream || !mainAudioStream || !containerExtension) {
         return;
       }
 
       const { ffmpeg: config } = await this.configCore.getConfig();
 
-      const required = this.isTranscodeRequired(stream, config);
+      const required = this.isTranscodeRequired(mainVideoStream, mainAudioStream, containerExtension, config);
       if (!required) {
         return;
       }
 
-      const options = this.getFfmpegOptions(stream, config);
+      const options = this.getFfmpegOptions(mainVideoStream, config);
+
+      this.logger.log(`Start encoding video ${asset.id} ${options}`);
       await this.mediaRepository.transcode(input, output, options);
 
-      this.logger.log(`Converting Success ${asset.id}`);
+      this.logger.log(`Encoding success ${asset.id}`);
 
       await this.assetRepository.save({ id: asset.id, encodedVideoPath: output });
     } catch (error: any) {
@@ -151,32 +155,48 @@ export class MediaService {
     }
   }
 
-  private getLongestStream(streams: VideoStreamInfo[]): VideoStreamInfo | null {
-    return streams
-      .filter((stream) => stream.codecType === 'video')
-      .sort((stream1, stream2) => stream2.frameCount - stream1.frameCount)[0];
+  /**
+   * Picks the video stream with the highest frame count.
+   *
+   * @param streams video streams reported by the probe
+   * @returns the stream with the most frames, or null when the list is empty
+   */
+  private getMainVideoStream(streams: VideoStreamInfo[]): VideoStreamInfo | null {
+    // Sort a copy: Array.prototype.sort reorders in place and would otherwise mutate the caller's probe result.
+    const byFrameCountDesc = [...streams].sort((stream1, stream2) => stream2.frameCount - stream1.frameCount);
+    // An empty list yields undefined at index 0; normalise to the declared null.
+    return byFrameCountDesc[0] ?? null;
+  }
+
+  private getMainAudioStream(streams: AudioStreamInfo[]): AudioStreamInfo | null {
+    return streams[0];
   }
 
-  private isTranscodeRequired(stream: VideoStreamInfo, ffmpegConfig: SystemConfigFFmpegDto): boolean {
-    if (!stream.height || !stream.width) {
+  private isTranscodeRequired(
+    videoStream: VideoStreamInfo,
+    audioStream: AudioStreamInfo,
+    containerExtension: string,
+    ffmpegConfig: SystemConfigFFmpegDto,
+  ): boolean {
+    if (!videoStream.height || !videoStream.width) {
       this.logger.error('Skipping transcode, height or width undefined for video stream');
       return false;
     }
 
-    const isTargetVideoCodec = stream.codecName === ffmpegConfig.targetVideoCodec;
+    const isTargetVideoCodec = videoStream.codecName === ffmpegConfig.targetVideoCodec;
+    const isTargetAudioCodec = audioStream.codecName === ffmpegConfig.targetAudioCodec;
+    const isTargetContainer = ['mov,mp4,m4a,3gp,3g2,mj2', 'mp4', 'mov'].includes(containerExtension);
+
+    this.logger.debug(audioStream.codecName, audioStream.codecType, containerExtension);
+
+    const allTargetsMatching = isTargetVideoCodec && isTargetAudioCodec && isTargetContainer;
 
     const targetResolution = Number.parseInt(ffmpegConfig.targetResolution);
-    const isLargerThanTargetResolution = Math.min(stream.height, stream.width) > targetResolution;
+    const isLargerThanTargetResolution = Math.min(videoStream.height, videoStream.width) > targetResolution;
 
     switch (ffmpegConfig.transcode) {
+      case TranscodePreset.DISABLED:
+        return false;
+
       case TranscodePreset.ALL:
         return true;
 
       case TranscodePreset.REQUIRED:
-        return !isTargetVideoCodec;
+        return !allTargetsMatching;
 
       case TranscodePreset.OPTIMAL:
-        return !isTargetVideoCodec || isLargerThanTargetResolution;
+        return !allTargetsMatching || isLargerThanTargetResolution;
 
       default:
         return false;
@@ -184,8 +204,6 @@ export class MediaService {
   }
 
   private getFfmpegOptions(stream: VideoStreamInfo, ffmpeg: SystemConfigFFmpegDto) {
-    // TODO: If video or audio are already the correct format, don't re-encode, copy the stream
-
     const options = [
       `-crf ${ffmpeg.crf}`,
       `-preset ${ffmpeg.preset}`,

+ 57 - 8
server/libs/domain/test/fixtures.ts

@@ -13,12 +13,15 @@ import {
 import {
   AlbumResponseDto,
   AssetResponseDto,
+  AudioStreamInfo,
   AuthUserDto,
   ExifResponseDto,
   mapUser,
   SearchResult,
   SharedLinkResponseDto,
+  VideoFormat,
   VideoInfo,
+  VideoStreamInfo,
 } from '../src';
 
 const today = new Date();
@@ -706,10 +709,29 @@ export const searchStub = {
   }),
 };
 
+const probeStubDefaultFormat: VideoFormat = {
+  formatName: 'mov,mp4,m4a,3gp,3g2,mj2',
+  formatLongName: 'QuickTime / MOV',
+  duration: 0,
+};
+
+const probeStubDefaultVideoStream: VideoStreamInfo[] = [
+  { height: 1080, width: 1920, codecName: 'h265', codecType: 'video', frameCount: 100, rotation: 0 },
+];
+
+const probeStubDefaultAudioStream: AudioStreamInfo[] = [{ codecName: 'aac', codecType: 'audio' }];
+
+const probeStubDefault: VideoInfo = {
+  format: probeStubDefaultFormat,
+  videoStreams: probeStubDefaultVideoStream,
+  audioStreams: probeStubDefaultAudioStream,
+};
+
 export const probeStub = {
-  empty: { streams: [] },
-  multiple: Object.freeze<VideoInfo>({
-    streams: [
+  noVideoStreams: Object.freeze<VideoInfo>({ ...probeStubDefault, videoStreams: [] }),
+  multipleVideoStreams: Object.freeze<VideoInfo>({
+    ...probeStubDefault,
+    videoStreams: [
       {
         height: 1080,
         width: 400,
@@ -729,7 +751,8 @@ export const probeStub = {
     ],
   }),
   noHeight: Object.freeze<VideoInfo>({
-    streams: [
+    ...probeStubDefault,
+    videoStreams: [
       {
         height: 0,
         width: 400,
@@ -740,11 +763,12 @@ export const probeStub = {
       },
     ],
   }),
-  tooBig: Object.freeze<VideoInfo>({
-    streams: [
+  videoStream2160p: Object.freeze<VideoInfo>({
+    ...probeStubDefault,
+    videoStreams: [
       {
-        height: 10000,
-        width: 10000,
+        height: 2160,
+        width: 3840,
         codecName: 'h264',
         codecType: 'video',
         frameCount: 100,
@@ -752,4 +776,29 @@ export const probeStub = {
       },
     ],
   }),
+  videoStreamVertical2160p: Object.freeze<VideoInfo>({
+    ...probeStubDefault,
+    videoStreams: [
+      {
+        height: 2160,
+        width: 3840,
+        codecName: 'h264',
+        codecType: 'video',
+        frameCount: 100,
+        rotation: 90,
+      },
+    ],
+  }),
+  // Probe result whose only audio stream is mp3 instead of aac; everything else comes from the defaults.
+  audioStreamMp3: Object.freeze<VideoInfo>({
+    ...probeStubDefault,
+    audioStreams: [{ codecType: 'audio', codecName: 'mp3' }],
+  }),
+  matroskaContainer: Object.freeze<VideoInfo>({
+    ...probeStubDefault,
+    format: {
+      formatName: 'matroska,webm',
+      formatLongName: 'Matroska / WebM',
+      duration: 0,
+    },
+  }),
 };

+ 1 - 0
server/libs/infra/src/entities/system-config.entity.ts

@@ -37,6 +37,7 @@ export enum TranscodePreset {
   ALL = 'all',
   OPTIMAL = 'optimal',
   REQUIRED = 'required',
+  DISABLED = 'disabled',
 }
 
 export interface SystemConfig {

+ 22 - 9
server/libs/infra/src/repositories/media.repository.ts

@@ -50,20 +50,33 @@ export class MediaRepository implements IMediaRepository {
     const results = await probe(input);
 
     return {
-      streams: results.streams.map((stream) => ({
-        height: stream.height || 0,
-        width: stream.width || 0,
-        codecName: stream.codec_name,
-        codecType: stream.codec_type,
-        frameCount: Number.parseInt(stream.nb_frames ?? '0'),
-        rotation: Number.parseInt(`${stream.rotation ?? 0}`),
-      })),
+      format: {
+        formatName: results.format.format_name,
+        formatLongName: results.format.format_long_name,
+        duration: results.format.duration || 0,
+      },
+      videoStreams: results.streams
+        .filter((stream) => stream.codec_type === 'video')
+        .map((stream) => ({
+          height: stream.height || 0,
+          width: stream.width || 0,
+          codecName: stream.codec_name,
+          codecType: stream.codec_type,
+          frameCount: Number.parseInt(stream.nb_frames ?? '0'),
+          rotation: Number.parseInt(`${stream.rotation ?? 0}`),
+        })),
+      audioStreams: results.streams
+        .filter((stream) => stream.codec_type === 'audio')
+        .map((stream) => ({
+          codecType: stream.codec_type,
+          codecName: stream.codec_name,
+        })),
     };
   }
 
   transcode(input: string, output: string, options: string[]): Promise<void> {
     return new Promise((resolve, reject) => {
-      ffmpeg(input)
+      ffmpeg(input, { niceness: 10 })
         //
         .outputOptions(options)
         .output(output)

+ 2 - 1
web/src/api/open-api/api.ts

@@ -2046,7 +2046,8 @@ export interface SystemConfigFFmpegDto {
 export const SystemConfigFFmpegDtoTranscodeEnum = {
     All: 'all',
     Optimal: 'optimal',
-    Required: 'required'
+    Required: 'required',
+    Disabled: 'disabled'
 } as const;
 
 export type SystemConfigFFmpegDtoTranscodeEnum = typeof SystemConfigFFmpegDtoTranscodeEnum[keyof typeof SystemConfigFFmpegDtoTranscodeEnum];

+ 13 - 5
web/src/lib/components/admin-page/settings/ffmpeg/ffmpeg-settings.svelte

@@ -93,16 +93,20 @@
 						isEdited={!(ffmpegConfig.preset == savedConfig.preset)}
 					/>
 
-					<SettingInputField
-						inputType={SettingInputFieldType.TEXT}
-						label="AUDIO CODEC (-acodec)"
+					<SettingSelect
+						label="AUDIO CODEC"
 						bind:value={ffmpegConfig.targetAudioCodec}
-						required={true}
+						options={[
+							{ value: 'aac', text: 'aac' },
+							{ value: 'mp3', text: 'mp3' },
+							{ value: 'opus', text: 'opus' }
+						]}
+						name="acodec"
 						isEdited={!(ffmpegConfig.targetAudioCodec == savedConfig.targetAudioCodec)}
 					/>
 
 					<SettingSelect
-						label="VIDEO CODEC (-vcodec)"
+						label="VIDEO CODEC"
 						bind:value={ffmpegConfig.targetVideoCodec}
 						options={[
 							{ value: 'h264', text: 'h264' },
@@ -140,6 +144,10 @@
 							{
 								value: SystemConfigFFmpegDtoTranscodeEnum.Required,
 								text: 'Only videos not in the desired format'
+							},
+							{
+								value: SystemConfigFFmpegDtoTranscodeEnum.Disabled,
+								text: "Don't transcode any videos, may break playback on some clients"
 							}
 						]}
 						isEdited={!(ffmpegConfig.transcode == savedConfig.transcode)}