123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795 |
- # Kudos to Werner Robitza, AVEQ GmbH, for helping with ffmpeg
- # related content
- import hashlib
- import json
- import os
- import random
- import shutil
- import subprocess
- import tempfile
- from fractions import Fraction
- import filetype
- from django.conf import settings
- CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
- CRF_ENCODING_NUM_SECONDS = 2 # 0 * 60 # videos with greater duration will get
- # CRF encoding and not two-pass
- # Encoding individual chunks may yield quality variations if you use a
- # too low bitrate, so if you go for the chunk-based variant
- # you should use CRF encoding.
- MAX_RATE_MULTIPLIER = 1.5
- MIN_RATE_MULTIPLIER = 0.5
- BUF_SIZE_MULTIPLIER = 1.5
- # in seconds, anything between 2 and 6 makes sense
- KEYFRAME_DISTANCE = 4
- KEYFRAME_DISTANCE_MIN = 2
- # speed presets
- # see https://trac.ffmpeg.org/wiki/Encode/H.264
- X26x_PRESET = "medium" # "medium"
- X265_PRESET = "medium"
- X26x_PRESET_BIG_HEIGHT = "faster"
- # VP9_SPEED = 1 # between 0 and 4, lower is slower
- VP9_SPEED = 2
- VIDEO_CRFS = {
- "h264_baseline": 23,
- "h264": 23,
- "h265": 28,
- "vp9": 32,
- }
- # video rates for 25 or 60 fps input, for different codecs, in kbps
- VIDEO_BITRATES = {
- "h264": {
- 25: {
- 240: 300,
- 360: 500,
- 480: 1000,
- 720: 2500,
- 1080: 4500,
- 1440: 9000,
- 2160: 18000,
- },
- 60: {720: 3500, 1080: 7500, 1440: 18000, 2160: 40000},
- },
- "h265": {
- 25: {
- 240: 150,
- 360: 275,
- 480: 500,
- 720: 1024,
- 1080: 1800,
- 1440: 4500,
- 2160: 10000,
- },
- 60: {720: 1800, 1080: 3000, 1440: 8000, 2160: 18000},
- },
- "vp9": {
- 25: {
- 240: 150,
- 360: 275,
- 480: 500,
- 720: 1024,
- 1080: 1800,
- 1440: 4500,
- 2160: 10000,
- },
- 60: {720: 1800, 1080: 3000, 1440: 8000, 2160: 18000},
- },
- }
- AUDIO_ENCODERS = {"h264": "aac", "h265": "aac", "vp9": "libopus"}
- AUDIO_BITRATES = {"h264": 128, "h265": 128, "vp9": 96}
- EXTENSIONS = {"h264": "mp4", "h265": "mp4", "vp9": "webm"}
- VIDEO_PROFILES = {"h264": "main", "h265": "main"}
- def get_portal_workflow():
- return settings.PORTAL_WORKFLOW
- def get_default_state(user=None):
- # possible states given the portal workflow setting
- state = "private"
- if settings.PORTAL_WORKFLOW == "public":
- state = "public"
- if settings.PORTAL_WORKFLOW == "unlisted":
- state = "unlisted"
- if settings.PORTAL_WORKFLOW == "private_verified":
- if user and user.advancedUser:
- state = "unlisted"
- return state
- def get_file_name(filename):
- return filename.split("/")[-1]
- def get_file_type(filename):
- if not os.path.exists(filename):
- return None
- file_type = None
- kind = filetype.guess(filename)
- if kind is not None:
- if kind.mime.startswith("video"):
- file_type = "video"
- elif kind.mime.startswith("image"):
- file_type = "image"
- elif kind.mime.startswith("audio"):
- file_type = "audio"
- elif "pdf" in kind.mime:
- file_type = "pdf"
- else:
- # TODO: do something for files not supported by filetype lib
- pass
- return file_type
- def rm_file(filename):
- if os.path.isfile(filename):
- try:
- os.remove(filename)
- return True
- except OSError:
- pass
- return False
- def rm_files(filenames):
- if isinstance(filenames, list):
- for filename in filenames:
- rm_file(filename)
- return True
- def rm_dir(directory):
- if os.path.isdir(directory):
- # refuse to delete a dir inside project BASE_DIR
- if directory.startswith(settings.BASE_DIR):
- try:
- shutil.rmtree(directory)
- return True
- except (FileNotFoundError, PermissionError):
- pass
- return False
- def url_from_path(filename):
- # TODO: find a way to preserver http - https ...
- return "{0}{1}".format(settings.MEDIA_URL, filename.replace(settings.MEDIA_ROOT, ""))
- def create_temp_file(suffix=None, dir=settings.TEMP_DIRECTORY):
- tf = tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=dir)
- return tf.name
- def create_temp_dir(suffix=None, dir=settings.TEMP_DIRECTORY):
- td = tempfile.mkdtemp(dir=dir)
- return td
- def produce_friendly_token(token_len=settings.FRIENDLY_TOKEN_LEN):
- token = ""
- while len(token) != token_len:
- token += CHARS[random.randint(0, len(CHARS) - 1)]
- return token
- def clean_friendly_token(token):
- # cleans token
- for char in token:
- if char not in CHARS:
- token.replace(char, "")
- return token
- def mask_ip(ip_address):
- return hashlib.md5(ip_address.encode("utf-8")).hexdigest()
- def run_command(cmd, cwd=None):
- """
- Run a command directly
- """
- if isinstance(cmd, str):
- cmd = cmd.split()
- ret = {}
- if cwd:
- process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
- else:
- process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = process.communicate()
- # TODO: catch unicodedecodeerrors here...
- if process.returncode == 0:
- try:
- ret["out"] = stdout.decode("utf-8")
- except BaseException:
- ret["out"] = ""
- try:
- ret["error"] = stderr.decode("utf-8")
- except BaseException:
- ret["error"] = ""
- else:
- try:
- ret["error"] = stderr.decode("utf-8")
- except BaseException:
- ret["error"] = ""
- return ret
- def media_file_info(input_file):
- """
- Get the info about an input file, as determined by ffprobe
- Returns a dict, with the keys:
- - `filename`: Filename
- - `file_size`: Size of the file in bytes
- - `video_duration`: Duration of the video in `s.msec`
- - `video_frame_rate_d`: Framerate franction denominator
- - `video_frame_rate_n`: Framerate fraction nominator
- - `video_bitrate`: Bitrate of the video stream in kBit/s
- - `video_width`: Width in pixels
- - `video_height`: Height in pixels
- - `interlaced` : True if the video is interlaced
- - `video_codec`: Video codec
- - `audio_duration`: Duration of the audio in `s.msec`
- - `audio_sample_rate`: Audio sample rate in Hz
- - `audio_codec`: Audio codec name (`aac`)
- - `audio_bitrate`: Bitrate of the video stream in kBit/s
- Also returns the video and audio info raw from ffprobe.
- """
- ret = {}
- if not os.path.isfile(input_file):
- ret["fail"] = True
- return ret
- video_info = {}
- audio_info = {}
- cmd = ["stat", "-c", "%s", input_file]
- stdout = run_command(cmd).get("out")
- if stdout:
- file_size = int(stdout.strip())
- else:
- ret["fail"] = True
- return ret
- cmd = ["md5sum", input_file]
- stdout = run_command(cmd).get("out")
- if stdout:
- md5sum = stdout.split()[0]
- else:
- md5sum = ""
- cmd = [
- settings.FFPROBE_COMMAND,
- "-loglevel",
- "error",
- "-show_streams",
- "-show_entries",
- "format=format_name",
- "-of",
- "json",
- input_file,
- ]
- stdout = run_command(cmd).get("out")
- try:
- info = json.loads(stdout)
- except TypeError:
- ret["fail"] = True
- return ret
- has_video = False
- has_audio = False
- for stream_info in info["streams"]:
- if stream_info["codec_type"] == "video":
- video_info = stream_info
- has_video = True
- if info.get("format") and info["format"].get("format_name", "") in [
- "tty",
- "image2",
- "image2pipe",
- "bin",
- "png_pipe",
- "gif",
- ]:
- ret["fail"] = True
- return ret
- elif stream_info["codec_type"] == "audio":
- audio_info = stream_info
- has_audio = True
- if not has_video:
- ret["is_video"] = False
- ret["is_audio"] = has_audio
- ret["audio_info"] = audio_info
- return ret
- if "duration" in video_info.keys():
- video_duration = float(video_info["duration"])
- elif "tags" in video_info.keys() and "DURATION" in video_info["tags"]:
- duration_str = video_info["tags"]["DURATION"]
- try:
- hms, msec = duration_str.split(".")
- except ValueError:
- hms, msec = duration_str.split(",")
- total_dur = sum(int(x) * 60**i for i, x in enumerate(reversed(hms.split(":"))))
- video_duration = total_dur + float("0." + msec)
- else:
- # fallback to format, eg for webm
- cmd = [
- settings.FFPROBE_COMMAND,
- "-loglevel",
- "error",
- "-show_format",
- "-of",
- "json",
- input_file,
- ]
- stdout = run_command(cmd).get("out")
- format_info = json.loads(stdout)["format"]
- try:
- video_duration = float(format_info["duration"])
- except KeyError:
- ret["fail"] = True
- return ret
- if "bit_rate" in video_info.keys():
- video_bitrate = round(float(video_info["bit_rate"]) / 1024.0, 2)
- else:
- cmd = [
- settings.FFPROBE_COMMAND,
- "-loglevel",
- "error",
- "-select_streams",
- "v",
- "-show_entries",
- "packet=size",
- "-of",
- "compact=p=0:nk=1",
- input_file,
- ]
- stdout = run_command(cmd).get("out")
- stream_size = sum([int(line) for line in stdout.split("\n") if line != ""])
- video_bitrate = round((stream_size * 8 / 1024.0) / video_duration, 2)
- if "r_frame_rate" in video_info.keys():
- video_frame_rate = video_info["r_frame_rate"].partition("/")
- video_frame_rate_n = video_frame_rate[0]
- video_frame_rate_d = video_frame_rate[2]
- interlaced = False
- if video_info.get("field_order") in ("tt", "tb", "bt", "bb"):
- interlaced = True
- ret = {
- "filename": input_file,
- "file_size": file_size,
- "video_duration": video_duration,
- "video_frame_rate_n": video_frame_rate_n,
- "video_frame_rate_d": video_frame_rate_d,
- "video_bitrate": video_bitrate,
- "video_width": video_info["width"],
- "video_height": video_info["height"],
- "video_codec": video_info["codec_name"],
- "has_video": has_video,
- "has_audio": has_audio,
- "color_range": video_info.get("color_range"),
- "color_space": video_info.get("color_space"),
- "color_transfer": video_info.get("color_space"),
- "color_primaries": video_info.get("color_primaries"),
- "interlaced": interlaced,
- "display_aspect_ratio": video_info.get("display_aspect_ratio"),
- "sample_aspect_ratio": video_info.get("sample_aspect_ratio"),
- }
- if has_audio:
- if "duration" in audio_info.keys():
- audio_duration = float(audio_info["duration"])
- elif "tags" in audio_info.keys() and "DURATION" in audio_info["tags"]:
- duration_str = audio_info["tags"]["DURATION"]
- try:
- hms, msec = duration_str.split(".")
- except ValueError:
- hms, msec = duration_str.split(",")
- total_dur = sum(int(x) * 60**i for i, x in enumerate(reversed(hms.split(":"))))
- audio_duration = total_dur + float("0." + msec)
- else:
- # fallback to format, eg for webm
- cmd = [
- settings.FFPROBE_COMMAND,
- "-loglevel",
- "error",
- "-show_format",
- "-of",
- "json",
- input_file,
- ]
- stdout = run_command(cmd).get("out")
- format_info = json.loads(stdout)["format"]
- audio_duration = float(format_info["duration"])
- if "bit_rate" in audio_info.keys():
- audio_bitrate = round(float(audio_info["bit_rate"]) / 1024.0, 2)
- else:
- # fall back to calculating from accumulated frame duration
- cmd = [
- settings.FFPROBE_COMMAND,
- "-loglevel",
- "error",
- "-select_streams",
- "a",
- "-show_entries",
- "packet=size",
- "-of",
- "compact=p=0:nk=1",
- input_file,
- ]
- stdout = run_command(cmd).get("out")
- # ffprobe appends a pipe at the end of the output, thus we have to remove it
- stream_size = sum([int(line.replace("|", "")) for line in stdout.split("\n") if line != ""])
- audio_bitrate = round((stream_size * 8 / 1024.0) / audio_duration, 2)
- ret.update(
- {
- "audio_duration": audio_duration,
- "audio_sample_rate": audio_info["sample_rate"],
- "audio_codec": audio_info["codec_name"],
- "audio_bitrate": audio_bitrate,
- "audio_channels": audio_info["channels"],
- }
- )
- ret["video_info"] = video_info
- ret["audio_info"] = audio_info
- ret["is_video"] = True
- ret["md5sum"] = md5sum
- return ret
- def calculate_seconds(duration):
- # returns seconds, given a ffmpeg extracted string
- ret = 0
- if isinstance(duration, str):
- duration = duration.split(":")
- if len(duration) != 3:
- return ret
- else:
- return ret
- ret += int(float(duration[2]))
- ret += int(float(duration[1])) * 60
- ret += int(float(duration[0])) * 60 * 60
- return ret
- def show_file_size(size):
- if size:
- size = size / 1000000
- size = round(size, 1)
- size = "{0}MB".format(str(size))
- return size
- def get_base_ffmpeg_command(
- input_file,
- output_file,
- has_audio,
- codec,
- encoder,
- audio_encoder,
- target_fps,
- interlaced,
- target_height,
- target_rate,
- target_rate_audio,
- pass_file,
- pass_number,
- enc_type,
- chunk,
- ):
- """Get the base command for a specific codec, height/rate, and pass
- Arguments:
- input_file {str} -- input file name
- output_file {str} -- output file name
- has_audio {bool} -- does the input have audio?
- codec {str} -- video codec
- encoder {str} -- video encoder
- audio_encoder {str} -- audio encoder
- target_fps {fractions.Fraction} -- target FPS
- interlaced {bool} -- true if interlaced
- target_height {int} -- height
- target_rate {int} -- target bitrate in kbps
- target_rate_audio {int} -- audio target bitrate
- pass_file {str} -- path to temp pass file
- pass_number {int} -- number of passes
- enc_type {str} -- encoding type (twopass or crf)
- """
- # avoid very high frame rates
- while target_fps > 60:
- target_fps = target_fps / 2
- if target_fps < 1:
- target_fps = 1
- filters = []
- if interlaced:
- filters.append("yadif")
- target_width = round(target_height * 16 / 9)
- scale_filter_opts = [
- f"if(lt(iw\\,ih)\\,{target_height}\\,{target_width})", # noqa
- f"if(lt(iw\\,ih)\\,{target_width}\\,{target_height})", # noqa
- "force_original_aspect_ratio=decrease",
- "force_divisible_by=2",
- "flags=lanczos",
- ]
- scale_filter_str = "scale=" + ":".join(scale_filter_opts)
- filters.append(scale_filter_str)
- fps_str = f"fps=fps={target_fps}"
- filters.append(fps_str)
- filters_str = ",".join(filters)
- base_cmd = [
- settings.FFMPEG_COMMAND,
- "-y",
- "-i",
- input_file,
- "-c:v",
- encoder,
- "-filter:v",
- filters_str,
- "-pix_fmt",
- "yuv420p",
- ]
- if enc_type == "twopass":
- base_cmd.extend(["-b:v", str(target_rate) + "k"])
- elif enc_type == "crf":
- base_cmd.extend(["-crf", str(VIDEO_CRFS[codec])])
- if encoder == "libvpx-vp9":
- base_cmd.extend(["-b:v", str(target_rate) + "k"])
- if has_audio:
- base_cmd.extend(
- [
- "-c:a",
- audio_encoder,
- "-b:a",
- str(target_rate_audio) + "k",
- # stereo audio only, see https://trac.ffmpeg.org/ticket/5718
- "-ac",
- "2",
- ]
- )
- # get keyframe distance in frames
- keyframe_distance = int(target_fps * KEYFRAME_DISTANCE)
- # start building the command
- cmd = base_cmd[:]
- # preset settings
- if encoder == "libvpx-vp9":
- if pass_number == 1:
- speed = 4
- else:
- speed = VP9_SPEED
- elif encoder in ["libx264"]:
- preset = X26x_PRESET
- elif encoder in ["libx265"]:
- preset = X265_PRESET
- if target_height >= 720:
- preset = X26x_PRESET_BIG_HEIGHT
- if encoder == "libx264":
- level = "4.2" if target_height <= 1080 else "5.2"
- x264_params = [
- "keyint=" + str(keyframe_distance * 2),
- "keyint_min=" + str(keyframe_distance),
- ]
- cmd.extend(
- [
- "-maxrate",
- str(int(int(target_rate) * MAX_RATE_MULTIPLIER)) + "k",
- "-bufsize",
- str(int(int(target_rate) * BUF_SIZE_MULTIPLIER)) + "k",
- "-force_key_frames",
- "expr:gte(t,n_forced*" + str(KEYFRAME_DISTANCE) + ")",
- "-x264-params",
- ":".join(x264_params),
- "-preset",
- preset,
- "-profile:v",
- VIDEO_PROFILES[codec],
- "-level",
- level,
- ]
- )
- if enc_type == "twopass":
- cmd.extend(["-passlogfile", pass_file, "-pass", pass_number])
- elif encoder == "libx265":
- x265_params = [
- "vbv-maxrate=" + str(int(int(target_rate) * MAX_RATE_MULTIPLIER)),
- "vbv-bufsize=" + str(int(int(target_rate) * BUF_SIZE_MULTIPLIER)),
- "keyint=" + str(keyframe_distance * 2),
- "keyint_min=" + str(keyframe_distance),
- ]
- if enc_type == "twopass":
- x265_params.extend(["stats=" + str(pass_file), "pass=" + str(pass_number)])
- cmd.extend(
- [
- "-force_key_frames",
- "expr:gte(t,n_forced*" + str(KEYFRAME_DISTANCE) + ")",
- "-x265-params",
- ":".join(x265_params),
- "-preset",
- preset,
- "-profile:v",
- VIDEO_PROFILES[codec],
- ]
- )
- elif encoder == "libvpx-vp9":
- cmd.extend(
- [
- "-g",
- str(keyframe_distance),
- "-keyint_min",
- str(keyframe_distance),
- "-maxrate",
- str(int(int(target_rate) * MAX_RATE_MULTIPLIER)) + "k",
- "-minrate",
- str(int(int(target_rate) * MIN_RATE_MULTIPLIER)) + "k",
- "-bufsize",
- str(int(int(target_rate) * BUF_SIZE_MULTIPLIER)) + "k",
- "-speed",
- speed,
- # '-deadline', 'realtime',
- ]
- )
- if enc_type == "twopass":
- cmd.extend(["-passlogfile", pass_file, "-pass", pass_number])
- cmd.extend(
- [
- "-strict",
- "-2",
- ]
- )
- # end of the command
- if pass_number == 1:
- cmd.extend(["-an", "-f", "null", "/dev/null"])
- elif pass_number == 2:
- if output_file.endswith("mp4") and chunk:
- cmd.extend(["-movflags", "+faststart"])
- cmd.extend([output_file])
- return cmd
- def produce_ffmpeg_commands(media_file, media_info, resolution, codec, output_filename, pass_file, chunk=False):
- try:
- media_info = json.loads(media_info)
- except BaseException:
- media_info = {}
- if codec == "h264":
- encoder = "libx264"
- # ext = "mp4"
- elif codec in ["h265", "hevc"]:
- encoder = "libx265"
- # ext = "mp4"
- elif codec == "vp9":
- encoder = "libvpx-vp9"
- # ext = "webm"
- else:
- return False
- target_fps = Fraction(int(media_info.get("video_frame_rate_n", 30)), int(media_info.get("video_frame_rate_d", 1)))
- if target_fps <= 30:
- target_rate = VIDEO_BITRATES[codec][25].get(resolution)
- else:
- target_rate = VIDEO_BITRATES[codec][60].get(resolution)
- if not target_rate: # INVESTIGATE MORE!
- target_rate = VIDEO_BITRATES[codec][25].get(resolution)
- if not target_rate:
- return False
- if media_info.get("video_height") < resolution:
- if resolution not in [240, 360]: # always get these two
- return False
- # if codec == "h264_baseline":
- # target_fps = 25
- # else:
- if media_info.get("video_duration") > CRF_ENCODING_NUM_SECONDS:
- enc_type = "crf"
- else:
- enc_type = "twopass"
- if enc_type == "twopass":
- passes = [1, 2]
- elif enc_type == "crf":
- passes = [2]
- interlaced = media_info.get("interlaced")
- cmds = []
- for pass_number in passes:
- cmds.append(
- get_base_ffmpeg_command(
- media_file,
- output_file=output_filename,
- has_audio=media_info.get("has_audio"),
- codec=codec,
- encoder=encoder,
- audio_encoder=AUDIO_ENCODERS[codec],
- target_fps=target_fps,
- interlaced=interlaced,
- target_height=resolution,
- target_rate=target_rate,
- target_rate_audio=AUDIO_BITRATES[codec],
- pass_file=pass_file,
- pass_number=pass_number,
- enc_type=enc_type,
- chunk=chunk,
- )
- )
- return cmds
- def clean_query(query):
- """This is used to clear text in order to comply with SearchQuery
- known exception cases
- :param query: str - the query text that we want to clean
- :return:
- """
- if not query:
- return ""
- chars = ["^", "{", "}", "&", "|", "<", ">", '"', ")", "(", "!", ":", ";", "'", "#"]
- for char in chars:
- query = query.replace(char, "")
- return query.lower()
- def get_alphanumeric_only(string):
- """Returns a query that contains only alphanumeric characters
- This include characters other than the English alphabet too
- """
- string = "".join([char for char in string if char.isalnum()])
- return string.lower()
|