Prechádzať zdrojové kódy

LibVideo: Scaffold LibVideo and implement simplistic Matroska parser

This commit initializes the LibVideo library and implements parsing
basic Matroska container files. Currently, it will only parse audio
and video tracks.
FalseHonesty 4 rokov pred
rodič
commit
403bb07443

+ 8 - 0
AK/Debug.h.in

@@ -258,6 +258,14 @@
 #cmakedefine01 MARKDOWN_DEBUG
 #endif
 
+#ifndef MATROSKA_DEBUG
+#cmakedefine01 MATROSKA_DEBUG
+#endif
+
+#ifndef MATROSKA_TRACE_DEBUG
+#cmakedefine01 MATROSKA_TRACE_DEBUG
+#endif
+
 #ifndef MEMORY_DEBUG
 #cmakedefine01 MEMORY_DEBUG
 #endif

+ 2 - 0
Meta/CMake/all_the_debug_macros.cmake

@@ -101,6 +101,8 @@ set(LOCK_TRACE_DEBUG ON)
 set(LOOKUPSERVER_DEBUG ON)
 set(MALLOC_DEBUG ON)
 set(MARKDOWN_DEBUG ON)
+set(MATROSKA_DEBUG ON)
+set(MATROSKA_TRACE_DEBUG ON)
 set(MASTERPTY_DEBUG ON)
 set(MBR_DEBUG ON)
 set(MEMORY_DEBUG ON)

+ 1 - 0
Userland/Libraries/CMakeLists.txt

@@ -41,6 +41,7 @@ add_subdirectory(LibTextCodec)
 add_subdirectory(LibThreading)
 add_subdirectory(LibTLS)
 add_subdirectory(LibTTF)
+add_subdirectory(LibVideo)
 add_subdirectory(LibVT)
 add_subdirectory(LibWasm)
 add_subdirectory(LibWeb)

+ 7 - 0
Userland/Libraries/LibVideo/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Files that belong to LibVideo (presumably picked up by serenity_lib() below via SOURCES).
+set(SOURCES
+    MatroskaDocument.h
+    MatroskaReader.cpp
+)
+
+# Declare the LibVideo library target and the libraries it links against.
+serenity_lib(LibVideo video)
+target_link_libraries(LibVideo LibAudio LibCore LibIPC)

+ 195 - 0
Userland/Libraries/LibVideo/MatroskaDocument.h

@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+#include <AK/FlyString.h>
+#include <AK/HashMap.h>
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/OwnPtr.h>
+#include <AK/String.h>
+#include <AK/Utf8View.h>
+
+namespace Video {
+
+// The subset of the EBML header that the reader keeps.
+struct EBMLHeader {
+    // Value of the DocType element; empty if the header did not contain one.
+    String doc_type;
+    // Value of the DocTypeVersion element. Initialised so that a header without this
+    // element never exposes an indeterminate value; 1 is the default EBML specifies.
+    u32 doc_type_version { 1 };
+};
+
+// Values collected from a segment's information element.
+class SegmentInformation {
+public:
+    // Timestamp scale; stays at 1'000'000 unless the file overrides it.
+    void set_timestamp_scale(u64 timestamp_scale)
+    {
+        m_timestamp_scale = timestamp_scale;
+    }
+    u64 timestamp_scale() const
+    {
+        return m_timestamp_scale;
+    }
+
+    // Muxing application string, exposed as a UTF-8 view over the stored String.
+    void set_muxing_app(String muxing_app)
+    {
+        m_muxing_app = move(muxing_app);
+    }
+    Utf8View muxing_app() const
+    {
+        return Utf8View(m_muxing_app);
+    }
+
+    // Writing application string, exposed as a UTF-8 view over the stored String.
+    void set_writing_app(String writing_app)
+    {
+        m_writing_app = move(writing_app);
+    }
+    Utf8View writing_app() const
+    {
+        return Utf8View(m_writing_app);
+    }
+
+private:
+    u64 m_timestamp_scale { 1'000'000 };
+    String m_muxing_app;
+    String m_writing_app;
+};
+
+// One track of a Matroska segment, together with its video- or audio-specific settings.
+class TrackEntry {
+public:
+    // Numeric values are stored exactly as read from the file's TrackType element.
+    enum TrackType : u8 {
+        Invalid = 0,
+        Video = 1,
+        Audio = 2,
+        Complex = 3,
+        Logo = 16,
+        Subtitle = 17,
+        Buttons = 18,
+        Control = 32,
+        Metadata = 33,
+    };
+
+    struct VideoTrack {
+        u64 pixel_width;
+        u64 pixel_height;
+    };
+
+    struct AudioTrack {
+        u64 channels;
+        u64 bit_depth;
+    };
+
+    u64 track_number() const { return m_track_number; }
+    void set_track_number(u64 track_number) { m_track_number = track_number; }
+    u64 track_uid() const { return m_track_uid; }
+    void set_track_uid(u64 track_uid) { m_track_uid = track_uid; }
+    TrackType track_type() const { return m_track_type; }
+    void set_track_type(TrackType track_type) { m_track_type = track_type; }
+    FlyString language() const { return m_language; }
+    void set_language(const FlyString& language) { m_language = language; }
+    FlyString codec_id() const { return m_codec_id; }
+    void set_codec_id(const FlyString& codec_id) { m_codec_id = codec_id; }
+
+    // Returns the video settings only when this is a video track; empty otherwise.
+    Optional<VideoTrack> video_track() const
+    {
+        if (track_type() != Video)
+            return {};
+        return m_video_track;
+    }
+    void set_video_track(VideoTrack video_track) { m_video_track = video_track; }
+
+    // Returns the audio settings only when this is an audio track; empty otherwise.
+    Optional<AudioTrack> audio_track() const
+    {
+        if (track_type() != Audio)
+            return {};
+        return m_audio_track;
+    }
+    void set_audio_track(AudioTrack audio_track) { m_audio_track = audio_track; }
+
+private:
+    u64 m_track_number { 0 };
+    u64 m_track_uid { 0 };
+    TrackType m_track_type { Invalid };
+    FlyString m_language = "eng";
+    FlyString m_codec_id;
+
+    // Only one of the two is meaningful, selected by m_track_type. The storage is
+    // zero-initialised so that a track whose type is set but whose Video/Audio element
+    // is missing reports zeros rather than indeterminate memory. Both structs consist
+    // of two u64 fields, so zeroing one zeroes the storage of the other as well.
+    union {
+        VideoTrack m_video_track {};
+        AudioTrack m_audio_track;
+    };
+};
+
+// A block of a cluster: the header fields plus the frames it carries.
+class Block {
+public:
+    // Values match the two lacing bits as extracted from the block's flags octet.
+    enum Lacing : u8 {
+        None = 0b00,
+        XIPH = 0b01,
+        FixedSize = 0b10,
+        EBML = 0b11,
+    };
+
+    Block() = default;
+
+    u64 track_number() const { return m_track_number; }
+    void set_track_number(u64 track_number) { m_track_number = track_number; }
+    i16 timestamp() const { return m_timestamp; }
+    void set_timestamp(i16 timestamp) { m_timestamp = timestamp; }
+    bool only_keyframes() const { return m_only_keyframes; }
+    void set_only_keyframes(bool only_keyframes) { m_only_keyframes = only_keyframes; }
+    bool invisible() const { return m_invisible; }
+    void set_invisible(bool invisible) { m_invisible = invisible; }
+    Lacing lacing() const { return m_lacing; }
+    void set_lacing(Lacing lacing) { m_lacing = lacing; }
+    bool discardable() const { return m_discardable; }
+    void set_discardable(bool discardable) { m_discardable = discardable; }
+
+    u64 frame_count() const { return m_frames.size(); }
+    const ByteBuffer& frame(size_t index) const { return m_frames.at(index); }
+    // Appends a copy of `frame`. The parameter is a const reference, so wrapping it in
+    // move() could never move anything; the copy is now spelled out honestly.
+    void add_frame(const ByteBuffer& frame) { m_frames.append(frame); }
+
+private:
+    u64 m_track_number { 0 };
+    i16 m_timestamp { 0 };
+    bool m_only_keyframes { false };
+    bool m_invisible { false };
+    Lacing m_lacing { None };
+    bool m_discardable { true };
+    Vector<ByteBuffer> m_frames;
+};
+
+// A cluster: one timestamp plus the blocks that were parsed from it.
+class Cluster {
+public:
+    void set_timestamp(u64 timestamp)
+    {
+        m_timestamp = timestamp;
+    }
+    u64 timestamp() const
+    {
+        return m_timestamp;
+    }
+
+    // Mutable access is what the reader uses to append blocks.
+    NonnullOwnPtrVector<Block>& blocks()
+    {
+        return m_blocks;
+    }
+    const NonnullOwnPtrVector<Block>& blocks() const
+    {
+        return m_blocks;
+    }
+
+private:
+    u64 m_timestamp { 0 };
+    NonnullOwnPtrVector<Block> m_blocks;
+};
+
+// In-memory result of parsing a Matroska file: EBML header, optional segment
+// information, tracks keyed by track number, and clusters.
+class MatroskaDocument {
+public:
+    explicit MatroskaDocument(EBMLHeader header)
+        : m_header(move(header))
+    {
+    }
+
+    const EBMLHeader& header() const { return m_header; }
+
+    // Returns a copy of the segment information, or an empty Optional if none was set.
+    Optional<SegmentInformation> segment_information() const
+    {
+        if (m_segment_information)
+            return *m_segment_information;
+        return {};
+    }
+    void set_segment_information(OwnPtr<SegmentInformation> segment_information)
+    {
+        m_segment_information = move(segment_information);
+    }
+
+    const HashMap<u64, NonnullOwnPtr<TrackEntry>>& tracks() const { return m_tracks; }
+
+    // Returns a copy of the track stored under `track_number`, if there is one.
+    Optional<TrackEntry> track_for_track_number(u64 track_number) const
+    {
+        auto found_track = m_tracks.get(track_number);
+        if (found_track.has_value())
+            return *found_track.value();
+        return {};
+    }
+
+    // Returns a copy of the first track (in map iteration order) whose type is `type`.
+    Optional<TrackEntry> track_for_track_type(TrackEntry::TrackType type) const
+    {
+        for (auto& entry : m_tracks) {
+            if (entry.value->track_type() != type)
+                continue;
+            return *entry.value;
+        }
+        return {};
+    }
+
+    void add_track(u64 track_number, NonnullOwnPtr<TrackEntry> track)
+    {
+        m_tracks.set(track_number, move(track));
+    }
+
+    NonnullOwnPtrVector<Cluster>& clusters() { return m_clusters; }
+
+private:
+    EBMLHeader m_header;
+    OwnPtr<SegmentInformation> m_segment_information;
+    HashMap<u64, NonnullOwnPtr<TrackEntry>> m_tracks;
+    NonnullOwnPtrVector<Cluster> m_clusters;
+};
+
+}

+ 458 - 0
Userland/Libraries/LibVideo/MatroskaReader.cpp

@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "MatroskaReader.h"
+#include <AK/Function.h>
+#include <AK/MappedFile.h>
+#include <AK/Optional.h>
+#include <AK/Utf8View.h>
+
+namespace Video {
+
+#define CHECK_HAS_VALUE(x) \
+    if (!(x).has_value())  \
+    return false
+
+// Element IDs. The reader compares them against IDs read with mask_length = false,
+// i.e. with the leading length-marker bit still present in the value.
+
+// IDs checked directly by parse(): the EBML header and the segment that follows it.
+constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
+constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
+// Children of the EBML header.
+constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
+constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
+// Children of the segment.
+constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
+constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
+constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
+// Children of the segment information element.
+constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
+constexpr u32 MUXING_APP_ID = 0x4D80;
+constexpr u32 WRITING_APP_ID = 0x5741;
+// Child of the tracks element, followed by the children of a single track entry.
+constexpr u32 TRACK_ENTRY_ID = 0xAE;
+constexpr u32 TRACK_NUMBER_ID = 0xD7;
+constexpr u32 TRACK_UID_ID = 0x73C5;
+constexpr u32 TRACK_TYPE_ID = 0x83;
+constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
+constexpr u32 TRACK_CODEC_ID = 0x86;
+constexpr u32 TRACK_VIDEO_ID = 0xE0;
+constexpr u32 TRACK_AUDIO_ID = 0xE1;
+// Children of a track's video element.
+constexpr u32 PIXEL_WIDTH_ID = 0xB0;
+constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
+// Children of a track's audio element.
+constexpr u32 CHANNELS_ID = 0x9F;
+constexpr u32 BIT_DEPTH_ID = 0x6264;
+// Children of a cluster.
+constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
+constexpr u32 TIMESTAMP_ID = 0xE7;
+
+// Maps the file at `path` and parses the mapped bytes as a Matroska document.
+// Returns a null OwnPtr if the file cannot be mapped or parsing fails.
+OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_file(const StringView& path)
+{
+    auto mapping_or_error = MappedFile::map(path);
+    if (!mapping_or_error.is_error()) {
+        auto mapping = mapping_or_error.release_value();
+        auto* bytes = static_cast<const u8*>(mapping->data());
+        return parse_matroska_from_data(bytes, mapping->size());
+    }
+
+    return {};
+}
+
+// Parses `size` bytes starting at `data` as a Matroska document using a temporary reader.
+// Returns a null OwnPtr if parsing fails.
+OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_data(const u8* data, size_t size)
+{
+    return MatroskaReader(data, size).parse();
+}
+
+// Parses the whole input: an EBML header element followed by a segment element.
+// Returns a null OwnPtr if either is missing or malformed.
+OwnPtr<MatroskaDocument> MatroskaReader::parse()
+{
+    // Empty input must fail the parse instead of tripping the VERIFY in read_octet().
+    if (!m_streamer.has_octet())
+        return {};
+
+    auto first_element_id = m_streamer.read_variable_size_integer(false);
+    if (!first_element_id.has_value())
+        return {};
+    // Log only after the has_value() check; calling value() on an empty Optional
+    // would abort whenever trace logging is enabled.
+    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id.value());
+    if (first_element_id.value() != EBML_MASTER_ELEMENT_ID)
+        return {};
+
+    auto header = parse_ebml_header();
+    if (!header.has_value())
+        return {};
+
+    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
+
+    // Input that ends right after the header has no segment to parse.
+    if (!m_streamer.has_octet())
+        return {};
+
+    auto root_element_id = m_streamer.read_variable_size_integer(false);
+    if (!root_element_id.has_value() || root_element_id.value() != SEGMENT_ELEMENT_ID)
+        return {};
+
+    auto matroska_document = make<MatroskaDocument>(header.release_value());
+
+    if (!parse_segment_elements(*matroska_document))
+        return {};
+
+    return matroska_document;
+}
+
+// Reads a master element's data size, then reads child element IDs and passes each one
+// to `element_consumer` until the declared number of octets has been consumed.
+// `element_name` is used for debug output only.
+// Returns false if the size or a child ID cannot be read, if the data ends early, or
+// if the consumer reports failure.
+bool MatroskaReader::parse_master_element([[maybe_unused]] const StringView& element_name, Function<bool(u64)> element_consumer)
+{
+    // read_variable_size_integer() asserts that at least one octet is available, so
+    // truncated input has to be rejected before each call.
+    if (!m_streamer.has_octet())
+        return false;
+
+    auto element_data_size = m_streamer.read_variable_size_integer();
+    CHECK_HAS_VALUE(element_data_size);
+    dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size.value());
+
+    // Count octets for this element separately. Every exit below goes through the
+    // matching pop so the counter stack stays balanced on failure too.
+    bool success = true;
+    m_streamer.push_octets_read();
+    while (m_streamer.octets_read() < element_data_size.value()) {
+        dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading  element ======");
+        if (!m_streamer.has_octet()) {
+            success = false;
+            break;
+        }
+
+        auto optional_element_id = m_streamer.read_variable_size_integer(false);
+        if (!optional_element_id.has_value()) {
+            success = false;
+            break;
+        }
+
+        auto element_id = optional_element_id.value();
+        dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}\n", element_name, element_id);
+
+        if (!element_consumer(element_id)) {
+            dbgln_if(MATROSKA_DEBUG, "{:s} consumer failed on ID {:#010x}\n", element_name.to_string().characters(), element_id);
+            success = false;
+            break;
+        }
+
+        dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", m_streamer.octets_read(), element_name);
+    }
+    m_streamer.pop_octets_read();
+
+    return success;
+}
+
+// Parses the children of the EBML header element. DocType and DocTypeVersion are
+// stored; every other child is skipped. Returns an empty Optional on failure.
+Optional<EBMLHeader> MatroskaReader::parse_ebml_header()
+{
+    EBMLHeader ebml_header;
+    bool parsed = parse_master_element("Header", [&](u64 element_id) {
+        switch (element_id) {
+        case DOCTYPE_ELEMENT_ID: {
+            auto doc_type = read_string_element();
+            CHECK_HAS_VALUE(doc_type);
+            ebml_header.doc_type = doc_type.value();
+            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", doc_type.value());
+            break;
+        }
+        case DOCTYPE_VERSION_ELEMENT_ID: {
+            auto doc_type_version = read_u64_element();
+            CHECK_HAS_VALUE(doc_type_version);
+            ebml_header.doc_type_version = doc_type_version.value();
+            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", doc_type_version.value());
+            break;
+        }
+        default:
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (parsed)
+        return ebml_header;
+    return {};
+}
+
+// Parses the children of the segment element into `matroska_document`: segment
+// information, tracks and clusters are stored; every other child is skipped.
+// Returns false as soon as any child fails to parse.
+bool MatroskaReader::parse_segment_elements(MatroskaDocument& matroska_document)
+{
+    dbgln_if(MATROSKA_DEBUG, "Parsing segment elements");
+    auto success = parse_master_element("Segment", [&](u64 element_id) {
+        if (element_id == SEGMENT_INFORMATION_ELEMENT_ID) {
+            auto segment_information = parse_information();
+            if (!segment_information)
+                return false;
+            matroska_document.set_segment_information(move(segment_information));
+        } else if (element_id == TRACK_ELEMENT_ID) {
+            return parse_tracks(matroska_document);
+        } else if (element_id == CLUSTER_ELEMENT_ID) {
+            auto cluster = parse_cluster();
+            if (!cluster)
+                return false;
+            matroska_document.clusters().append(cluster.release_nonnull());
+        } else {
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    // This used to be an unconditional dbgln() that printed on every parse; it is now
+    // gated like the rest of the reader's diagnostics.
+    dbgln_if(MATROSKA_DEBUG, "Success {}", success);
+    return success;
+}
+
+// Parses the children of the segment information element. TimestampScale, MuxingApp
+// and WritingApp are stored; every other child is skipped. Returns null on failure.
+OwnPtr<SegmentInformation> MatroskaReader::parse_information()
+{
+    auto information = make<SegmentInformation>();
+    bool parsed = parse_master_element("Segment Information", [&](u64 element_id) {
+        switch (element_id) {
+        case TIMESTAMP_SCALE_ID: {
+            auto timestamp_scale = read_u64_element();
+            CHECK_HAS_VALUE(timestamp_scale);
+            information->set_timestamp_scale(timestamp_scale.value());
+            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", timestamp_scale.value());
+            break;
+        }
+        case MUXING_APP_ID: {
+            auto muxing_app = read_string_element();
+            CHECK_HAS_VALUE(muxing_app);
+            information->set_muxing_app(muxing_app.value());
+            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", muxing_app.value());
+            break;
+        }
+        case WRITING_APP_ID: {
+            auto writing_app = read_string_element();
+            CHECK_HAS_VALUE(writing_app);
+            information->set_writing_app(writing_app.value());
+            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", writing_app.value());
+            break;
+        }
+        default:
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (parsed)
+        return information;
+    return {};
+}
+
+// Parses the children of the tracks element. Each track entry is added to
+// `matroska_document` under its track number; every other child is skipped.
+bool MatroskaReader::parse_tracks(MatroskaDocument& matroska_document)
+{
+    return parse_master_element("Tracks", [&](u64 element_id) {
+        if (element_id != TRACK_ENTRY_ID)
+            return read_unknown_element();
+
+        dbgln_if(MATROSKA_DEBUG, "Parsing track");
+        auto entry = parse_track_entry();
+        if (!entry)
+            return false;
+
+        // Read the number first: release_nonnull() empties `entry`.
+        auto number = entry->track_number();
+        matroska_document.add_track(number, entry.release_nonnull());
+        dbgln_if(MATROSKA_DEBUG, "Track {} added to document", number);
+        return true;
+    });
+}
+
+// Parses the children of a single track entry. Number, UID, type, language, codec ID
+// and the nested video/audio settings are stored; every other child is skipped.
+// Returns null on failure.
+OwnPtr<TrackEntry> MatroskaReader::parse_track_entry()
+{
+    auto entry = make<TrackEntry>();
+    bool parsed = parse_master_element("Track", [&](u64 element_id) {
+        switch (element_id) {
+        case TRACK_NUMBER_ID: {
+            auto track_number = read_u64_element();
+            CHECK_HAS_VALUE(track_number);
+            entry->set_track_number(track_number.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_number.value());
+            break;
+        }
+        case TRACK_UID_ID: {
+            auto track_uid = read_u64_element();
+            CHECK_HAS_VALUE(track_uid);
+            entry->set_track_uid(track_uid.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_uid.value());
+            break;
+        }
+        case TRACK_TYPE_ID: {
+            auto track_type = read_u64_element();
+            CHECK_HAS_VALUE(track_type);
+            entry->set_track_type(static_cast<TrackEntry::TrackType>(track_type.value()));
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", track_type.value());
+            break;
+        }
+        case TRACK_LANGUAGE_ID: {
+            auto language = read_string_element();
+            CHECK_HAS_VALUE(language);
+            entry->set_language(language.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", language.value());
+            break;
+        }
+        case TRACK_CODEC_ID: {
+            auto codec_id = read_string_element();
+            CHECK_HAS_VALUE(codec_id);
+            entry->set_codec_id(codec_id.value());
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", codec_id.value());
+            break;
+        }
+        case TRACK_VIDEO_ID: {
+            auto video_track = parse_video_track_information();
+            CHECK_HAS_VALUE(video_track);
+            entry->set_video_track(video_track.value());
+            break;
+        }
+        case TRACK_AUDIO_ID: {
+            auto audio_track = parse_audio_track_information();
+            CHECK_HAS_VALUE(audio_track);
+            entry->set_audio_track(audio_track.value());
+            break;
+        }
+        default:
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (parsed)
+        return entry;
+    return {};
+}
+
+// Parses the children of a track's video element. PixelWidth and PixelHeight are
+// stored (both start at zero); every other child is skipped.
+// Returns an empty Optional on failure.
+Optional<TrackEntry::VideoTrack> MatroskaReader::parse_video_track_information()
+{
+    TrackEntry::VideoTrack video {};
+
+    bool parsed = parse_master_element("VideoTrack", [&](u64 element_id) {
+        switch (element_id) {
+        case PIXEL_WIDTH_ID: {
+            auto pixel_width = read_u64_element();
+            CHECK_HAS_VALUE(pixel_width);
+            video.pixel_width = pixel_width.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", pixel_width.value());
+            break;
+        }
+        case PIXEL_HEIGHT_ID: {
+            auto pixel_height = read_u64_element();
+            CHECK_HAS_VALUE(pixel_height);
+            video.pixel_height = pixel_height.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", pixel_height.value());
+            break;
+        }
+        default:
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (parsed)
+        return video;
+    return {};
+}
+
+// Parses the children of a track's audio element. Channels and BitDepth are stored
+// (both start at zero); every other child is skipped.
+// Returns an empty Optional on failure.
+Optional<TrackEntry::AudioTrack> MatroskaReader::parse_audio_track_information()
+{
+    TrackEntry::AudioTrack audio {};
+
+    bool parsed = parse_master_element("AudioTrack", [&](u64 element_id) {
+        switch (element_id) {
+        case CHANNELS_ID: {
+            auto channels = read_u64_element();
+            CHECK_HAS_VALUE(channels);
+            audio.channels = channels.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", channels.value());
+            break;
+        }
+        case BIT_DEPTH_ID: {
+            auto bit_depth = read_u64_element();
+            CHECK_HAS_VALUE(bit_depth);
+            audio.bit_depth = bit_depth.value();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", bit_depth.value());
+            break;
+        }
+        default:
+            return read_unknown_element();
+        }
+
+        return true;
+    });
+
+    if (parsed)
+        return audio;
+    return {};
+}
+
+// Parses the children of a cluster element. Simple blocks are appended and the
+// timestamp is stored; every other child is skipped. Returns null on failure.
+OwnPtr<Cluster> MatroskaReader::parse_cluster()
+{
+    auto parsed_cluster = make<Cluster>();
+
+    bool parsed = parse_master_element("Cluster", [&](u64 element_id) {
+        switch (element_id) {
+        case SIMPLE_BLOCK_ID: {
+            auto simple_block = parse_simple_block();
+            if (!simple_block)
+                return false;
+            parsed_cluster->blocks().append(simple_block.release_nonnull());
+            return true;
+        }
+        case TIMESTAMP_ID: {
+            auto timestamp = read_u64_element();
+            if (!timestamp.has_value())
+                return false;
+            parsed_cluster->set_timestamp(timestamp.value());
+            return true;
+        }
+        default:
+            return read_unknown_element();
+        }
+    });
+
+    if (parsed)
+        return parsed_cluster;
+    return {};
+}
+
+// Parses a simple block whose element ID has already been consumed: content size,
+// track number, 16-bit timestamp, flags octet and the frame data.
+// All sizes come from the file, so every one is checked against the block's declared
+// size and the remaining input before data is copied. Returns null if the block is
+// truncated or internally inconsistent.
+OwnPtr<Block> MatroskaReader::parse_simple_block()
+{
+    auto block = make<Block>();
+
+    // The VINT readers assert that an octet is available, so check before each call.
+    if (!m_streamer.has_octet())
+        return {};
+    auto content_size = m_streamer.read_variable_size_integer();
+    if (!content_size.has_value())
+        return {};
+    // The whole block has to be present; this also bounds every frame copy below.
+    if (m_streamer.remaining() < content_size.value())
+        return {};
+
+    auto octets_read_before_track_number = m_streamer.octets_read();
+    if (!m_streamer.has_octet())
+        return {};
+    auto track_number = m_streamer.read_variable_size_integer();
+    if (!track_number.has_value())
+        return {};
+    block->set_track_number(track_number.value());
+
+    // Two timestamp octets plus one flags octet.
+    if (m_streamer.remaining() < 3)
+        return {};
+    block->set_timestamp(m_streamer.read_i16());
+
+    auto flags = m_streamer.read_octet();
+    block->set_only_keyframes(flags & (1u << 7u));
+    block->set_invisible(flags & (1u << 3u));
+    block->set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
+    block->set_discardable(flags & 1u);
+
+    // A declared size smaller than the block header would make the subtraction wrap.
+    u64 header_size = m_streamer.octets_read() - octets_read_before_track_number;
+    if (content_size.value() < header_size)
+        return {};
+    u64 total_frame_content_size = content_size.value() - header_size;
+
+    if (block->lacing() == Block::Lacing::EBML) {
+        auto octets_read_before_frame_sizes = m_streamer.octets_read();
+        if (total_frame_content_size < 1)
+            return {};
+        auto frame_count = m_streamer.read_octet() + 1;
+        Vector<u64> frame_sizes;
+        frame_sizes.ensure_capacity(frame_count);
+
+        // Sizes are coded for every frame except the last: the first as an unsigned
+        // VINT, the following ones as signed differences to the previous size.
+        // NOTE(review): a block with a single frame is treated as carrying no coded
+        // sizes at all - confirm against the Matroska lacing specification.
+        u64 frame_size_sum = 0;
+        if (frame_count > 1) {
+            if (!m_streamer.has_octet())
+                return {};
+            auto first_frame_size = m_streamer.read_variable_size_integer();
+            if (!first_frame_size.has_value())
+                return {};
+            if (first_frame_size.value() > total_frame_content_size)
+                return {};
+            frame_sizes.append(first_frame_size.value());
+            frame_size_sum = first_frame_size.value();
+            u64 previous_frame_size = first_frame_size.value();
+
+            for (int i = 0; i < frame_count - 2; i++) {
+                if (!m_streamer.has_octet())
+                    return {};
+                auto frame_size_difference = m_streamer.read_variable_sized_signed_integer();
+                if (!frame_size_difference.has_value())
+                    return {};
+
+                u64 frame_size;
+                if (frame_size_difference.value() < 0) {
+                    u64 decrease = static_cast<u64>(-frame_size_difference.value());
+                    // A difference larger than the previous size would wrap around.
+                    if (decrease > previous_frame_size)
+                        return {};
+                    frame_size = previous_frame_size - decrease;
+                } else {
+                    frame_size = previous_frame_size + static_cast<u64>(frame_size_difference.value());
+                }
+
+                // Keeps frame_size_sum <= total_frame_content_size, so it cannot overflow.
+                if (frame_size > total_frame_content_size - frame_size_sum)
+                    return {};
+                frame_sizes.append(frame_size);
+                frame_size_sum += frame_size;
+                previous_frame_size = frame_size;
+            }
+        }
+
+        // The last frame takes whatever the lacing header and earlier frames leave over.
+        u64 lacing_header_size = m_streamer.octets_read() - octets_read_before_frame_sizes;
+        if (lacing_header_size > total_frame_content_size - frame_size_sum)
+            return {};
+        frame_sizes.append(total_frame_content_size - frame_size_sum - lacing_header_size);
+
+        for (int i = 0; i < frame_count; i++) {
+            auto current_frame_size = frame_sizes.at(i);
+            block->add_frame(ByteBuffer::copy(m_streamer.data(), current_frame_size));
+            m_streamer.drop_octets(current_frame_size);
+        }
+    } else if (block->lacing() == Block::Lacing::FixedSize) {
+        if (total_frame_content_size < 1)
+            return {};
+        auto frame_count = m_streamer.read_octet() + 1;
+        // The frame-count octet belongs to the block content but not to the frame data.
+        u64 frame_data_size = total_frame_content_size - 1;
+        u64 individual_frame_size = frame_data_size / frame_count;
+        for (int i = 0; i < frame_count; i++) {
+            block->add_frame(ByteBuffer::copy(m_streamer.data(), individual_frame_size));
+            m_streamer.drop_octets(individual_frame_size);
+        }
+        // Skip any remainder so the stream stays aligned with the next element even
+        // if the data does not divide evenly.
+        m_streamer.drop_octets(frame_data_size - individual_frame_size * frame_count);
+    } else {
+        // FIXME: Xiph lacing is not decoded; such blocks are stored as one frame, exactly
+        //        like blocks without lacing.
+        block->add_frame(ByteBuffer::copy(m_streamer.data(), total_frame_content_size));
+        m_streamer.drop_octets(total_frame_content_size);
+    }
+    return block;
+}
+
+// Reads an element's data size followed by that many octets, returned as a String.
+// Returns an empty Optional if the size cannot be read or the data is truncated.
+Optional<String> MatroskaReader::read_string_element()
+{
+    // read_variable_size_integer() asserts that an octet is available; input that
+    // ends right after the element ID must fail the parse instead of aborting.
+    if (!m_streamer.has_octet())
+        return {};
+
+    auto string_length = m_streamer.read_variable_size_integer();
+    if (!string_length.has_value() || m_streamer.remaining() < string_length.value())
+        return {};
+    auto string_value = String(m_streamer.data_as_chars(), string_length.value());
+    m_streamer.drop_octets(string_length.value());
+    return string_value;
+}
+
+// Reads an element's data size followed by that many octets, combined most significant
+// octet first into a u64. Returns an empty Optional if the size cannot be read, the
+// data is truncated, or the integer is wider than 8 octets.
+Optional<u64> MatroskaReader::read_u64_element()
+{
+    // read_variable_size_integer() asserts that an octet is available.
+    if (!m_streamer.has_octet())
+        return {};
+
+    auto integer_length = m_streamer.read_variable_size_integer();
+    if (!integer_length.has_value())
+        return {};
+    // More than 8 octets would silently shift the leading octets out of the result.
+    if (integer_length.value() > sizeof(u64))
+        return {};
+    if (m_streamer.remaining() < integer_length.value())
+        return {};
+
+    u64 result = 0;
+    for (size_t i = 0; i < integer_length.value(); i++)
+        result = (result << 8u) | m_streamer.read_octet();
+    return result;
+}
+
+// Skips an element the reader does not interpret: reads its data size and drops that
+// many octets. Returns false if the size cannot be read or the data is truncated.
+bool MatroskaReader::read_unknown_element()
+{
+    // read_variable_size_integer() asserts that an octet is available.
+    if (!m_streamer.has_octet())
+        return false;
+
+    auto element_length = m_streamer.read_variable_size_integer();
+    if (!element_length.has_value() || m_streamer.remaining() < element_length.value())
+        return false;
+
+    m_streamer.drop_octets(element_length.value());
+    return true;
+}
+
+}

+ 169 - 0
Userland/Libraries/LibVideo/MatroskaReader.h

@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include "MatroskaDocument.h"
+#include <AK/Debug.h>
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/Optional.h>
+#include <AK/OwnPtr.h>
+#include <math.h>
+
+namespace Video {
+
+// Parses a Matroska container held in memory into a MatroskaDocument.
+class MatroskaReader {
+public:
+    // The reader does not copy `data`; it must stay valid while parse() runs.
+    MatroskaReader(const u8* data, size_t size)
+        : m_streamer(data, size)
+    {
+    }
+
+    // Both helpers return a null OwnPtr when the input cannot be parsed.
+    static OwnPtr<MatroskaDocument> parse_matroska_from_file(const StringView& path);
+    static OwnPtr<MatroskaDocument> parse_matroska_from_data(const u8*, size_t);
+
+    OwnPtr<MatroskaDocument> parse();
+
+private:
+    // Cursor over the input buffer. It also keeps a stack of "octets read" counters so
+    // that nested elements can each track how much of their own data was consumed.
+    class Streamer {
+    public:
+        Streamer(const u8* data, size_t size)
+            : m_data_ptr(data)
+            , m_size_remaining(size)
+        {
+        }
+
+        // Pointer to the next unread octet.
+        const u8* data() { return m_data_ptr; }
+
+        const char* data_as_chars() { return reinterpret_cast<const char*>(m_data_ptr); }
+
+        // Consumes one octet. The caller must make sure one is available.
+        u8 read_octet()
+        {
+            VERIFY(m_size_remaining >= 1);
+            m_size_remaining--;
+            m_octets_read.last()++;
+            return *(m_data_ptr++);
+        }
+
+        // Consumes two octets as a big-endian signed 16-bit value.
+        i16 read_i16()
+        {
+            // The reads are separate statements on purpose: the operands of `|` are
+            // not sequenced relative to each other, so `(read_octet() << 8) |
+            // read_octet()` allowed the compiler to fetch the low octet first.
+            u16 high_octet = read_octet();
+            u16 low_octet = read_octet();
+            return static_cast<i16>((high_octet << 8) | low_octet);
+        }
+
+        // Octets consumed since the innermost push_octets_read().
+        size_t octets_read() { return m_octets_read.last(); }
+
+        void push_octets_read() { m_octets_read.append(0); }
+
+        // Drops the innermost counter and adds its count to the enclosing one, if any.
+        void pop_octets_read()
+        {
+            auto popped = m_octets_read.take_last();
+            if (!m_octets_read.is_empty())
+                m_octets_read.last() += popped;
+        }
+
+        // Reads a variable-size integer. With `mask_length` the length-marker bit is
+        // cleared from the result (data sizes); without it the marker is kept (element
+        // IDs). Returns an empty Optional if the first octet is zero or the input ends
+        // before the integer is complete.
+        Optional<u64> read_variable_size_integer(bool mask_length = true)
+        {
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", m_data_ptr);
+            // Report exhausted input to the caller instead of hitting the VERIFY in
+            // read_octet().
+            if (!has_octet()) {
+                dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
+                return {};
+            }
+            auto length_descriptor = read_octet();
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
+            if (length_descriptor == 0)
+                return {};
+            size_t length = vint_length(length_descriptor);
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
+
+            u64 result;
+            if (mask_length)
+                result = length_descriptor & ~(1u << (8 - length));
+            else
+                result = length_descriptor;
+            dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
+            for (size_t i = 1; i < length; i++) {
+                if (!has_octet()) {
+                    dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
+                    return {};
+                }
+                u8 next_octet = read_octet();
+                dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
+                result = (result << 8u) | next_octet;
+                dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
+            }
+            return result;
+        }
+
+        // Reads a variable-size integer with the marker bit cleared and subtracts the
+        // bias 2^(7 * length - 1) - 1 to produce a signed value. Returns an empty
+        // Optional under the same conditions as read_variable_size_integer().
+        Optional<i64> read_variable_sized_signed_integer()
+        {
+            if (!has_octet())
+                return {};
+            auto length_descriptor = read_octet();
+            if (length_descriptor == 0)
+                return {};
+            size_t length = vint_length(length_descriptor);
+
+            i64 result = length_descriptor & ~(1u << (8 - length));
+            for (size_t i = 1; i < length; i++) {
+                if (!has_octet())
+                    return {};
+                u8 next_octet = read_octet();
+                result = (result << 8u) | next_octet;
+            }
+            // Integer shift instead of floating-point pow(); length is 1..8, so the
+            // shift amount is at most 55.
+            result -= (static_cast<i64>(1) << (length * 7 - 1)) - 1;
+            return result;
+        }
+
+        // Skips `num_octets` octets. The caller must make sure that many are available.
+        void drop_octets(size_t num_octets)
+        {
+            VERIFY(m_size_remaining >= num_octets);
+            m_size_remaining -= num_octets;
+            m_octets_read.last() += num_octets;
+            m_data_ptr += num_octets;
+        }
+
+        bool at_end() const { return !m_size_remaining; }
+        bool has_octet() const { return m_size_remaining >= 1; }
+
+        size_t remaining() const { return m_size_remaining; }
+        void set_remaining(size_t remaining) { m_size_remaining = remaining; }
+
+    private:
+        // Total length in octets (1..8) of a variable-size integer, derived from the
+        // position of the highest set bit of its non-zero first octet.
+        static size_t vint_length(u8 length_descriptor)
+        {
+            size_t length = 1;
+            while (length < 8 && !(length_descriptor & (1u << (8 - length))))
+                length++;
+            return length;
+        }
+
+        const u8* m_data_ptr { nullptr };
+        size_t m_size_remaining { 0 };
+        Vector<size_t> m_octets_read { 0 };
+    };
+
+    // Reads a master element's size, then feeds each child element ID to the consumer.
+    bool parse_master_element(const StringView& element_name, Function<bool(u64 element_id)> element_consumer);
+    Optional<EBMLHeader> parse_ebml_header();
+
+    bool parse_segment_elements(MatroskaDocument&);
+    OwnPtr<SegmentInformation> parse_information();
+
+    bool parse_tracks(MatroskaDocument&);
+    OwnPtr<TrackEntry> parse_track_entry();
+    Optional<TrackEntry::VideoTrack> parse_video_track_information();
+    Optional<TrackEntry::AudioTrack> parse_audio_track_information();
+    OwnPtr<Cluster> parse_cluster();
+    OwnPtr<Block> parse_simple_block();
+
+    // Leaf readers: each consumes the element's data size followed by its data.
+    Optional<String> read_string_element();
+    Optional<u64> read_u64_element();
+    bool read_unknown_element();
+
+    Streamer m_streamer;
+};
+
+}