Explorar o código

LibVideo: Add support for VP9 superframes

This allows the second shown frame of the VP9 test video to be decoded,
as the second chunk uses a superframe to encode a reference frame and
a second frame that is inter-predicted from the keyframe and that reference frame.
Zaggy1024 hai 2 anos
pai
achega
be0760871e

+ 1 - 1
Userland/Applications/VideoPlayer/main.cpp

@@ -59,7 +59,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
         if (!optional_sample.has_value())
             return;
 
-        auto result = vp9_decoder.decode_frame(optional_sample.release_value());
+        auto result = vp9_decoder.decode(optional_sample.release_value());
 
         if (result.is_error()) {
             outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());

+ 58 - 12
Userland/Libraries/LibVideo/VP9/Decoder.cpp

@@ -18,23 +18,74 @@ Decoder::Decoder()
 {
 }
 
-DecoderErrorOr<void> Decoder::decode_frame(ByteBuffer const& frame_data)
+// Decodes one chunk of VP9 data. If the parser reports a superframe index, every frame listed
+// in it is decoded in order; otherwise the whole chunk is decoded as a single frame.
+// Returns the first error produced by decode_frame(), or a corrupted-data error when the index
+// describes frames that do not fit inside the chunk.
+DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
 {
-    TRY(m_parser->parse_frame(frame_data));
-    // TODO:
-    //  - #2
-    //  - #3
-    //  - #4
-    TRY(update_reference_frames());
+    auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
+
+    // An empty result means the chunk is not a superframe, so it is passed on unchanged.
+    if (superframe_sizes.is_empty())
+        return decode_frame(chunk_data);
+
+    size_t offset = 0;
+
+    for (auto superframe_size : superframe_sizes) {
+        // The sizes are read straight from the bitstream, so they must be validated before slicing.
+        // offset never exceeds chunk_data.size() here, so the subtraction cannot wrap around.
+        if (superframe_size > chunk_data.size() - offset)
+            return DecoderError::corrupted("Superframe frame size exceeds the chunk size"sv);
+
+        auto frame_data = chunk_data.slice(offset, superframe_size);
+        TRY(decode_frame(frame_data));
+        offset += superframe_size;
+    }
 
     return {};
 }
 
+// Convenience overload: views the buffer's bytes as a span and forwards to the span-based decode().
+DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
+{
+    auto const chunk_bytes = chunk_data.span();
+    return decode(chunk_bytes);
+}
+
 void Decoder::dump_frame_info()
 {
     m_parser->dump_info();
 }
 
+// Flattens a (row, column) position into a linear index, where `stride` is the number of
+// elements separating the start of one row from the start of the next.
+inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
+{
+    auto const row_start = row * stride;
+    return row_start + column;
+}
+
+// Runs the decoding process for a single coded frame: parses it, carries the segmentation map
+// over to the previous-frame map when required, and updates the reference frames.
+// Returns the first error reported by the parser or by update_reference_frames().
+DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
+{
+    // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
+    // tables include function calls indicating when the block decode processes should be triggered.
+    TRY(m_parser->parse_frame(frame_data));
+
+    // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
+    // coded frame has been decoded.
+    // FIXME: Implement loop filtering.
+
+    // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
+    // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
+    // − show_existing_frame is equal to 0,
+    // − segmentation_enabled is equal to 1,
+    // − segmentation_update_map is equal to 1.
+    if (!m_parser->m_show_existing_frame && m_parser->m_segmentation_enabled && m_parser->m_segmentation_update_map) {
+        // The maps are walked row by row, so the distance between consecutive rows is the number
+        // of columns (MiCols), not the number of rows.
+        auto const stride = m_parser->m_mi_cols;
+        for (auto row = 0u; row < m_parser->m_mi_rows; row++) {
+            for (auto column = 0u; column < m_parser->m_mi_cols; column++) {
+                auto index = index_from_row_and_column(row, column, stride);
+                m_parser->m_prev_segment_ids[index] = m_parser->m_segment_ids[index];
+            }
+        }
+    }
+
+    // 4. The output process as specified in section 8.9 is invoked.
+    // FIXME: Create a struct to store an output frame along with all information needed to display
+    //        it. This function will need to append the images to a vector to ensure that if a superframe
+    //        with multiple output frames is encountered, all of them can be displayed.
+
+    // 5. The reference frame update process as specified in section 8.10 is invoked.
+    TRY(update_reference_frames());
+    return {};
+}
+
 inline size_t buffer_size(size_t width, size_t height)
 {
     return width * height;
@@ -215,11 +266,6 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
     return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
 }
 
-inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
-{
-    return row * stride + column;
-}
-
 DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
 {
     auto& frame_buffer = get_output_buffer(plane);

+ 6 - 1
Userland/Libraries/LibVideo/VP9/Decoder.h

@@ -9,6 +9,7 @@
 
 #include <AK/ByteBuffer.h>
 #include <AK/Error.h>
+#include <AK/Span.h>
 #include <LibVideo/DecoderError.h>
 
 #include "Parser.h"
@@ -20,7 +21,9 @@ class Decoder {
 
 public:
     Decoder();
-    DecoderErrorOr<void> decode_frame(ByteBuffer const&);
+    /* (8.1) General */
+    DecoderErrorOr<void> decode(Span<const u8>);
+    DecoderErrorOr<void> decode(ByteBuffer const&);
     void dump_frame_info();
 
     // FIXME: These functions should be replaced by a struct that contains
@@ -33,6 +36,8 @@ public:
 private:
     typedef i32 Intermediate;
 
+    DecoderErrorOr<void> decode_frame(Span<const u8>);
+
     DecoderErrorOr<void> allocate_buffers();
     Vector<Intermediate>& get_temp_buffer(u8 plane);
     Vector<u16>& get_output_buffer(u8 plane);

+ 48 - 1
Userland/Libraries/LibVideo/VP9/Parser.cpp

@@ -28,8 +28,55 @@ Parser::~Parser()
 {
 }
 
+// Looks for a superframe index at the end of a chunk and returns the size in bytes of every
+// frame it lists, in order. An empty vector means the chunk does not contain a superframe and
+// should be decoded as a single frame.
+Vector<size_t> Parser::parse_superframe_sizes(Span<const u8> frame_data)
+{
+    // An empty chunk has no final byte to inspect.
+    if (frame_data.size() < 1)
+        return {};
+
+    // A chunk is determined to contain a superframe when both of these checks pass:
+    //   1. the superframe_marker in the final byte of the chunk equals 0b110,
+    //   3. the first byte of the superframe_index matches the final byte,
+    // where (2.) the superframe_index occupies the last SzIndex = 2 + NumFrames * SzBytes bytes.
+    // If either check fails, the whole chunk is passed to the decoding process instead.
+
+    // NOTE: Reading from span data will be quicker than spinning up a BitStream.
+    auto const chunk_size = frame_data.size();
+    u8 const trailer_byte = frame_data[chunk_size - 1];
+
+    // NOTE: The fields are packed from the most significant bit downwards:
+    //       marker (3 bits), SzBytes - 1 (2 bits), NumFrames - 1 (3 bits).
+    if ((trailer_byte & 0b1110'0000) != 0b1100'0000)
+        return {};
+
+    u8 const size_field_width = ((trailer_byte >> 3) & 0b11) + 1;
+    u8 const frame_count = (trailer_byte & 0b111) + 1;
+
+    // Step 2: total size of the index, including its leading and trailing marker bytes.
+    size_t const index_size = 2 + size_field_width * frame_count;
+    if (index_size > chunk_size)
+        return {};
+
+    // Step 3: the index has to begin with the same byte the chunk ends with.
+    size_t cursor = chunk_size - index_size;
+    if (frame_data[cursor++] != trailer_byte)
+        return {};
+
+    // Each frame size is stored least significant byte first.
+    Vector<size_t> frame_sizes;
+    for (u8 frame = 0; frame < frame_count; frame++) {
+        size_t frame_size = 0;
+        for (u8 byte = 0; byte < size_field_width; byte++)
+            frame_size |= static_cast<size_t>(frame_data[cursor++]) << (byte * 8);
+        frame_sizes.append(frame_size);
+    }
+    return frame_sizes;
+}
+
 /* (6.1) */
-DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
+DecoderErrorOr<void> Parser::parse_frame(Span<const u8> frame_data)
 {
     m_bit_stream = make<BitStream>(frame_data.data(), frame_data.size());
     m_syntax_element_counter = make<SyntaxElementCounter>();

+ 6 - 2
Userland/Libraries/LibVideo/VP9/Parser.h

@@ -8,8 +8,8 @@
 #pragma once
 
 #include <AK/Array.h>
-#include <AK/ByteBuffer.h>
 #include <AK/OwnPtr.h>
+#include <AK/Span.h>
 #include <AK/Vector.h>
 #include <LibGfx/Forward.h>
 #include <LibVideo/DecoderError.h>
@@ -32,10 +32,14 @@ class Parser {
 public:
     explicit Parser(Decoder&);
     ~Parser();
-    DecoderErrorOr<void> parse_frame(ByteBuffer const&);
+    DecoderErrorOr<void> parse_frame(Span<const u8>);
     void dump_info();
 
 private:
+    /* Annex B: Superframes are a method of storing multiple coded frames into a single chunk
+     * See also section 5.26. */
+    Vector<size_t> parse_superframe_sizes(Span<const u8>);
+
     DecoderErrorOr<FrameType> read_frame_type();
     DecoderErrorOr<ColorRange> read_color_range();