LibVideo: Add support for VP9 superframes

This allows the second shown frame of the VP9 test video to be decoded,
as the second chunk uses a superframe to encode a reference frame and a
second frame that is inter-predicted from the keyframe and the reference
frame.
Zaggy1024 2022-09-22 21:49:10 -05:00 committed by Andrew Kaster
parent b0187dfc27
commit be0760871e
5 changed files with 119 additions and 17 deletions

@@ -59,7 +59,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
if (!optional_sample.has_value())
return;
auto result = vp9_decoder.decode_frame(optional_sample.release_value());
auto result = vp9_decoder.decode(optional_sample.release_value());
if (result.is_error()) {
outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());

@@ -18,23 +18,74 @@ Decoder::Decoder()
{
}
DecoderErrorOr<void> Decoder::decode_frame(ByteBuffer const& frame_data)
DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
{
TRY(m_parser->parse_frame(frame_data));
// TODO:
// - #2
// - #3
// - #4
TRY(update_reference_frames());
auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
if (superframe_sizes.is_empty()) {
return decode_frame(chunk_data);
}
size_t offset = 0;
for (auto superframe_size : superframe_sizes) {
auto frame_data = chunk_data.slice(offset, superframe_size);
TRY(decode_frame(frame_data));
offset += superframe_size;
}
return {};
}
DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
{
return decode(chunk_data.span());
}
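// For context, a minimal caller sketch (illustrative only, not part of this commit):
// a demuxing loop can now hand each container chunk to decode(), which splits
// superframe chunks internally and decodes every contained frame. The helper name
// decode_all_chunks and the Vector of chunks are assumptions for illustration, and
// the include path and namespaces are assumed from LibVideo's source layout.
#include <AK/ByteBuffer.h>
#include <AK/Try.h>
#include <AK/Vector.h>
#include <LibVideo/VP9/Decoder.h>

static Video::DecoderErrorOr<void> decode_all_chunks(Video::VP9::Decoder& decoder, Vector<ByteBuffer> const& chunks)
{
    for (auto const& chunk : chunks) {
        // A chunk carrying a superframe index is split and each contained frame is
        // decoded in turn; a plain chunk is decoded as a single frame.
        TRY(decoder.decode(chunk));
    }
    return {};
}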
void Decoder::dump_frame_info()
{
m_parser->dump_info();
}
inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
{
return row * stride + column;
}
DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
{
// 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
// tables include function calls indicating when the block decode processes should be triggered.
TRY(m_parser->parse_frame(frame_data));
// 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
// coded frame has been decoded.
// FIXME: Implement loop filtering.
// 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
// SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
// show_existing_frame is equal to 0,
// segmentation_enabled is equal to 1,
// segmentation_update_map is equal to 1.
if (!m_parser->m_show_existing_frame && m_parser->m_segmentation_enabled && m_parser->m_segmentation_update_map) {
for (auto row = 0u; row < m_parser->m_mi_rows; row++) {
for (auto column = 0u; column < m_parser->m_mi_cols; column++) {
auto index = index_from_row_and_column(row, column, m_parser->m_mi_rows);
m_parser->m_prev_segment_ids[index] = m_parser->m_segment_ids[index];
}
}
}
// 4. The output process as specified in section 8.9 is invoked.
// FIXME: Create a struct to store an output frame along with all information needed to display
// it. This function will need to append the images to a vector to ensure that if a superframe
// with multiple output frames is encountered, all of them can be displayed.
// 5. The reference frame update process as specified in section 8.10 is invoked.
TRY(update_reference_frames());
return {};
}
inline size_t buffer_size(size_t width, size_t height)
{
return width * height;
@@ -215,11 +266,6 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
}
inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
{
return row * stride + column;
}
DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
{
auto& frame_buffer = get_output_buffer(plane);

@@ -9,6 +9,7 @@
#include <AK/ByteBuffer.h>
#include <AK/Error.h>
#include <AK/Span.h>
#include <LibVideo/DecoderError.h>
#include "Parser.h"
@@ -20,7 +21,9 @@ class Decoder {
public:
Decoder();
DecoderErrorOr<void> decode_frame(ByteBuffer const&);
/* (8.1) General */
DecoderErrorOr<void> decode(Span<const u8>);
DecoderErrorOr<void> decode(ByteBuffer const&);
void dump_frame_info();
// FIXME: These functions should be replaced by a struct that contains
@@ -33,6 +36,8 @@ public:
private:
typedef i32 Intermediate;
DecoderErrorOr<void> decode_frame(Span<const u8>);
DecoderErrorOr<void> allocate_buffers();
Vector<Intermediate>& get_temp_buffer(u8 plane);
Vector<u16>& get_output_buffer(u8 plane);

@@ -28,8 +28,55 @@ Parser::~Parser()
{
}
Vector<size_t> Parser::parse_superframe_sizes(Span<const u8> frame_data)
{
if (frame_data.size() < 1)
return {};
// The decoder determines the presence of a superframe by:
// 1. parsing the final byte of the chunk and checking that the superframe_marker equals 0b110,
// If the checks in steps 1 and 3 both pass, then the chunk is determined to contain a superframe and each
// frame in the superframe is passed to the decoding process in turn.
// Otherwise, the chunk is determined to not contain a superframe, and the whole chunk is passed to the
// decoding process.
// NOTE: Reading from span data will be quicker than spinning up a BitStream.
u8 superframe_byte = frame_data[frame_data.size() - 1];
// NOTE: We have to read out of the byte from the little end first, hence the padding bits in the masks below.
u8 superframe_marker = superframe_byte & 0b1110'0000;
if (superframe_marker == 0b1100'0000) {
u8 bytes_per_framesize = ((superframe_byte >> 3) & 0b11) + 1;
u8 frames_in_superframe = (superframe_byte & 0b111) + 1;
// 2. setting the total size of the superframe_index SzIndex equal to 2 + NumFrames * SzBytes,
size_t index_size = 2 + bytes_per_framesize * frames_in_superframe;
if (index_size > frame_data.size())
return {};
auto superframe_header_data = frame_data.data() + frame_data.size() - index_size;
u8 start_superframe_byte = *(superframe_header_data++);
// 3. checking that the first byte of the superframe_index matches the final byte.
if (superframe_byte != start_superframe_byte)
return {};
Vector<size_t> result;
for (u8 i = 0; i < frames_in_superframe; i++) {
size_t frame_size = 0;
for (u8 j = 0; j < bytes_per_framesize; j++)
frame_size |= (static_cast<size_t>(*(superframe_header_data++)) << (j * 8));
result.append(frame_size);
}
return result;
}
return {};
}
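// To make the Annex B index layout concrete, here is a small standalone sketch
// (illustrative only, not part of this commit) that builds the trailing index of a
// two-frame superframe and extracts the frame sizes the same way as the code above.
// The function name and the frame sizes 0x120 and 0xF0 are made-up example values.
#include <AK/Types.h>
#include <AK/Vector.h>

static Vector<size_t> example_superframe_sizes()
{
    // Marker byte, read from the most significant bits down:
    //   0b110  superframe_marker
    //   0b01   bytes_per_framesize_minus_1 -> 2 bytes per frame size
    //   0b001  frames_in_superframe_minus_1 -> 2 frames
    // => 0b1100'1001 == 0xC9
    // The index is SzIndex = 2 + NumFrames * SzBytes = 2 + 2 * 2 = 6 bytes long, and
    // the marker byte appears as both its first and last byte. Frame sizes are stored
    // little-endian between the two marker bytes.
    u8 const index[] = { 0xC9, 0x20, 0x01, 0xF0, 0x00, 0xC9 };

    u8 bytes_per_framesize = ((index[0] >> 3) & 0b11) + 1;
    u8 frames_in_superframe = (index[0] & 0b111) + 1;

    Vector<size_t> sizes;
    size_t offset = 1;
    for (u8 i = 0; i < frames_in_superframe; i++) {
        size_t frame_size = 0;
        for (u8 j = 0; j < bytes_per_framesize; j++)
            frame_size |= static_cast<size_t>(index[offset++]) << (j * 8);
        sizes.append(frame_size); // 0x120 (288 bytes), then 0xF0 (240 bytes)
    }
    return sizes;
}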
/* (6.1) */
DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
DecoderErrorOr<void> Parser::parse_frame(Span<const u8> frame_data)
{
m_bit_stream = make<BitStream>(frame_data.data(), frame_data.size());
m_syntax_element_counter = make<SyntaxElementCounter>();

@@ -8,8 +8,8 @@
#pragma once
#include <AK/Array.h>
#include <AK/ByteBuffer.h>
#include <AK/OwnPtr.h>
#include <AK/Span.h>
#include <AK/Vector.h>
#include <LibGfx/Forward.h>
#include <LibVideo/DecoderError.h>
@@ -32,10 +32,14 @@ class Parser {
public:
explicit Parser(Decoder&);
~Parser();
DecoderErrorOr<void> parse_frame(ByteBuffer const&);
DecoderErrorOr<void> parse_frame(Span<const u8>);
void dump_info();
private:
/* Annex B: Superframes are a method of storing multiple coded frames into a single chunk
* See also section 5.26. */
Vector<size_t> parse_superframe_sizes(Span<const u8>);
DecoderErrorOr<FrameType> read_frame_type();
DecoderErrorOr<ColorRange> read_color_range();