LibVideo: Implement VP9 intra-predicted frame decoding

The first keyframe of the test video can be decoded with these changes. Raw memory allocations in the Parser have been replaced with Vector or Array to avoid memory leaks and OOBs.
Author: https://github.com/Zaggy1024 Commit: https://github.com/SerenityOS/serenity/commit/1514004cd5 Pull-request: https://github.com/SerenityOS/serenity/pull/15363 Reviewed-by: https://github.com/ADKaster Reviewed-by: https://github.com/FireFox317 Reviewed-by: https://github.com/MacDue Reviewed-by: https://github.com/davidot
2024-11-22 15:40:19 +00:00 · 2022-10-08 21:54:20 -05:00 · 2022-10-08 21:54:20 -05:00 · 1514004cd5 · 2024-07-17 10:16:43 +09:00
commit 1514004cd5
parent da9ff31166
10 changed files with 1445 additions and 146 deletions
--- a/Userland/Libraries/LibVideo/CMakeLists.txt
+++ b/Userland/Libraries/LibVideo/CMakeLists.txt
@ -7,8 +7,7 @@ set(SOURCES
    VP9/ProbabilityTables.cpp
    VP9/SyntaxElementCounter.cpp
    VP9/TreeParser.cpp
-    VP9/Utilities.cpp
 )

 serenity_lib(LibVideo video)
-target_link_libraries(LibVideo LibAudio LibCore LibIPC)
+target_link_libraries(LibVideo LibAudio LibCore LibIPC LibGfx)
--- a/Userland/Libraries/LibVideo/DecoderError.h
+++ b/Userland/Libraries/LibVideo/DecoderError.h
@ -22,6 +22,7 @@ using DecoderErrorOr = ErrorOr<T, DecoderError>;
 enum class DecoderErrorCategory : u32 {
    Unknown,
    IO,
+    Memory,
    // The input is corrupted.
    Corrupted,
    // The input uses features that are not yet implemented.
@ -79,4 +80,6 @@ private:
        _result.release_value();                                           \
    })

+#define DECODER_TRY_ALLOC(expression) DECODER_TRY(DecoderErrorCategory::Memory, expression) // Shorthand for DECODER_TRY with the category fixed to DecoderErrorCategory::Memory; used around fallible allocations.
+
 }
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
--- a/Userland/Libraries/LibVideo/VP9/Decoder.h
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.h
@ -23,7 +23,20 @@ public:
    DecoderErrorOr<void> decode_frame(ByteBuffer const&);
    void dump_frame_info();

+    // FIXME: These functions should be replaced by a struct that contains
+    //        all the information needed to display a frame.
+    Vector<u16> const& get_output_buffer_for_plane(u8 plane) const;
+    Gfx::Size<size_t> get_y_plane_size();
+    bool get_uv_subsampling_y();
+    bool get_uv_subsampling_x();
+
 private:
+    typedef i32 Intermediate;
+
+    DecoderErrorOr<void> allocate_buffers();
+    Vector<Intermediate>& get_temp_buffer(u8 plane);
+    Vector<u16>& get_output_buffer(u8 plane);
+
    /* (8.4) Probability Adaptation Process */
    u8 merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor);
    u8 merge_probs(int const* tree, int index, u8* probs, u8* counts, u8 count_sat, u8 max_update_factor);
@ -33,16 +46,101 @@ private:
    u8 adapt_prob(u8 prob, u8 counts[2]);

    /* (8.5) Prediction Processes */
-    DecoderErrorOr<void> predict_intra(size_t plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index);
-    DecoderErrorOr<void> predict_inter(size_t plane, u32 x, u32 y, u32 w, u32 h, u32 block_index);
+    // (8.5.1) Intra prediction process
+    DecoderErrorOr<void> predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index);
+
+    // (8.5.2) Inter prediction process
+    DecoderErrorOr<void> predict_inter(u8 plane, u32 x, u32 y, u32 w, u32 h, u32 block_index);

    /* (8.6) Reconstruction and Dequantization */
-    DecoderErrorOr<void> reconstruct(size_t plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size);
+
+    u16 dc_q(u8 b);
+    u16 ac_q(u8 b);
+    // Returns the quantizer index for the current block
+    u8 get_qindex();
+    // Returns the quantizer value for the dc coefficient for a particular plane
+    u16 get_dc_quant(u8 plane);
+    // Returns the quantizer value for the ac coefficient for a particular plane
+    u16 get_ac_quant(u8 plane);
+
+    // (8.6.2) Reconstruct process
+    DecoderErrorOr<void> reconstruct(u8 plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size);
+
+    // (8.7) Inverse transform process
+    DecoderErrorOr<void> inverse_transform_2d(Vector<Intermediate>& dequantized, u8 log2_of_block_size);
+
+    // (8.7.1) 1D Transforms
+    // (8.7.1.1) Butterfly functions
+
+    inline i32 cos64(u8 angle);
+    inline i32 sin64(u8 angle);
+    // The function B( a, b, angle, 0 ) performs a butterfly rotation.
+    inline void butterfly_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, u8 angle, bool flip);
+    // The function H( a, b, 0 ) performs a Hadamard rotation.
+    inline void hadamard_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, bool flip);
+    // The function SB( a, b, angle, 0 ) performs a butterfly rotation.
+    // Spec defines the source as array T, and the destination array as S.
+    template<typename S, typename D>
+    inline void butterfly_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b, u8 angle, bool flip);
+    // The function SH( a, b ) performs a Hadamard rotation and rounding.
+    // Spec defines the source array as S, and the destination array as T.
+    template<typename S, typename D>
+    inline void hadamard_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b);
+
+    template<typename T>
+    inline i32 round_2(T value, u8 bits);
+
+    // Checks whether the value is representable by a signed integer with (8 + bit_depth) bits.
+    inline bool check_intermediate_bounds(Intermediate value);
+
+    // (8.7.1.10) This process does an in-place Walsh-Hadamard transform of the array T (of length 4).
+    inline DecoderErrorOr<void> inverse_walsh_hadamard_transform(Vector<Intermediate>& data, u8 log2_of_block_size, u8 shift);
+
+    // (8.7.1.2) Inverse DCT array permutation process
+    inline DecoderErrorOr<void> inverse_discrete_cosine_transform_array_permutation(Vector<Intermediate>& data, u8 log2_of_block_size);
+    // (8.7.1.3) Inverse DCT process
+    inline DecoderErrorOr<void> inverse_discrete_cosine_transform(Vector<Intermediate>& data, u8 log2_of_block_size);
+
+    // (8.7.1.4) This process performs the in-place permutation of the array T of length 2^n which is required as the first step of
+    // the inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_input_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size);
+    // (8.7.1.5) This process performs the in-place permutation of the array T of length 2^n which is required before the final
+    // step of the inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_output_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size);
+
+    // (8.7.1.6) This process does an in-place transform of the array T to perform an inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_4(Vector<Intermediate>& data);
+    // (8.7.1.7) This process does an in-place transform of the array T using a higher precision array S for intermediate
+    // results.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform_8(Vector<Intermediate>& data);
+    // (8.7.1.8) This process does an in-place transform of the array T using a higher precision array S for intermediate
+    // results.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform_16(Vector<Intermediate>& data);
+    // (8.7.1.9) This process performs an in-place inverse ADST process on the array T of size 2^n for 2 ≤ n ≤ 4.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform(Vector<Intermediate>& data, u8 log2_of_block_size);

    /* (8.10) Reference Frame Update Process */
    DecoderErrorOr<void> update_reference_frames();

    NonnullOwnPtr<Parser> m_parser;
+
+    struct { // Vector-backed working buffers of the decoder, grouped under the single member m_buffers.
+        // FIXME: We may be able to consolidate some of these to reduce memory consumption.
+        Vector<Intermediate> dequantized;
+        Vector<Intermediate> row_or_column;

+        // Buffers for predict_intra()
+        Vector<Intermediate> above_row;
+        Vector<Intermediate> left_column;
+        Vector<Intermediate> predicted_samples;

+        // Buffers for the inverse transforms (DCT, ADST)
+        Vector<Intermediate> transform_temp;
+        Vector<i64> adst_temp; // 64-bit elements, wider than the i32 Intermediate type used by the other buffers

+        Vector<Intermediate> intermediate[3]; // Three entries; presumably one per plane, as returned by get_temp_buffer(plane) - confirm in Decoder.cpp
+        Vector<u16> output[3]; // Three entries; presumably one per plane, as returned by get_output_buffer(plane) - confirm in Decoder.cpp
+    } m_buffers;
 };

 }
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@ -5,10 +5,13 @@
 * SPDX-License-Identifier: BSD-2-Clause
 */

-#include "Parser.h"
-#include "Decoder.h"
-#include "Utilities.h"
 #include <AK/String.h>
+#include <LibGfx/Point.h>
+#include <LibGfx/Size.h>
+
+#include "Decoder.h"
+#include "Parser.h"
+#include "Utilities.h"

 namespace Video::VP9 {

@ -23,22 +26,22 @@ Parser::Parser(Decoder& decoder)

 Parser::~Parser()
 {
-    cleanup_tile_allocations();
-    free(m_prev_segment_ids);
 }

 void Parser::cleanup_tile_allocations()
 {
-    free(m_skips);
-    free(m_tx_sizes);
-    free(m_mi_sizes);
-    free(m_y_modes);
-    free(m_segment_ids);
-    free(m_ref_frames);
-    free(m_interp_filters);
-    free(m_mvs);
-    free(m_sub_mvs);
-    free(m_sub_modes);
+    // FIXME: Is this necessary? Data should be truncated and
+    //        overwritten by the next tile.
+    m_skips.clear_with_capacity();
+    m_tx_sizes.clear_with_capacity();
+    m_mi_sizes.clear_with_capacity();
+    m_y_modes.clear_with_capacity();
+    m_segment_ids.clear_with_capacity();
+    m_ref_frames.clear_with_capacity();
+    m_interp_filters.clear_with_capacity();
+    m_mvs.clear_with_capacity();
+    m_sub_mvs.clear_with_capacity();
+    m_sub_modes.clear_with_capacity();
 }

 /* (6.1) */
@ -63,6 +66,8 @@ DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
    dbgln("Finished reading compressed header");
    TRY_READ(m_bit_stream->exit_bool());

+    TRY(m_decoder.allocate_buffers());
+
    TRY(decode_tiles());
    TRY(refresh_probs());

@ -335,11 +340,11 @@ DecoderErrorOr<void> Parser::loop_filter_params()

 DecoderErrorOr<void> Parser::quantization_params()
 {
-    auto base_q_idx = TRY_READ(m_bit_stream->read_f8());
-    auto delta_q_y_dc = TRY(read_delta_q());
-    auto delta_q_uv_dc = TRY(read_delta_q());
-    auto delta_q_uv_ac = TRY(read_delta_q());
-    m_lossless = base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 && delta_q_uv_ac == 0;
+    m_base_q_idx = TRY_READ(m_bit_stream->read_f8());
+    m_delta_q_y_dc = TRY(read_delta_q());
+    m_delta_q_uv_dc = TRY(read_delta_q());
+    m_delta_q_uv_ac = TRY(read_delta_q());
+    m_lossless = m_base_q_idx == 0 && m_delta_q_y_dc == 0 && m_delta_q_uv_dc == 0 && m_delta_q_uv_ac == 0;
    return {};
 }

@ -441,9 +446,8 @@ void Parser::setup_past_independence()
        }
    }
    m_segmentation_abs_or_delta_update = false;
-    if (m_prev_segment_ids)
-        free(m_prev_segment_ids);
-    m_prev_segment_ids = static_cast<u8*>(kmalloc_array(m_mi_rows, m_mi_cols));
+    m_prev_segment_ids.clear_with_capacity();
+    m_prev_segment_ids.resize_and_keep_capacity(m_mi_rows * m_mi_cols);
    m_loop_filter_delta_enabled = true;
    m_loop_filter_ref_deltas[IntraFrame] = 1;
    m_loop_filter_ref_deltas[LastFrame] = 0;
@ -561,8 +565,8 @@ DecoderErrorOr<void> Parser::read_coef_probs()
                        auto max_l = (k == 0) ? 3 : 6;
                        for (auto l = 0; l < max_l; l++) {
                            for (auto m = 0; m < 3; m++) {
-                                auto& coef_probs = m_probability_tables->coef_probs()[tx_size];
-                                coef_probs[i][j][k][l][m] = TRY(diff_update_prob(coef_probs[i][j][k][l][m]));
+                                auto& prob = m_probability_tables->coef_probs()[tx_size][i][j][k][l][m];
+                                prob = TRY(diff_update_prob(prob));
                            }
                        }
                    }
@ -748,30 +752,28 @@ void Parser::setup_compound_reference_mode()
    }
 }

-void Parser::allocate_tile_data()
+DecoderErrorOr<void> Parser::allocate_tile_data()
 {
    auto dimensions = m_mi_rows * m_mi_cols;
-    if (dimensions == m_allocated_dimensions)
-        return;
    cleanup_tile_allocations();
-    m_skips = static_cast<bool*>(kmalloc_array(dimensions, sizeof(bool)));
-    m_tx_sizes = static_cast<TXSize*>(kmalloc_array(dimensions, sizeof(TXSize)));
-    m_mi_sizes = static_cast<u32*>(kmalloc_array(dimensions, sizeof(u32)));
-    m_y_modes = static_cast<u8*>(kmalloc_array(dimensions, sizeof(u8)));
-    m_segment_ids = static_cast<u8*>(kmalloc_array(dimensions, sizeof(u8)));
-    m_ref_frames = static_cast<ReferenceFrame*>(kmalloc_array(dimensions, 2, sizeof(ReferenceFrame)));
-    m_interp_filters = static_cast<InterpolationFilter*>(kmalloc_array(dimensions, sizeof(InterpolationFilter)));
-    m_mvs = static_cast<MV*>(kmalloc_array(dimensions, 2, sizeof(MV)));
-    m_sub_mvs = static_cast<MV*>(kmalloc_array(dimensions, 8, sizeof(MV)));
-    m_sub_modes = static_cast<IntraMode*>(kmalloc_array(dimensions, 4, sizeof(IntraMode)));
-    m_allocated_dimensions = dimensions;
+    DECODER_TRY_ALLOC(m_skips.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_tx_sizes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_mi_sizes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_y_modes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_segment_ids.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_ref_frames.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_interp_filters.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_mvs.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_sub_mvs.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_sub_modes.try_resize_and_keep_capacity(dimensions));
+    return {};
 }

 DecoderErrorOr<void> Parser::decode_tiles()
 {
    auto tile_cols = 1 << m_tile_cols_log2;
    auto tile_rows = 1 << m_tile_rows_log2;
-    allocate_tile_data();
+    TRY(allocate_tile_data());
    clear_above_context();
    for (auto tile_row = 0; tile_row < tile_rows; tile_row++) {
        for (auto tile_col = 0; tile_col < tile_cols; tile_col++) {
@ -826,9 +828,7 @@ DecoderErrorOr<void> Parser::decode_tile()
 {
    for (auto row = m_mi_row_start; row < m_mi_row_end; row += 8) {
        clear_left_context();
-        m_row = row;
        for (auto col = m_mi_col_start; col < m_mi_col_end; col += 8) {
-            m_col = col;
            TRY(decode_partition(row, col, Block_64x64));
        }
    }
@ -845,14 +845,16 @@ void Parser::clear_left_context()
 DecoderErrorOr<void> Parser::decode_partition(u32 row, u32 col, u8 block_subsize)
 {
    if (row >= m_mi_rows || col >= m_mi_cols)
-        return DecoderError::corrupted("Row or column were outside valid ranges"sv);
+        return {};
    m_block_subsize = block_subsize;
    m_num_8x8 = num_8x8_blocks_wide_lookup[block_subsize];
    auto half_block_8x8 = m_num_8x8 >> 1;
    m_has_rows = (row + half_block_8x8) < m_mi_rows;
    m_has_cols = (col + half_block_8x8) < m_mi_cols;
-
+    m_row = row;
+    m_col = col;
    auto partition = TRY_READ(m_tree_parser->parse_tree(SyntaxElementType::Partition));
+
    auto subsize = subsize_lookup[partition][block_subsize];
    if (subsize < Block_8x8 || partition == PartitionNone) {
        TRY(decode_block(row, col, subsize));
@ -871,15 +873,22 @@ DecoderErrorOr<void> Parser::decode_partition(u32 row, u32 col, u8 block_subsize
        TRY(decode_partition(row + half_block_8x8, col + half_block_8x8, subsize));
    }
    if (block_subsize == Block_8x8 || partition != PartitionSplit) {
+        auto above_context = 15 >> b_width_log2_lookup[subsize];
+        auto left_context = 15 >> b_height_log2_lookup[subsize];
        for (size_t i = 0; i < m_num_8x8; i++) {
-            m_above_partition_context[col + i] = 15 >> b_width_log2_lookup[subsize];
-            m_left_partition_context[row + i] = 15 >> b_width_log2_lookup[subsize];
+            m_above_partition_context[col + i] = above_context;
+            m_left_partition_context[row + i] = left_context;
        }
    }
    return {};
 }

-DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, u8 subsize)
+// Flattens a (row, column) block position into an index for the per-block vectors,
+// which are laid out row by row with m_mi_cols entries in each row.
+size_t Parser::get_image_index(u32 row, u32 column)
+{
+    auto const row_offset = row * m_mi_cols;
+    return row_offset + column;
+}
+
+DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, BlockSubsize subsize)
 {
    m_mi_row = row;
    m_mi_col = col;
@ -893,25 +902,24 @@ DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, u8 subsize)
        m_skip = true;
    for (size_t y = 0; y < num_8x8_blocks_high_lookup[subsize]; y++) {
        for (size_t x = 0; x < num_8x8_blocks_wide_lookup[subsize]; x++) {
-            auto pos = (row + y) * m_mi_cols + (col + x);
+            auto pos = get_image_index(row + y, col + x);
            m_skips[pos] = m_skip;
            m_tx_sizes[pos] = m_tx_size;
            m_mi_sizes[pos] = m_mi_size;
            m_y_modes[pos] = m_y_mode;
            m_segment_ids[pos] = m_segment_id;
            for (size_t ref_list = 0; ref_list < 2; ref_list++)
-                m_ref_frames[(pos * 2) + ref_list] = m_ref_frame[ref_list];
+                m_ref_frames[pos][ref_list] = m_ref_frame[ref_list];
            if (m_is_inter) {
                m_interp_filters[pos] = m_interp_filter;
                for (size_t ref_list = 0; ref_list < 2; ref_list++) {
-                    auto pos_with_ref_list = (pos * 2 + ref_list) * sizeof(MV);
-                    m_mvs[pos_with_ref_list] = m_block_mvs[ref_list][3];
+                    m_mvs[pos][ref_list] = m_block_mvs[ref_list][3];
                    for (size_t b = 0; b < 4; b++)
-                        m_sub_mvs[pos_with_ref_list * 4 + b * sizeof(MV)] = m_block_mvs[ref_list][b];
+                        m_sub_mvs[pos][ref_list][b] = m_block_mvs[ref_list][b];
                }
            } else {
                for (size_t b = 0; b < 4; b++)
-                    m_sub_modes[pos * 4 + b] = static_cast<IntraMode>(m_block_sub_modes[b]);
+                    m_sub_modes[pos][b] = static_cast<IntraMode>(m_block_sub_modes[b]);
            }
        }
    }
@ -998,10 +1006,10 @@ DecoderErrorOr<void> Parser::read_tx_size(bool allow_select)

 DecoderErrorOr<void> Parser::inter_frame_mode_info()
 {
-    m_left_ref_frame[0] = m_available_l ? m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1)] : IntraFrame;
-    m_above_ref_frame[0] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col] : IntraFrame;
-    m_left_ref_frame[1] = m_available_l ? m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1) + 1] : None;
-    m_above_ref_frame[1] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col + 1] : None;
+    m_left_ref_frame[0] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][0] : IntraFrame;
+    m_above_ref_frame[0] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][0] : IntraFrame;
+    m_left_ref_frame[1] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][1] : None;
+    m_above_ref_frame[1] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][1] : None;
    m_left_intra = m_left_ref_frame[0] <= IntraFrame;
    m_above_intra = m_above_ref_frame[0] <= IntraFrame;
    m_left_single = m_left_ref_frame[1] <= None;
@ -1234,10 +1242,23 @@ DecoderErrorOr<i32> Parser::read_mv_component(u8)
    return (mv_sign ? -1 : 1) * static_cast<i32>(mag);
 }

+// Converts a position given in units of 8x8 blocks into sample coordinates within the requested plane.
+// Plane 0 uses the full-resolution position; every other plane additionally shifts it right by the
+// horizontal and vertical subsampling values.
+Gfx::Point<size_t> Parser::get_decoded_point_for_plane(u8 column, u8 row, u8 plane)
+{
+    auto const full_resolution_x = column * 8;
+    auto const full_resolution_y = row * 8;
+    if (plane != 0)
+        return { full_resolution_x >> m_subsampling_x, full_resolution_y >> m_subsampling_y };
+    return { full_resolution_x, full_resolution_y };
+}
+
+// Returns the size, in samples, of the decoded area of the given plane: the frame's extent in
+// 8x8 blocks (m_mi_cols x m_mi_rows) times 8, shifted right by the subsampling values for every
+// plane other than plane 0.
+Gfx::Size<size_t> Parser::get_decoded_size_for_plane(u8 plane)
+{
+    // Computed directly in size_t rather than through get_decoded_point_for_plane(): that helper
+    // takes u8 coordinates, so passing the u32 block counts through it would silently truncate
+    // them for any frame with 256 or more blocks along a side.
+    size_t width = static_cast<size_t>(m_mi_cols) * 8;
+    size_t height = static_cast<size_t>(m_mi_rows) * 8;
+    if (plane != 0) {
+        width >>= m_subsampling_x;
+        height >>= m_subsampling_y;
+    }
+    return { width, height };
+}
+
 DecoderErrorOr<void> Parser::residual()
 {
    auto block_size = m_mi_size < Block_8x8 ? Block_8x8 : static_cast<BlockSubsize>(m_mi_size);
-    for (size_t plane = 0; plane < 3; plane++) {
+    for (u8 plane = 0; plane < 3; plane++) {
        auto tx_size = (plane > 0) ? get_uv_tx_size() : m_tx_size;
        auto step = 1 << tx_size;
        auto plane_size = get_plane_block_size(block_size, plane);
@ -1274,10 +1295,8 @@ DecoderErrorOr<void> Parser::residual()
                        TRY(m_decoder.reconstruct(plane, start_x, start_y, tx_size));
                    }
                }
-                auto above_sub_context = m_above_nonzero_context[plane];
-                auto left_sub_context = m_left_nonzero_context[plane];
-                above_sub_context.resize_and_keep_capacity((start_x >> 2) + step);
-                left_sub_context.resize_and_keep_capacity((start_y >> 2) + step);
+                auto& above_sub_context = m_above_nonzero_context[plane];
+                auto& left_sub_context = m_left_nonzero_context[plane];
                for (auto i = 0; i < step; i++) {
                    above_sub_context[(start_x >> 2) + i] = non_zero;
                    left_sub_context[(start_y >> 2) + i] = non_zero;
@ -1378,7 +1397,7 @@ DecoderErrorOr<i32> Parser::read_coef(Token token)
 {
    auto cat = extra_bits[token][0];
    auto num_extra = extra_bits[token][1];
-    auto coef = extra_bits[token][2];
+    u32 coef = extra_bits[token][2];
    if (token == DctValCat6) {
        for (size_t e = 0; e < (u8)(m_bit_depth - 8); e++) {
            auto high_bit = TRY_READ(m_bit_stream->read_bool(255));
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@ -40,7 +40,7 @@ private:
    /* Utilities */
    void clear_context(Vector<u8>& context, size_t size);
    void clear_context(Vector<Vector<u8>>& context, size_t outer_size, size_t inner_size);
-    void allocate_tile_data();
+    DecoderErrorOr<void> allocate_tile_data();
    void cleanup_tile_allocations();

    /* (6.1) Frame Syntax */
@ -94,7 +94,7 @@ private:
    DecoderErrorOr<void> decode_tile();
    void clear_left_context();
    DecoderErrorOr<void> decode_partition(u32 row, u32 col, u8 block_subsize);
-    DecoderErrorOr<void> decode_block(u32 row, u32 col, u8 subsize);
+    DecoderErrorOr<void> decode_block(u32 row, u32 col, BlockSubsize subsize);
    DecoderErrorOr<void> mode_info();
    DecoderErrorOr<void> intra_frame_mode_info();
    DecoderErrorOr<void> intra_segment_id();
@ -123,6 +123,10 @@ private:
    DecoderErrorOr<void> find_best_ref_mvs(int ref_list);
    DecoderErrorOr<void> append_sub8x8_mvs(u8 block, u8 ref_list);
    DecoderErrorOr<bool> use_mv_hp(MV const& delta_mv);
+    size_t get_image_index(u32 row, u32 column);
+
+    // Parameter names follow the definition in Parser.cpp: the column (x) comes first, then the row (y).
+    Gfx::Point<size_t> get_decoded_point_for_plane(u8 column, u8 row, u8 plane);
+    Gfx::Size<size_t> get_decoded_size_for_plane(u8 plane);

    u8 m_profile { 0 };
    u8 m_frame_to_show_map_index { 0 };
@ -131,8 +135,8 @@ private:
    u8 m_loop_filter_level { 0 };
    u8 m_loop_filter_sharpness { 0 };
    bool m_loop_filter_delta_enabled { false };
-    FrameType m_frame_type;
-    FrameType m_last_frame_type;
+    FrameType m_frame_type { FrameType::KeyFrame };
+    FrameType m_last_frame_type { FrameType::KeyFrame };
    bool m_show_frame { false };
    bool m_error_resilient_mode { false };
    bool m_frame_is_intra { false };
@ -157,7 +161,11 @@ private:
    u32 m_mi_rows { 0 };
    u32 m_sb64_cols { 0 };
    u32 m_sb64_rows { 0 };
-    InterpolationFilter m_interpolation_filter;
+    InterpolationFilter m_interpolation_filter { 0xf };
+    u8 m_base_q_idx { 0 };
+    i8 m_delta_q_y_dc { 0 };
+    i8 m_delta_q_uv_dc { 0 };
+    i8 m_delta_q_uv_ac { 0 };
    bool m_lossless { false };
    u8 m_segmentation_tree_probs[7];
    u8 m_segmentation_pred_prob[3];
@ -184,17 +192,24 @@ private:
    u32 m_mi_col_end { 0 };
    u32 m_mi_row { 0 };
    u32 m_mi_col { 0 };
-    u32 m_mi_size { 0 };
+    BlockSubsize m_mi_size { 0 };
    bool m_available_u { false };
    bool m_available_l { false };
    u8 m_segment_id { 0 };
+    // FIXME: Should this be an enum?
+    // skip equal to 0 indicates that there may be some transform coefficients to read for this block; skip equal to 1
+    // indicates that there are no transform coefficients.
+    //
+    // skip may be set to 0 even if transform blocks contain immediate end of block markers.
    bool m_skip { false };
    u8 m_num_8x8 { 0 };
    bool m_has_rows { false };
    bool m_has_cols { false };
    TXSize m_max_tx_size { TX_4x4 };
    u8 m_block_subsize { 0 };
+    // The row to use for getting partition tree probability lookups.
    u32 m_row { 0 };
+    // The column to use for getting partition tree probability lookups.
    u32 m_col { 0 };
    TXSize m_tx_size { TX_4x4 };
    ReferenceFrame m_ref_frame[2];
@ -228,19 +243,18 @@ private:
    ReferenceFrame m_comp_fixed_ref;
    ReferenceFrame m_comp_var_ref[2];
    MV m_block_mvs[2][4];
-    u8* m_prev_segment_ids { nullptr };
+    Vector<u8> m_prev_segment_ids;

-    u32 m_allocated_dimensions { 0 };
-    bool* m_skips { nullptr };
-    TXSize* m_tx_sizes { nullptr };
-    u32* m_mi_sizes { nullptr };
-    u8* m_y_modes { nullptr };
-    u8* m_segment_ids { nullptr };
-    ReferenceFrame* m_ref_frames { nullptr };
-    InterpolationFilter* m_interp_filters { nullptr };
-    MV* m_mvs { nullptr };
-    MV* m_sub_mvs { nullptr };
-    IntraMode* m_sub_modes { nullptr };
+    Vector<bool> m_skips;
+    Vector<TXSize> m_tx_sizes;
+    Vector<u32> m_mi_sizes;
+    Vector<u8> m_y_modes;
+    Vector<u8> m_segment_ids;
+    Vector<Array<ReferenceFrame, 2>> m_ref_frames;
+    Vector<InterpolationFilter> m_interp_filters;
+    Vector<Array<MV, 2>> m_mvs;
+    Vector<Array<Array<MV, 4>, 2>> m_sub_mvs;
+    Vector<Array<IntraMode, 4>> m_sub_modes;

    OwnPtr<BitStream> m_bit_stream;
    OwnPtr<ProbabilityTables> m_probability_tables;
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
@ -212,17 +212,17 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node)
    u32 above_mode, left_mode;
    if (m_decoder.m_mi_size >= Block_8x8) {
        above_mode = AVAIL_U
-            ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2]
+            ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2]
            : DcPred;
        left_mode = AVAIL_L
-            ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1]
+            ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1]
            : DcPred;
    } else {
        if (m_idy) {
            above_mode = m_decoder.m_block_sub_modes[m_idx];
        } else {
            above_mode = AVAIL_U
-                ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2 + m_idx]
+                ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2 + m_idx]
                : DcPred;
        }

@ -230,7 +230,7 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node)
            left_mode = m_decoder.m_block_sub_modes[m_idy * 2];
        } else {
            left_mode = AVAIL_L
-                ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1 + m_idy * 2]
+                ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1 + m_idy * 2]
                : DcPred;
        }
    }
@ -544,12 +544,16 @@ u8 TreeParser::calculate_tx_size_probability(u8 node)
 {
    auto above = m_decoder.m_max_tx_size;
    auto left = m_decoder.m_max_tx_size;
-    auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col;
-    if (AVAIL_U && !m_decoder.m_skips[u_pos])
-        above = m_decoder.m_tx_sizes[u_pos];
-    auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1;
-    if (AVAIL_L && !m_decoder.m_skips[l_pos])
-        left = m_decoder.m_tx_sizes[l_pos];
+    if (AVAIL_U) {
+        auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col;
+        if (!m_decoder.m_skips[u_pos])
+            above = m_decoder.m_tx_sizes[u_pos];
+    }
+    if (AVAIL_L) {
+        auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1;
+        if (!m_decoder.m_skips[l_pos])
+            left = m_decoder.m_tx_sizes[l_pos];
+    }
    if (!AVAIL_L)
        left = above;
    if (!AVAIL_U)
@ -582,20 +586,14 @@ u8 TreeParser::calculate_interp_filter_probability(u8 node)
    return m_decoder.m_probability_tables->interp_filter_probs()[m_ctx][node];
 }

-u8 TreeParser::calculate_token_probability(u8 node)
+void TreeParser::set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos)
 {
-    auto prob = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][min(2, 1 + node)];
-    if (node < 2)
-        return prob;
-    auto x = (prob - 1) / 2;
-    auto& pareto_table = m_decoder.m_probability_tables->pareto_table();
-    if (prob & 1)
-        return pareto_table[x][node - 2];
-    return (pareto_table[x][node - 2] + pareto_table[x + 1][node - 2]) >> 1;
-}
+    m_band = band;
+    m_c = c;
+    m_plane = plane;
+    m_tx_size = tx_size;
+    m_pos = pos;

-u8 TreeParser::calculate_more_coefs_probability()
-{
    if (m_c == 0) {
        auto sx = m_plane > 0 ? m_decoder.m_subsampling_x : 0;
        auto sy = m_plane > 0 ? m_decoder.m_subsampling_y : 0;
@ -618,7 +616,7 @@ u8 TreeParser::calculate_more_coefs_probability()
        auto n = 4 << m_tx_size;
        auto i = m_pos / n;
        auto j = m_pos % n;
-        auto a = (i - 1) * n + j;
+        auto a = i > 0 ? (i - 1) * n + j : 0;
        auto a2 = i * n + j - 1;
        if (i > 0 && j > 0) {
            if (m_decoder.m_tx_type == DCT_ADST) {
@ -640,9 +638,25 @@ u8 TreeParser::calculate_more_coefs_probability()
        }
        m_ctx = (1 + m_decoder.m_token_cache[neighbor_0] + m_decoder.m_token_cache[neighbor_1]) >> 1;
    }
+}
+
+u8 TreeParser::calculate_more_coefs_probability()
+{
    return m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][0];
 }

+// Returns the probability for the given node of the token tree.
+// Nodes 0 and 1 use entries 1 and 2 of the currently selected coefficient probabilities directly;
+// every later node is looked up in the pareto table, selected by the value of entry 2.
+u8 TreeParser::calculate_token_probability(u8 node)
+{
+    auto const probability_index = min(2, 1 + node);
+    auto base_probability = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][probability_index];
+    if (node >= 2) {
+        auto const pareto_column = node - 2;
+        auto const pareto_row = (base_probability - 1) / 2;
+        auto& pareto_table = m_decoder.m_probability_tables->pareto_table();
+        // An odd base probability selects a single table row; an even one averages two adjacent rows.
+        if (base_probability & 1)
+            return pareto_table[pareto_row][pareto_column];
+        return (pareto_table[pareto_row][pareto_column] + pareto_table[pareto_row + 1][pareto_column]) >> 1;
+    }
+    return base_probability;
+}
+
 void TreeParser::count_syntax_element(SyntaxElementType type, int value)
 {
    switch (type) {
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.h
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h
@ -57,14 +57,7 @@ public:
        m_idy = idy;
    }

-    void set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos)
-    {
-        m_band = band;
-        m_c = c;
-        m_plane = plane;
-        m_tx_size = tx_size;
-        m_pos = pos;
-    }
+    void set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos);

    void set_start_x_and_y(u32 start_x, u32 start_y)
    {
--- a/Userland/Libraries/LibVideo/VP9/Utilities.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Utilities.cpp
@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include "Utilities.h"
-
-namespace Video::VP9 {
-
-u8 clip_3(u8 x, u8 y, u8 z)
-{
-    return clamp(z, x, y);
-}
-
-u8 round_2(u8 x, u8 n)
-{
-    return (x + (1 << (n - 1))) >> n;
-}
-
-}
--- a/Userland/Libraries/LibVideo/VP9/Utilities.h
+++ b/Userland/Libraries/LibVideo/VP9/Utilities.h
@ -11,7 +11,35 @@

 namespace Video::VP9 {

-u8 clip_3(u8 x, u8 y, u8 z);
-u8 round_2(u8 x, u8 n);
+// FIXME: Once everything is working, replace this with plain clamp
+// since parameter order is different
+template<typename T>
+T clip_3(T x, T y, T z)
+{
+    // Argument order differs from clamp(): x is the lower bound, y the upper bound, z the value to limit.
+    T const& lower_bound = x;
+    T const& upper_bound = y;
+    return clamp(z, lower_bound, upper_bound);
+}
+
+// Limits x to the range [0, (1 << bit_depth) - 1], i.e. the values representable with bit_depth bits.
+template<typename T>
+u16 clip_1(u8 bit_depth, T x)
+{
+    if (x >= 0) {
+        // The upper limit is only computed once the value is known to be non-negative.
+        T const highest_value = (1u << bit_depth) - 1u;
+        return x > highest_value ? highest_value : x;
+    }
+    return 0u;
+}
+
+// Reverses the order of the lowest bit_count bits of value. Bits of value at or above
+// position bit_count do not contribute to the result.
+template<typename T, typename C>
+inline T brev(C bit_count, T value)
+{
+    T reversed = 0;
+    for (C source_position = 0; source_position < bit_count; source_position++) {
+        // A set bit at source_position lands at the mirrored position counted from the top of the range.
+        if ((value >> source_position) & 1)
+            reversed |= static_cast<T>(1) << (bit_count - 1 - source_position);
+    }
+    return reversed;
+}

 }