From 1514004cd59798f9e2eeb44f381d78a9c1caa877 Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Sat, 8 Oct 2022 21:54:20 -0500 Subject: [PATCH] LibVideo: Implement VP9 intra-predicted frame decoding The first keyframe of the test video can be decoded with these changes. Raw memory allocations in the Parser have been replaced with Vector or Array to avoid memory leaks and OOBs. --- Userland/Libraries/LibVideo/CMakeLists.txt | 3 +- Userland/Libraries/LibVideo/DecoderError.h | 3 + Userland/Libraries/LibVideo/VP9/Decoder.cpp | 1164 ++++++++++++++++- Userland/Libraries/LibVideo/VP9/Decoder.h | 104 +- Userland/Libraries/LibVideo/VP9/Parser.cpp | 145 +- Userland/Libraries/LibVideo/VP9/Parser.h | 50 +- .../Libraries/LibVideo/VP9/TreeParser.cpp | 60 +- Userland/Libraries/LibVideo/VP9/TreeParser.h | 9 +- Userland/Libraries/LibVideo/VP9/Utilities.cpp | 21 - Userland/Libraries/LibVideo/VP9/Utilities.h | 32 +- 10 files changed, 1445 insertions(+), 146 deletions(-) delete mode 100644 Userland/Libraries/LibVideo/VP9/Utilities.cpp diff --git a/Userland/Libraries/LibVideo/CMakeLists.txt b/Userland/Libraries/LibVideo/CMakeLists.txt index 293a106d77f..ec1a4f517b8 100644 --- a/Userland/Libraries/LibVideo/CMakeLists.txt +++ b/Userland/Libraries/LibVideo/CMakeLists.txt @@ -7,8 +7,7 @@ set(SOURCES VP9/ProbabilityTables.cpp VP9/SyntaxElementCounter.cpp VP9/TreeParser.cpp - VP9/Utilities.cpp ) serenity_lib(LibVideo video) -target_link_libraries(LibVideo LibAudio LibCore LibIPC) +target_link_libraries(LibVideo LibAudio LibCore LibIPC LibGfx) diff --git a/Userland/Libraries/LibVideo/DecoderError.h b/Userland/Libraries/LibVideo/DecoderError.h index bfde7e7edbf..02d5160233e 100644 --- a/Userland/Libraries/LibVideo/DecoderError.h +++ b/Userland/Libraries/LibVideo/DecoderError.h @@ -22,6 +22,7 @@ using DecoderErrorOr = ErrorOr; enum class DecoderErrorCategory : u32 { Unknown, IO, + Memory, // The input is corrupted. Corrupted, // The input uses features that are not yet implemented. 
@@ -79,4 +80,6 @@ private: _result.release_value(); \ }) +#define DECODER_TRY_ALLOC(expression) DECODER_TRY(DecoderErrorCategory::Memory, expression) + } diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp index e6b539010bc..87fce2cddd4 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp +++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp @@ -5,6 +5,9 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include +#include + #include "Decoder.h" #include "Utilities.h" @@ -32,6 +35,66 @@ void Decoder::dump_frame_info() m_parser->dump_info(); } +inline size_t buffer_size(size_t width, size_t height) +{ + return width * height; +} + +inline size_t buffer_size(Gfx::Size size) +{ + return buffer_size(size.width(), size.height()); +} + +DecoderErrorOr Decoder::allocate_buffers() +{ + // FIXME: Confirm that we need to clear buffers between frames. + m_buffers = {}; + + for (size_t plane = 0; plane < 3; plane++) { + auto size = m_parser->get_decoded_size_for_plane(plane); + dbgln("Allocating buffers for plane {} with size {}x{} ({})", plane, size.width(), size.height(), buffer_size(size)); + + auto& temp_buffer = get_temp_buffer(plane); + temp_buffer.clear_with_capacity(); + DECODER_TRY_ALLOC(temp_buffer.try_resize_and_keep_capacity(buffer_size(size))); + + auto& output_buffer = get_output_buffer(plane); + output_buffer.clear_with_capacity(); + DECODER_TRY_ALLOC(output_buffer.try_resize_and_keep_capacity(buffer_size(size))); + } + return {}; +} + +Vector& Decoder::get_temp_buffer(u8 plane) +{ + return m_buffers.intermediate[plane]; +} + +Vector& Decoder::get_output_buffer(u8 plane) +{ + return m_buffers.output[plane]; +} + +Vector const& Decoder::get_output_buffer_for_plane(u8 plane) const +{ + return m_buffers.output[plane]; +} + +Gfx::Size Decoder::get_y_plane_size() +{ + return m_parser->get_decoded_size_for_plane(0); +} + +bool Decoder::get_uv_subsampling_y() +{ + return m_parser->m_subsampling_y; +} + +bool 
Decoder::get_uv_subsampling_x() +{ + return m_parser->m_subsampling_x; +} + u8 Decoder::merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor) { auto total_decode_count = count_0 + count_1; @@ -152,22 +215,1111 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2]) return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR); } -DecoderErrorOr Decoder::predict_intra(size_t, u32, u32, bool, bool, bool, TXSize, u32) +inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride) +{ + return row * stride + column; +} + +DecoderErrorOr Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index) +{ + auto& frame_buffer = get_output_buffer(plane); + + // 8.5.1 Intra prediction process + + // The intra prediction process is invoked for intra coded blocks to predict a part of the block corresponding to a + // transform block. When the transform size is smaller than the block size, this process can be invoked multiple + // times within a single block for the same plane, and the invocations are in raster order within the block. + + // The variable mode is specified by: + // 1. If plane is greater than 0, mode is set equal to uv_mode. + // 2. Otherwise, if MiSize is greater than or equal to BLOCK_8X8, mode is set equal to y_mode. + // 3. Otherwise, mode is set equal to sub_modes[ blockIdx ]. + IntraMode mode; + if (plane > 0) + mode = static_cast(m_parser->m_uv_mode); + else if (m_parser->m_mi_size >= Block_8x8) + mode = static_cast(m_parser->m_y_mode); + else + mode = static_cast(m_parser->m_block_sub_modes[block_index]); + + // The variable log2Size specifying the base 2 logarithm of the width of the transform block is set equal to txSz + 2. + u8 log2_of_block_size = tx_size + 2; + // The variable size is set equal to 1 << log2Size. + u8 block_size = 1 << log2_of_block_size; + + // The variable maxX is set equal to (MiCols * 8) - 1. 
+ // The variable maxY is set equal to (MiRows * 8) - 1. + // If plane is greater than 0, then: + // − maxX is set equal to ((MiCols * 8) >> subsampling_x) - 1. + // − maxY is set equal to ((MiRows * 8) >> subsampling_y) - 1. + auto subsampling_x = plane > 0 ? m_parser->m_subsampling_x : false; + auto subsampling_y = plane > 0 ? m_parser->m_subsampling_y : false; + auto max_x = ((m_parser->m_mi_cols * 8u) >> subsampling_x) - 1u; + auto max_y = ((m_parser->m_mi_rows * 8u) >> subsampling_y) - 1u; + + auto const frame_buffer_at = [&](u32 row, u32 column) -> u16& { + const auto frame_stride = max_x + 1u; + return frame_buffer[index_from_row_and_column(row, column, frame_stride)]; + }; + + // The array aboveRow[ i ] for i = 0..size-1 is specified by: + // .. + // The array aboveRow[ i ] for i = size..2*size-1 is specified by: + // .. + // The array aboveRow[ i ] for i = -1 is specified by: + // .. + + // NOTE: above_row is an array ranging from 0 to (2*block_size). + // There are three sections to the array: + // - [0] + // - [1 .. block_size] + // - [block_size + 1 .. block_size * 2] + // The array indices must be offset by 1 to accommodate index -1. + Vector& above_row = m_buffers.above_row; + DECODER_TRY_ALLOC(above_row.try_resize_and_keep_capacity(block_size * 2 + 1)); + auto above_row_at = [&](i32 index) -> Intermediate& { + return above_row[index + 1]; + }; + + // NOTE: This value is pre-calculated since it is reused in spec below. + // Use this to replace spec text "(1<<(BitDepth-1))". + auto half_sample_value = (1u << (m_parser->m_bit_depth - 1u)); + + // The array aboveRow[ i ] for i = 0..size-1 is specified by: + if (!have_above) { + // 1. If haveAbove is equal to 0, aboveRow[ i ] is set equal to (1<<(BitDepth-1)) - 1. + // FIXME: Use memset? + for (auto i = 0u; i < block_size; i++) + above_row_at(i) = half_sample_value - 1; + } else { + // 2. Otherwise, aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ]. 
+ for (auto i = 0u; i < block_size; i++) + above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i)); + } + + // The array aboveRow[ i ] for i = size..2*size-1 is specified by: + if (have_above && not_on_right && tx_size == TXSize::TX_4x4) { + // 1. If haveAbove is equal to 1 and notOnRight is equal to 1 and txSz is equal to 0, + // aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ]. + for (auto i = block_size; i < block_size * 2; i++) + above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i)); + } else { + // 2. Otherwise, aboveRow[ i ] is set equal to aboveRow[ size-1 ]. + for (auto i = block_size; i < block_size * 2; i++) + above_row_at(i) = above_row_at(block_size - 1); + } + + // The array aboveRow[ i ] for i = -1 is specified by: + if (have_above && have_left) { + // 1. If haveAbove is equal to 1 and haveLeft is equal to 1, aboveRow[ -1 ] is set equal to + // CurrFrame[ plane ][ y-1 ][ Min(maxX, x-1) ]. + above_row_at(-1) = frame_buffer_at(y - 1, min(max_x, x - 1)); + } else if (have_above) { + // 2. Otherwise if haveAbove is equal to 1, aboveRow[ -1] is set equal to (1<<(BitDepth-1)) + 1. + above_row_at(-1) = half_sample_value + 1; + } else { + // 3. Otherwise, aboveRow[ -1 ] is set equal to (1<<(BitDepth-1)) - 1 + above_row_at(-1) = half_sample_value - 1; + } + + // The array leftCol[ i ] for i = 0..size-1 is specified by: + Vector& left_column = m_buffers.left_column; + DECODER_TRY_ALLOC(left_column.try_resize_and_keep_capacity(block_size)); + if (have_left) { + // − If haveLeft is equal to 1, leftCol[ i ] is set equal to CurrFrame[ plane ][ Min(maxY, y+i) ][ x-1 ]. + for (auto i = 0u; i < block_size; i++) + left_column[i] = frame_buffer_at(min(max_y, y + i), x - 1); + } else { + // − Otherwise, leftCol[ i ] is set equal to (1<<(BitDepth-1)) + 1. 
+ for (auto i = 0u; i < block_size; i++) + left_column[i] = half_sample_value + 1; + } + + // A 2D array named pred containing the intra predicted samples is constructed as follows: + Vector& predicted_samples = m_buffers.predicted_samples; + DECODER_TRY_ALLOC(predicted_samples.try_resize_and_keep_capacity(block_size * block_size)); + auto const predicted_sample_at = [&](u32 row, u32 column) -> Intermediate& { + return predicted_samples[index_from_row_and_column(row, column, block_size)]; + }; + + // FIXME: One of the two below should be a simple memcpy of 1D arrays. + switch (mode) { + case IntraMode::VPred: + // − If mode is equal to V_PRED, pred[ i ][ j ] is set equal to aboveRow[ j ] with j = 0..size-1 and i = 0..size-1 + // (each row of the block is filled with a copy of aboveRow). + for (auto j = 0u; j < block_size; j++) { + for (auto i = 0u; i < block_size; i++) + predicted_sample_at(i, j) = above_row_at(j); + } + break; + case IntraMode::HPred: + // − Otherwise if mode is equal to H_PRED, pred[ i ][ j ] is set equal to leftCol[ i ] with j = 0..size-1 and i = + // 0..size-1 (each column of the block is filled with a copy of leftCol). + for (auto j = 0u; j < block_size; j++) { + for (auto i = 0u; i < block_size; i++) + predicted_sample_at(i, j) = left_column[i]; + } + break; + case IntraMode::D207Pred: + // − Otherwise if mode is equal to D207_PRED, the following applies: + // 1. pred[ size - 1 ][ j ] = leftCol[ size - 1] for j = 0..size-1 + for (auto j = 0u; j < block_size; j++) + predicted_sample_at(block_size - 1, j) = left_column[block_size - 1]; + // 2. pred[ i ][ 0 ] = Round2( leftCol[ i ] + leftCol[ i + 1 ], 1 ) for i = 0..size-2 + for (auto i = 0u; i < block_size - 1u; i++) + predicted_sample_at(i, 0) = round_2(left_column[i] + left_column[i + 1], 1); + // 3. 
pred[ i ][ 1 ] = Round2( leftCol[ i ] + 2 * leftCol[ i + 1 ] + leftCol[ i + 2 ], 2 ) for i = 0..size-3 + for (auto i = 0u; i < block_size - 2u; i++) + predicted_sample_at(i, 1) = round_2(left_column[i] + (2 * left_column[i + 1]) + left_column[i + 2], 2); + // 4. pred[ size - 2 ][ 1 ] = Round2( leftCol[ size - 2 ] + 3 * leftCol[ size - 1 ], 2 ) + predicted_sample_at(block_size - 2, 1) = round_2(left_column[block_size - 2] + (3 * left_column[block_size - 1]), 2); + // 5. pred[ i ][ j ] = pred[ i + 1 ][ j - 2 ] for i = (size-2)..0, for j = 2..size-1 + // NOTE – In the last step i iterates in reverse order. + for (auto i = block_size - 2u;;) { + for (auto j = 2u; j < block_size; j++) + predicted_sample_at(i, j) = predicted_sample_at(i + 1, j - 2); + if (i == 0) + break; + i--; + } + break; + case IntraMode::D45Pred: + // Otherwise if mode is equal to D45_PRED, + // for i = 0..size-1, for j = 0..size-1. + for (auto i = 0u; i < block_size; i++) { + for (auto j = 0; j < block_size; j++) { + // pred[ i ][ j ] is set equal to (i + j + 2 < size * 2) ? + if (i + j + 2 < block_size * 2) + // Round2( aboveRow[ i + j ] + aboveRow[ i + j + 1 ] * 2 + aboveRow[ i + j + 2 ], 2 ) : + predicted_sample_at(i, j) = round_2(above_row_at(i + j) + above_row_at(i + j + 1) * 2 + above_row_at(i + j + 2), 2); + else + // aboveRow[ 2 * size - 1 ] + predicted_sample_at(i, j) = above_row_at(2 * block_size - 1); + } + } + break; + case IntraMode::D63Pred: + // Otherwise if mode is equal to D63_PRED, + for (auto i = 0u; i < block_size; i++) { + for (auto j = 0u; j < block_size; j++) { + // i/2 + j + auto row_index = (i / 2) + j; + // pred[ i ][ j ] is set equal to (i & 1) ? 
+ if (i & 1) + // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ] * 2 + aboveRow[ i/2 + j + 2 ], 2 ) : + predicted_sample_at(i, j) = round_2(above_row_at(row_index) + above_row_at(row_index + 1) * 2 + above_row_at(row_index + 2), 2); + else + // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ], 1 ) for i = 0..size-1, for j = 0..size-1. + predicted_sample_at(i, j) = round_2(above_row_at(row_index) + above_row_at(row_index + 1), 1); + } + } + break; + case IntraMode::D117Pred: + // Otherwise if mode is equal to D117_PRED, the following applies: + // 1. pred[ 0 ][ j ] = Round2( aboveRow[ j - 1 ] + aboveRow[ j ], 1 ) for j = 0..size-1 + for (auto j = 0; j < block_size; j++) + predicted_sample_at(0, j) = round_2(above_row_at(j - 1) + above_row_at(j), 1); + // 2. pred[ 1 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) + predicted_sample_at(1, 0) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); + // 3. pred[ 1 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1 + for (auto j = 1; j < block_size; j++) + predicted_sample_at(1, j) = round_2(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2); + // 4. pred[ 2 ][ 0 ] = Round2( aboveRow[ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) + predicted_sample_at(2, 0) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); + // 5. pred[ i ][ 0 ] = Round2( leftCol[ i - 3 ] + 2 * leftCol[ i - 2 ] + leftCol[ i - 1 ], 2 ) for i = 3..size-1 + for (auto i = 3u; i < block_size; i++) + predicted_sample_at(i, 0) = round_2(left_column[i - 3] + 2 * left_column[i - 2] + left_column[i - 1], 2); + // 6. 
pred[ i ][ j ] = pred[ i - 2 ][ j - 1 ] for i = 2..size-1, for j = 1..size-1 + for (auto i = 2u; i < block_size; i++) { + for (auto j = 1u; j < block_size; j++) + predicted_sample_at(i, j) = predicted_sample_at(i - 2, j - 1); + } + break; + case IntraMode::D135Pred: + // Otherwise if mode is equal to D135_PRED, the following applies: + // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) + predicted_sample_at(0, 0) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); + // 2. pred[ 0 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1 + for (auto j = 1; j < block_size; j++) + predicted_sample_at(0, j) = round_2(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2); + // 3. pred[ 1 ][ 0 ] = Round2( aboveRow [ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) for i = 1..size-1 + predicted_sample_at(1, 0) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); + // 4. pred[ i ][ 0 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1 + for (auto i = 2u; i < block_size; i++) + predicted_sample_at(i, 0) = round_2(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2); + // 5. pred[ i ][ j ] = pred[ i - 1 ][ j - 1 ] for i = 1..size-1, for j = 1..size-1 + for (auto i = 1u; i < block_size; i++) { + for (auto j = 1; j < block_size; j++) + predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 1); + } + break; + case IntraMode::D153Pred: + // Otherwise if mode is equal to D153_PRED, the following applies: + // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + aboveRow[ -1 ], 1 ) + predicted_sample_at(0, 0) = round_2(left_column[0] + above_row_at(-1), 1); + // 2. pred[ i ][ 0 ] = Round2( leftCol[ i - 1] + leftCol[ i ], 1 ) for i = 1..size-1 + for (auto i = 1u; i < block_size; i++) + predicted_sample_at(i, 0) = round_2(left_column[i - 1] + left_column[i], 1); + // 3. 
pred[ 0 ][ 1 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) + predicted_sample_at(0, 1) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); + // 4. pred[ 1 ][ 1 ] = Round2( aboveRow[ -1 ] + 2 * leftCol [ 0 ] + leftCol [ 1 ], 2 ) + predicted_sample_at(1, 1) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); + // 5. pred[ i ][ 1 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1 + for (auto i = 2u; i < block_size; i++) + predicted_sample_at(i, 1) = round_2(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2); + // 6. pred[ 0 ][ j ] = Round2( aboveRow[ j - 3 ] + 2 * aboveRow[ j - 2 ] + aboveRow[ j - 1 ], 2 ) for j = 2..size-1 + for (auto j = 2; j < block_size; j++) + predicted_sample_at(0, j) = round_2(above_row_at(j - 3) + 2 * above_row_at(j - 2) + above_row_at(j - 1), 2); + // 7. pred[ i ][ j ] = pred[ i - 1 ][ j - 2 ] for i = 1..size-1, for j = 2..size-1 + for (auto i = 1u; i < block_size; i++) { + for (auto j = 2u; j < block_size; j++) + predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 2); + } + break; + case IntraMode::TmPred: + // Otherwise if mode is equal to TM_PRED, + // pred[ i ][ j ] is set equal to Clip1( aboveRow[ j ] + leftCol[ i ] - aboveRow[ -1 ] ) + // for i = 0..size-1, for j = 0..size-1. + for (auto i = 0u; i < block_size; i++) { + for (auto j = 0u; j < block_size; j++) + predicted_sample_at(i, j) = clip_1(m_parser->m_bit_depth, above_row_at(j) + left_column[i] - above_row_at(-1)); + } + break; + case IntraMode::DcPred: { + // FIXME: All indices are set equally below, use memset. 
+ Intermediate average = 0; + + if (have_left && have_above) { + // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 1, + // The variable avg (the average of the samples in union of aboveRow and leftCol) + // is specified as follows: + // sum = 0 + // for ( k = 0; k < size; k++ ) { + // sum += leftCol[ k ] + // sum += aboveRow[ k ] + // } + // avg = (sum + size) >> (log2Size + 1) + Intermediate sum = 0; + for (auto k = 0u; k < block_size; k++) { + sum += left_column[k]; + sum += above_row_at(k); + } + average = (sum + block_size) >> (log2_of_block_size + 1); + } else if (have_left && !have_above) { + // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 0, + // The variable leftAvg is specified as follows: + // sum = 0 + // for ( k = 0; k < size; k++ ) { + // sum += leftCol[ k ] + // } + // leftAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size + Intermediate sum = 0; + for (auto k = 0u; k < block_size; k++) + sum += left_column[k]; + average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size; + } else if (!have_left && have_above) { + // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 0 and haveAbove is equal to 1, + // The variable aboveAvg is specified as follows: + // sum = 0 + // for ( k = 0; k < size; k++ ) { + // sum += aboveRow[ k ] + // } + // aboveAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size + Intermediate sum = 0; + for (auto k = 0u; k < block_size; k++) + sum += above_row_at(k); + average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size; + } else { + // Otherwise (mode is DC_PRED), + // pred[ i ][ j ] is set equal to 1<<(BitDepth - 1) with i = 0..size-1 and j = 0..size-1. + average = 1 << (m_parser->m_bit_depth - 1); + } + + // pred[ i ][ j ] is set equal to avg with i = 0..size-1 and j = 0..size-1. 
+ for (auto i = 0u; i < block_size; i++) { + for (auto j = 0u; j < block_size; j++) + predicted_sample_at(i, j) = average; + } + break; + } + default: + dbgln("Unknown prediction mode {}", static_cast(mode)); + VERIFY_NOT_REACHED(); + } + + // The current frame is updated as follows: + // − CurrFrame[ plane ][ y + i ][ x + j ] is set equal to pred[ i ][ j ] for i = 0..size-1 and j = 0..size-1. + auto width_in_frame_buffer = min(static_cast(block_size), max_x - x + 1); + auto height_in_frame_buffer = min(static_cast(block_size), max_y - y + 1); + + for (auto i = 0u; i < height_in_frame_buffer; i++) { + for (auto j = 0u; j < width_in_frame_buffer; j++) + frame_buffer_at(y + i, x + j) = predicted_sample_at(i, j); + } + + return {}; +} + +DecoderErrorOr Decoder::predict_inter(u8, u32, u32, u32, u32, u32) { // TODO: Implement return DecoderError::not_implemented(); } -DecoderErrorOr Decoder::predict_inter(size_t, u32, u32, u32, u32, u32) +u16 Decoder::dc_q(u8 b) { - // TODO: Implement + // The function dc_q( b ) is specified as dc_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where dc_lookup is + // defined as follows: + static const u16 dc_qlookup[3][256] = { + { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, 304, 309, 313, 
317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336 }, + { 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347 }, + { 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, 768, 783, 798, 814, 829, 
844, 859, 874, 889, 904, 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387 } + }; + + return dc_qlookup[(m_parser->m_bit_depth - 8) >> 1][clip_3(0, 255, b)]; +} + +u16 Decoder::ac_q(u8 b) +{ + // The function ac_q( b ) is specified as ac_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where ac_lookup is + // defined as follows: + static const u16 ac_qlookup[3][256] = { + { 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 
152, 155, 158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828 }, + { 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 
5804, 5916, 6032, 6148, 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312 }, + { 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247 } + }; + + return ac_qlookup[(m_parser->m_bit_depth - 8) >> 1][clip_3(0, 255, b)]; +} + +u8 Decoder::get_qindex() +{ + // The function get_qindex( ) returns the quantizer index for the current block and is specified by the following: + // − If seg_feature_active( SEG_LVL_ALT_Q ) is equal to 1 the following ordered steps apply: + if (m_parser->seg_feature_active(SEG_LVL_ALT_Q)) { + // 1. 
Set the variable data equal to FeatureData[ segment_id ][ SEG_LVL_ALT_Q ]. + auto data = m_parser->m_feature_data[m_parser->m_segment_id][SEG_LVL_ALT_Q]; + + // 2. If segmentation_abs_or_delta_update is equal to 0, set data equal to base_q_idx + data + if (!m_parser->m_segmentation_abs_or_delta_update) { + data += m_parser->m_base_q_idx; + } + + // 3. Return Clip3( 0, 255, data ). + return clip_3(0, 255, data); + } + + // − Otherwise, return base_q_idx. + return m_parser->m_base_q_idx; +} + +u16 Decoder::get_dc_quant(u8 plane) +{ + // The function get_dc_quant( plane ) returns the quantizer value for the dc coefficient for a particular plane and + // is derived as follows: + // − If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ). + // − Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ). + // Instead of if { return }, select the value to add and return. + i8 offset = plane == 0 ? m_parser->m_delta_q_y_dc : m_parser->m_delta_q_uv_dc; + return dc_q(static_cast(get_qindex() + offset)); +} + +u16 Decoder::get_ac_quant(u8 plane) +{ + // The function get_ac_quant( plane ) returns the quantizer value for the ac coefficient for a particular plane and + // is derived as follows: + // − If plane is equal to 0, return ac_q( get_qindex( ) ). + // − Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ). + // Instead of if { return }, select the value to add and return. + i8 offset = plane == 0 ? 0 : m_parser->m_delta_q_uv_ac; + return ac_q(static_cast(get_qindex() + offset)); +} + +DecoderErrorOr Decoder::reconstruct(u8 plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size) +{ + // 8.6.2 Reconstruct process + + // The variable dqDenom is set equal to 2 if txSz is equal to TX_32X32, otherwise dqDenom is set equal to 1. + Intermediate dq_denominator = transform_block_size == TX_32x32 ? 2 : 1; + // The variable n (specifying the base 2 logarithm of the width of the transform block) is set equal to 2 + txSz. 
+ u8 log2_of_block_size = 2u + transform_block_size; + // The variable n0 (specifying the width of the transform block) is set equal to 1 << n. + auto block_size = 1u << log2_of_block_size; + + // 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom + // for i = 0..(n0-1), for j = 0..(n0-1) + Vector& dequantized = m_buffers.dequantized; + DECODER_TRY_ALLOC(dequantized.try_resize_and_keep_capacity(buffer_size(block_size, block_size))); + Intermediate ac_quant = get_ac_quant(plane); + for (auto i = 0u; i < block_size; i++) { + for (auto j = 0u; j < block_size; j++) { + auto index = index_from_row_and_column(i, j, block_size); + if (index == 0) + continue; + dequantized[index] = (m_parser->m_tokens[index] * ac_quant) / dq_denominator; + } + } + + // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom + dequantized[0] = (m_parser->m_tokens[0] * get_dc_quant(plane)) / dq_denominator; + + // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2 + // are representable by a signed integer with 8 + BitDepth bits. + for (auto i = 0u; i < block_size * block_size; i++) + VERIFY(check_intermediate_bounds(dequantized[i])); + + // 3. Invoke the 2D inverse transform block process defined in section 8.7.2 with the variable n as input. + // The inverse transform outputs are stored back to the Dequant buffer. + TRY(inverse_transform_2d(dequantized, log2_of_block_size)); + + // 4. CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Clip1( CurrFrame[ plane ][ y + i ][ x + j ] + Dequant[ i ][ j ] ) + // for i = 0..(n0-1) and j = 0..(n0-1). + auto& current_buffer = get_output_buffer(plane); + auto subsampling_x = (plane > 0 ? m_parser->m_subsampling_x : 0); + auto subsampling_y = (plane > 0 ? 
m_parser->m_subsampling_y : 0); + auto frame_width = (m_parser->m_mi_cols * 8) >> subsampling_x; + auto frame_height = (m_parser->m_mi_rows * 8) >> subsampling_y; + auto width_in_frame_buffer = min(block_size, frame_width - transform_block_x); + auto height_in_frame_buffer = min(block_size, frame_height - transform_block_y); + + for (auto i = 0u; i < height_in_frame_buffer; i++) { + for (auto j = 0u; j < width_in_frame_buffer; j++) { + auto index = index_from_row_and_column(transform_block_y + i, transform_block_x + j, frame_width); + auto dequantized_value = dequantized[index_from_row_and_column(i, j, block_size)]; + current_buffer[index] = clip_1(m_parser->m_bit_depth, current_buffer[index] + dequantized_value); + } + } + + return {}; +} + +inline DecoderErrorOr Decoder::inverse_walsh_hadamard_transform(Vector& data, u8 log2_of_block_size, u8 shift) +{ + (void)data; + (void)shift; + // The input to this process is a variable shift that specifies the amount of pre-scaling. + // This process does an in-place transform of the array T (of length 4) by the following ordered steps: + if (1 << log2_of_block_size != 4) + return DecoderError::corrupted("Block size was not 4"sv); + return DecoderError::not_implemented(); } -DecoderErrorOr Decoder::reconstruct(size_t, u32, u32, TXSize) +inline i32 Decoder::cos64(u8 angle) { - // TODO: Implement - return DecoderError::not_implemented(); + const i32 cos64_lookup[33] = { 16384, 16364, 16305, 16207, 16069, 15893, 15679, 15426, 15137, 14811, 14449, 14053, 13623, 13160, 12665, 12140, 11585, 11003, 10394, 9760, 9102, 8423, 7723, 7005, 6270, 5520, 4756, 3981, 3196, 2404, 1606, 804, 0 }; + + // 1. Set a variable angle2 equal to angle & 127. + angle &= 127; + // 2. If angle2 is greater than or equal to 0 and less than or equal to 32, return cos64_lookup[ angle2 ]. + if (angle <= 32) + return cos64_lookup[angle]; + // 3. If angle2 is greater than 32 and less than or equal to 64, return cos64_lookup[ 64 - angle2 ] * -1. 
+ if (angle <= 64) + return -cos64_lookup[64 - angle]; + // 4. If angle2 is greater than 64 and less than or equal to 96, return cos64_lookup[ angle2 - 64 ] * -1. + if (angle <= 96) + return -cos64_lookup[angle - 64]; + // 5. Otherwise (if angle2 is greater than 96 and less than 128), return cos64_lookup[ 128 - angle2 ]. + return cos64_lookup[128 - angle]; +} + +inline i32 Decoder::sin64(u8 angle) +{ + if (angle < 32) + angle += 128; + return cos64(angle - 32u); +} + +template +inline i32 Decoder::round_2(T value, u8 bits) +{ + value = (value + static_cast(1u << (bits - 1u))) >> bits; + return static_cast(value); +} + +// Returns whether value is representable by a signed integer with the given number of bits. +inline bool check_bounds(i64 value, u8 bits) +{ + // Shift a 64-bit one: callers pass 24 + BitDepth bits, which can exceed the width of a 32-bit operand. + const i64 one = 1; + const i64 maximum = (one << (bits - 1u)) - one; + return value >= ~maximum && value <= maximum; +} + +inline bool Decoder::check_intermediate_bounds(Intermediate value) +{ + i32 maximum = (1 << (8 + m_parser->m_bit_depth - 1)) - 1; + return value >= ~maximum && value <= maximum; +} + +// (8.7.1.1) The function B( a, b, angle, 0 ) performs a butterfly rotation. +inline void Decoder::butterfly_rotation_in_place(Vector& data, size_t index_a, size_t index_b, u8 angle, bool flip) +{ + auto cos = cos64(angle); + auto sin = sin64(angle); + // Widen the operands to i64 before multiplying so that the products are computed in 64-bit arithmetic. + i64 a = data[index_a]; + i64 b = data[index_b]; + // 1. The variable x is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ). + i64 rotated_a = a * cos - b * sin; + // 2. The variable y is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ). + i64 rotated_b = a * sin + b * cos; + // 3. T[ a ] is set equal to Round2( x, 14 ). + data[index_a] = round_2(rotated_a, 14); + // 4. T[ b ] is set equal to Round2( y, 14 ). + data[index_b] = round_2(rotated_b, 14); + + // The function B( a ,b, angle, 1 ) performs a butterfly rotation and flip specified by the following ordered steps: + // 1. The function B( a, b, angle, 0 ) is invoked. + // 2. The contents of T[ a ] and T[ b ] are exchanged. 
+ if (flip) + swap(data[index_a], data[index_b]); + + // It is a requirement of bitstream conformance that the values saved into the array T by this function are + // representable by a signed integer using 8 + BitDepth bits of precision. + VERIFY(check_intermediate_bounds(data[index_a])); + VERIFY(check_intermediate_bounds(data[index_b])); +} + +// (8.7.1.1) The function H( a, b, 0 ) performs a Hadamard rotation. +inline void Decoder::hadamard_rotation_in_place(Vector& data, size_t index_a, size_t index_b, bool flip) +{ + // The function H( a, b, 1 ) performs a Hadamard rotation with flipped indices and is specified as follows: + // 1. The function H( b, a, 0 ) is invoked. + if (flip) + swap(index_a, index_b); + + // The function H( a, b, 0 ) performs a Hadamard rotation specified by the following ordered steps: + + // 1. The variable x is set equal to T[ a ]. + auto a_value = data[index_a]; + // 2. The variable y is set equal to T[ b ]. + auto b_value = data[index_b]; + // 3. T[ a ] is set equal to x + y. + data[index_a] = a_value + b_value; + // 4. T[ b ] is set equal to x - y. + data[index_b] = a_value - b_value; + + // It is a requirement of bitstream conformance that the values saved into the array T by this function are + // representable by a signed integer using 8 + BitDepth bits of precision. + VERIFY(check_intermediate_bounds(data[index_a])); + VERIFY(check_intermediate_bounds(data[index_b])); +} + +inline DecoderErrorOr Decoder::inverse_discrete_cosine_transform_array_permutation(Vector& data, u8 log2_of_block_size) +{ + u8 block_size = 1 << log2_of_block_size; + + // This process performs an in-place permutation of the array T of length 2^n for 2 ≤ n ≤ 5 which is required before + // execution of the inverse DCT process. + if (log2_of_block_size < 2 || log2_of_block_size > 5) + return DecoderError::corrupted("Block size was out of range"sv); + + // 1.1. A temporary array named copyT is set equal to T. 
+ Vector& data_copy = m_buffers.transform_temp; + data_copy.clear_with_capacity(); + DECODER_TRY_ALLOC(data_copy.try_resize_and_keep_capacity(buffer_size(block_size, block_size))); + data_copy = data; + + // 1.2. T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1< Decoder::inverse_discrete_cosine_transform(Vector& data, u8 log2_of_block_size) +{ + // 2.1. The variable n0 is set equal to 1<> 1; + // 2.3 The variable n2 is set equal to 1<<(n-2). + u8 quarter_block_size = half_block_size >> 1; + // 2.4 The variable n3 is set equal to 1<<(n-3). + u8 eighth_block_size = quarter_block_size >> 1; + + // 2.5 If n is equal to 2, invoke B( 0, 1, 16, 1 ), otherwise recursively invoke the inverse DCT defined in this + // section with the variable n set equal to n - 1. + if (log2_of_block_size == 2) + butterfly_rotation_in_place(data, 0, 1, 16, true); + else + TRY(inverse_discrete_cosine_transform(data, log2_of_block_size - 1)); + + // 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1). + for (auto i = 0u; i < quarter_block_size; i++) { + auto index = half_block_size + i; + butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev(5, index), false); + } + + // 2.7 If n is greater than or equal to 3: + if (log2_of_block_size >= 3) { + // a. Invoke H( n1+4*i+2*j, n1+1+4*i+2*j, j ) for i = 0..(n3-1), j = 0..1. + for (auto i = 0u; i < eighth_block_size; i++) { + for (auto j = 0u; j < 2; j++) { + auto index = half_block_size + (4 * i) + (2 * j); + hadamard_rotation_in_place(data, index, index + 1, j); + } + } + } + + // 4. If n is equal to 5: + if (log2_of_block_size == 5) { + // a. Invoke B( n0-n+3-n2*j-4*i, n1+n-4+n2*j+4*i, 28-16*i+56*j, 1 ) for i = 0..1, j = 0..1. 
+ for (auto i = 0u; i < 2; i++) { + for (auto j = 0u; j < 2; j++) { + auto index_a = block_size - log2_of_block_size + 3 - (quarter_block_size * j) - (4 * i); + auto index_b = half_block_size + log2_of_block_size - 4 + (quarter_block_size * j) + (4 * i); + auto angle = 28 - (16 * i) + (56 * j); + butterfly_rotation_in_place(data, index_a, index_b, angle, true); + } + } + + // b. Invoke H( n1+n3*j+i, n1+n2-5+n3*j-i, j&1 ) for i = 0..1, j = 0..3. + for (auto i = 0u; i < 2; i++) { + for (auto j = 0u; j < 4; j++) { + auto index_a = half_block_size + (eighth_block_size * j) + i; + auto index_b = half_block_size + quarter_block_size - 5 + (eighth_block_size * j) - i; + hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0); + } + } + } + + // 5. If n is greater than or equal to 4: + if (log2_of_block_size >= 4) { + // a. Invoke B( n0-n+2-i-n2*j, n1+n-3+i+n2*j, 24+48*j, 1 ) for i = 0..(n==5), j = 0..1. + for (auto i = 0u; i <= (log2_of_block_size == 5); i++) { + for (auto j = 0u; j < 2; j++) { + auto index_a = block_size - log2_of_block_size + 2 - i - (quarter_block_size * j); + auto index_b = half_block_size + log2_of_block_size - 3 + i + (quarter_block_size * j); + butterfly_rotation_in_place(data, index_a, index_b, 24 + (48 * j), true); + } + } + + // b. Invoke H( n1+n2*j+i, n1+n2-1+n2*j-i, j&1 ) for i = 0..(2n-7), j = 0..1. + for (auto i = 0u; i < (2 * log2_of_block_size) - 6u; i++) { + for (auto j = 0u; j < 2; j++) { + auto index_a = half_block_size + (quarter_block_size * j) + i; + auto index_b = half_block_size + quarter_block_size - 1 + (quarter_block_size * j) - i; + hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0); + } + } + } + + // 6. If n is greater than or equal to 3: + if (log2_of_block_size >= 3) { + // a. Invoke B( n0-n3-1-i, n1+n3+i, 16, 1 ) for i = 0..(n3-1). 
+ for (auto i = 0u; i < eighth_block_size; i++) { + auto index_a = block_size - eighth_block_size - 1 - i; + auto index_b = half_block_size + eighth_block_size + i; + butterfly_rotation_in_place(data, index_a, index_b, 16, true); + } + } + + // 7. Invoke H( i, n0-1-i, 0 ) for i = 0..(n1-1). + for (auto i = 0u; i < half_block_size; i++) + hadamard_rotation_in_place(data, i, block_size - 1 - i, false); + + return {}; +} + +inline void Decoder::inverse_asymmetric_discrete_sine_transform_input_array_permutation(Vector& data, Vector& temp, u8 log2_of_block_size) +{ + // The variable n0 is set equal to 1<& data, Vector& temp, u8 log2_of_block_size) +{ + // A temporary array named copyT is set equal to T. + temp = data; + + // The permutation depends on n as follows: + if (log2_of_block_size == 4) { + // − If n is equal to 4, + // T[ 8*a + 4*b + 2*c + d ] is set equal to copyT[ 8*(d^c) + 4*(c^b) + 2*(b^a) + a ] for a = 0..1 + // and b = 0..1 and c = 0..1 and d = 0..1. + for (auto a = 0u; a < 2; a++) + for (auto b = 0u; b < 2; b++) + for (auto c = 0u; c < 2; c++) + for (auto d = 0u; d < 2; d++) + data[(8 * a) + (4 * b) + (2 * c) + d] = temp[8 * (d ^ c) + 4 * (c ^ b) + 2 * (b ^ a) + a]; + } else { + VERIFY(log2_of_block_size == 3); + // − Otherwise (n is equal to 3), + // T[ 4*a + 2*b + c ] is set equal to copyT[ 4*(c^b) + 2*(b^a) + a ] for a = 0..1 and + // b = 0..1 and c = 0..1. 
+ for (auto a = 0u; a < 2; a++) + for (auto b = 0u; b < 2; b++) + for (auto c = 0u; c < 2; c++) + data[4 * a + 2 * b + c] = temp[4 * (c ^ b) + 2 * (b ^ a) + a]; + } +} + +inline void Decoder::inverse_asymmetric_discrete_sine_transform_4(Vector& data) +{ + VERIFY(data.size() == 4); + const i64 sinpi_1_9 = 5283; + const i64 sinpi_2_9 = 9929; + const i64 sinpi_3_9 = 13377; + const i64 sinpi_4_9 = 15212; + + // Steps are derived from pseudocode in (8.7.1.6): + // s0 = SINPI_1_9 * T[ 0 ] + i64 s0 = sinpi_1_9 * data[0]; + // s1 = SINPI_2_9 * T[ 0 ] + i64 s1 = sinpi_2_9 * data[0]; + // s2 = SINPI_3_9 * T[ 1 ] + i64 s2 = sinpi_3_9 * data[1]; + // s3 = SINPI_4_9 * T[ 2 ] + i64 s3 = sinpi_4_9 * data[2]; + // s4 = SINPI_1_9 * T[ 2 ] + i64 s4 = sinpi_1_9 * data[2]; + // s5 = SINPI_2_9 * T[ 3 ] + i64 s5 = sinpi_2_9 * data[3]; + // s6 = SINPI_4_9 * T[ 3 ] + i64 s6 = sinpi_4_9 * data[3]; + // v = T[ 0 ] - T[ 2 ] + T[ 3 ] + // s7 = SINPI_3_9 * v + i64 s7 = sinpi_3_9 * (data[0] - data[2] + data[3]); + + // x0 = s0 + s3 + s5 + auto x0 = s0 + s3 + s5; + // x1 = s1 - s4 - s6 + auto x1 = s1 - s4 - s6; + // x2 = s7 + auto x2 = s7; + // x3 = s2 + auto x3 = s2; + + // s0 = x0 + x3 + s0 = x0 + x3; + // s1 = x1 + x3 + s1 = x1 + x3; + // s2 = x2 + s2 = x2; + // s3 = x0 + x1 - x3 + s3 = x0 + x1 - x3; + + // T[ 0 ] = Round2( s0, 14 ) + data[0] = round_2(s0, 14); + // T[ 1 ] = Round2( s1, 14 ) + data[1] = round_2(s1, 14); + // T[ 2 ] = Round2( s2, 14 ) + data[2] = round_2(s2, 14); + // T[ 3 ] = Round2( s3, 14 ) + data[3] = round_2(s3, 14); + + // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S. + // The values in this array require higher precision to avoid overflow. Using signed integers with 24 + + // BitDepth bits of precision is enough to avoid overflow. 
+ const u8 bits = 24 + m_parser->m_bit_depth; + VERIFY(check_bounds(data[0], bits)); + VERIFY(check_bounds(data[1], bits)); + VERIFY(check_bounds(data[2], bits)); + VERIFY(check_bounds(data[3], bits)); +} + +// The function SB( a, b, angle, 0 ) performs a butterfly rotation. +// Spec defines the source as array T, and the destination array as S. +template +inline void Decoder::butterfly_rotation(Vector& source, Vector& destination, size_t index_a, size_t index_b, u8 angle, bool flip) +{ + // The function SB( a, b, angle, 0 ) performs a butterfly rotation according to the following ordered steps: + auto cos = cos64(angle); + auto sin = sin64(angle); + // Expand to the destination buffer's precision. + D a = source[index_a]; + D b = source[index_b]; + // 1. S[ a ] is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ). + destination[index_a] = a * cos - b * sin; + // 2. S[ b ] is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ). + destination[index_b] = a * sin + b * cos; + + // The function SB( a, b, angle, 1 ) performs a butterfly rotation and flip according to the following ordered steps: + // 1. The function SB( a, b, angle, 0 ) is invoked. + // 2. The contents of S[ a ] and S[ b ] are exchanged. + if (flip) + swap(destination[index_a], destination[index_b]); +} + +// The function SH( a, b ) performs a Hadamard rotation and rounding. +// Spec defines the source array as S, and the destination array as T. +template +inline void Decoder::hadamard_rotation(Vector& source, Vector& destination, size_t index_a, size_t index_b) +{ + // Keep the source buffer's precision until rounding. + S a = source[index_a]; + S b = source[index_b]; + // 1. T[ a ] is set equal to Round2( S[ a ] + S[ b ], 14 ). + destination[index_a] = round_2(a + b, 14); + // 2. T[ b ] is set equal to Round2( S[ a ] - S[ b ], 14 ). 
+ destination[index_b] = round_2(a - b, 14); +} + +inline DecoderErrorOr Decoder::inverse_asymmetric_discrete_sine_transform_8(Vector& data) +{ + VERIFY(data.size() == 8); + + // This process does an in-place transform of the array T using: + + // A higher precision array S for intermediate results. + Vector& high_precision_temp = m_buffers.adst_temp; + high_precision_temp.clear_with_capacity(); + DECODER_TRY_ALLOC(high_precision_temp.try_resize_and_keep_capacity(8)); + + // The following ordered steps apply: + + // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set + // equal to 3. + inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, m_buffers.transform_temp, 3); + + // 2. Invoke SB( 2*i, 1+2*i, 30-8*i, 1 ) for i = 0..3. + for (auto i = 0u; i < 4; i++) + butterfly_rotation(data, high_precision_temp, 2 * i, 1 + (2 * i), 30 - (8 * i), true); + // (8.7.1.1) NOTE - The values in array S require higher precision to avoid overflow. Using signed integers with + // 24 + BitDepth bits of precision is enough to avoid overflow. + const u8 bits = 24 + m_parser->m_bit_depth; + for (auto i = 0u; i < 8; i++) + VERIFY(check_bounds(high_precision_temp[i], bits)); + // 3. Invoke SH( i, 4+i ) for i = 0..3. + for (auto i = 0u; i < 4; i++) + hadamard_rotation(high_precision_temp, data, i, 4 + i); + + // 4. Invoke SB( 4+3*i, 5+i, 24-16*i, 1 ) for i = 0..1. + for (auto i = 0u; i < 2; i++) + butterfly_rotation(data, high_precision_temp, 4 + (3 * i), 5 + i, 24 - (16 * i), true); + // Check again that we haven't exceeded the integer bounds. + for (auto i = 0u; i < 8; i++) + VERIFY(check_bounds(high_precision_temp[i], bits)); + // 5. Invoke SH( 4+i, 6+i ) for i = 0..1. + for (auto i = 0u; i < 2; i++) + hadamard_rotation(high_precision_temp, data, 4 + i, 6 + i); + + // 6. Invoke H( i, 2+i, 0 ) for i = 0..1. + for (auto i = 0u; i < 2; i++) + hadamard_rotation_in_place(data, i, 2 + i, false); + + // 7. 
Invoke B( 2+4*i, 3+4*i, 16, 1 ) for i = 0..1. + for (auto i = 0u; i < 2; i++) + butterfly_rotation_in_place(data, 2 + (4 * i), 3 + (4 * i), 16, true); + + // 8. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n + // set equal to 3. + inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, m_buffers.transform_temp, 3); + + // 9. Set T[ 1+2*i ] equal to -T[ 1+2*i ] for i = 0..3. + for (auto i = 0u; i < 4; i++) { + auto index = 1 + (2 * i); + data[index] = -data[index]; + } + return {}; +} + +inline DecoderErrorOr Decoder::inverse_asymmetric_discrete_sine_transform_16(Vector& data) +{ + VERIFY(data.size() == 16); + // This process does an in-place transform of the array T using: + + // A higher precision array S for intermediate results. + Vector& high_precision_temp = m_buffers.adst_temp; + high_precision_temp.clear_with_capacity(); + DECODER_TRY_ALLOC(high_precision_temp.try_resize_and_keep_capacity(16)); + + // The following ordered steps apply: + + // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set + // equal to 4. + inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, m_buffers.transform_temp, 4); + + // 2. Invoke SB( 2*i, 1+2*i, 31-4*i, 1 ) for i = 0..7. + for (auto i = 0u; i < 8; i++) + butterfly_rotation(data, high_precision_temp, 2 * i, 1 + (2 * i), 31 - (4 * i), true); + // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S. + // The values in this array require higher precision to avoid overflow. Using signed integers with 24 + + // BitDepth bits of precision is enough to avoid overflow. + // The check is applied to the high precision array S that SB() just wrote to, not to T. + const u8 bits = 24 + m_parser->m_bit_depth; + for (auto i = 0u; i < 16; i++) + VERIFY(check_bounds(high_precision_temp[i], bits)); + // 3. Invoke SH( i, 8+i ) for i = 0..7. + for (auto i = 0u; i < 8; i++) + hadamard_rotation(high_precision_temp, data, i, 8 + i); + + // 4. 
Invoke SB( 8+2*i, 9+2*i, 28-16*i, 1 ) for i = 0..3. + for (auto i = 0u; i < 4; i++) + butterfly_rotation(data, high_precision_temp, 8 + (2 * i), 9 + (2 * i), 128 + 28 - (16 * i), true); + // Check again that we haven't exceeded the integer bounds. + for (auto i = 0u; i < 16; i++) + VERIFY(check_bounds(high_precision_temp[i], bits)); + // 5. Invoke SH( 8+i, 12+i ) for i = 0..3. + for (auto i = 0u; i < 4; i++) + hadamard_rotation(high_precision_temp, data, 8 + i, 12 + i); + + // 6. Invoke H( i, 4+i, 0 ) for i = 0..3. + for (auto i = 0u; i < 4; i++) + hadamard_rotation_in_place(data, i, 4 + i, false); + + // 7. Invoke SB( 4+8*i+3*j, 5+8*i+j, 24-16*j, 1 ) for i = 0..1, for j = 0..1. + for (auto i = 0u; i < 2; i++) + for (auto j = 0u; j < 2; j++) + butterfly_rotation(data, high_precision_temp, 4 + (8 * i) + (3 * j), 5 + (8 * i) + j, 24 - (16 * j), true); + // Check again that we haven't exceeded the integer bounds. + for (auto i = 0u; i < 16; i++) + VERIFY(check_bounds(high_precision_temp[i], bits)); + // 8. Invoke SH( 4+8*j+i, 6+8*j+i ) for i = 0..1, j = 0..1. + for (auto i = 0u; i < 2; i++) + for (auto j = 0u; j < 2; j++) + hadamard_rotation(high_precision_temp, data, 4 + (8 * j) + i, 6 + (8 * j) + i); + + // 9. Invoke H( 8*j+i, 2+8*j+i, 0 ) for i = 0..1, for j = 0..1. + for (auto i = 0u; i < 2; i++) + for (auto j = 0u; j < 2; j++) + hadamard_rotation_in_place(data, (8 * j) + i, 2 + (8 * j) + i, false); + // 10. Invoke B( 2+4*j+8*i, 3+4*j+8*i, 48+64*(i^j), 0 ) for i = 0..1, for j = 0..1. + for (auto i = 0u; i < 2; i++) + for (auto j = 0u; j < 2; j++) + butterfly_rotation_in_place(data, 2 + (4 * j) + (8 * i), 3 + (4 * j) + (8 * i), 48 + (64 * (i ^ j)), false); + + // 11. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n + // set equal to 4. + inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, m_buffers.transform_temp, 4); + + // 12. Set T[ 1+12*j+2*i ] equal to -T[ 1+12*j+2*i ] for i = 0..1, for j = 0..1. 
+ for (auto i = 0u; i < 2; i++) { + for (auto j = 0u; j < 2; j++) { + auto index = 1 + (12 * j) + (2 * i); + data[index] = -data[index]; + } + } + return {}; +} + +inline DecoderErrorOr Decoder::inverse_asymmetric_discrete_sine_transform(Vector& data, u8 log2_of_block_size) +{ + // 8.7.1.9 Inverse ADST Process + + // This process performs an in-place inverse ADST process on the array T of size 2^n for 2 ≤ n ≤ 4. + if (log2_of_block_size < 2 || log2_of_block_size > 4) + return DecoderError::corrupted("Block size was out of range"sv); + + // The process to invoke depends on n as follows: + if (log2_of_block_size == 2) { + // − If n is equal to 2, invoke the Inverse ADST4 process specified in section 8.7.1.6. + inverse_asymmetric_discrete_sine_transform_4(data); + return {}; + } else if (log2_of_block_size == 3) { + // − Otherwise if n is equal to 3, invoke the Inverse ADST8 process specified in section 8.7.1.7. + return inverse_asymmetric_discrete_sine_transform_8(data); + } + // − Otherwise (n is equal to 4), invoke the Inverse ADST16 process specified in section 8.7.1.8. + return inverse_asymmetric_discrete_sine_transform_16(data); +} + +DecoderErrorOr Decoder::inverse_transform_2d(Vector& dequantized, u8 log2_of_block_size) +{ + // This process performs a 2D inverse transform for an array of size 2^n by 2^n stored in the 2D array Dequant. + // The input to this process is a variable n (log2_of_block_size) that specifies the base 2 logarithm of the width of the transform. + + // 1. Set the variable n0 (block_size) equal to 1 << n. + auto block_size = 1u << log2_of_block_size; + + Vector& row_or_column = m_buffers.row_or_column; + DECODER_TRY_ALLOC(row_or_column.try_resize_and_keep_capacity(block_size)); + + // 2. The row transforms with i = 0..(n0-1) are applied as follows: + for (auto i = 0u; i < block_size; i++) { + // 1. Set T[ j ] equal to Dequant[ i ][ j ] for j = 0..(n0-1). 
+ for (auto j = 0u; j < block_size; j++) + row_or_column[j] = dequantized[index_from_row_and_column(i, j, block_size)]; + + // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal + // to 2. + // The transformed row must still be written back in step 5, so this does not skip the rest of the iteration. + if (m_parser->m_lossless) { + TRY(inverse_walsh_hadamard_transform(row_or_column, log2_of_block_size, 2)); + } else { + switch (m_parser->m_tx_type) { + case DCT_DCT: + case ADST_DCT: + // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to ADST_DCT, apply an inverse DCT as + // follows: + // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n. + TRY(inverse_discrete_cosine_transform_array_permutation(row_or_column, log2_of_block_size)); + // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n. + TRY(inverse_discrete_cosine_transform(row_or_column, log2_of_block_size)); + break; + case DCT_ADST: + case ADST_ADST: + // 4. Otherwise (TxType is equal to DCT_ADST or TxType is equal to ADST_ADST), invoke the inverse ADST + // process as specified in section 8.7.1.9 with input variable n. + TRY(inverse_asymmetric_discrete_sine_transform(row_or_column, log2_of_block_size)); + break; + default: + return DecoderError::corrupted("Unknown tx_type"sv); + } + } + + // 5. Set Dequant[ i ][ j ] equal to T[ j ] for j = 0..(n0-1). + for (auto j = 0u; j < block_size; j++) + dequantized[index_from_row_and_column(i, j, block_size)] = row_or_column[j]; + } + + // 3. The column transforms with j = 0..(n0-1) are applied as follows: + for (auto j = 0u; j < block_size; j++) { + // 1. Set T[ i ] equal to Dequant[ i ][ j ] for i = 0..(n0-1). + for (auto i = 0u; i < block_size; i++) + row_or_column[i] = dequantized[index_from_row_and_column(i, j, block_size)]; + + // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal + // to 0. 
+ // The transformed column must still be written back in step 5, so this does not skip the rest of the iteration. + if (m_parser->m_lossless) { + TRY(inverse_walsh_hadamard_transform(row_or_column, log2_of_block_size, 0)); + } else { + switch (m_parser->m_tx_type) { + case DCT_DCT: + case DCT_ADST: + // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to DCT_ADST, apply an inverse DCT as + // follows: + // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n. + TRY(inverse_discrete_cosine_transform_array_permutation(row_or_column, log2_of_block_size)); + // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n. + TRY(inverse_discrete_cosine_transform(row_or_column, log2_of_block_size)); + break; + case ADST_DCT: + case ADST_ADST: + // 4. Otherwise (TxType is equal to ADST_DCT or TxType is equal to ADST_ADST), invoke the inverse ADST + // process as specified in section 8.7.1.9 with input variable n. + TRY(inverse_asymmetric_discrete_sine_transform(row_or_column, log2_of_block_size)); + break; + default: + VERIFY_NOT_REACHED(); + } + } + + // 5. If Lossless is equal to 1, set Dequant[ i ][ j ] equal to T[ i ] for i = 0..(n0-1). + for (auto i = 0u; i < block_size; i++) + dequantized[index_from_row_and_column(i, j, block_size)] = row_or_column[i]; + + // 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) ) + // for i = 0..(n0-1). 
+ if (!m_parser->m_lossless) { + for (auto i = 0u; i < block_size; i++) { + auto index = index_from_row_and_column(i, j, block_size); + dequantized[index] = round_2(dequantized[index], min(6, log2_of_block_size + 2)); + } + } + } + + return {}; } DecoderErrorOr Decoder::update_reference_frames() diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.h b/Userland/Libraries/LibVideo/VP9/Decoder.h index b73b6017aa7..e32510edb0a 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.h +++ b/Userland/Libraries/LibVideo/VP9/Decoder.h @@ -23,7 +23,20 @@ public: DecoderErrorOr decode_frame(ByteBuffer const&); void dump_frame_info(); + // FIXME: These functions should be replaced by a struct that contains + // all the information needed to display a frame. + Vector const& get_output_buffer_for_plane(u8 plane) const; + Gfx::Size get_y_plane_size(); + bool get_uv_subsampling_y(); + bool get_uv_subsampling_x(); + private: + typedef i32 Intermediate; + + DecoderErrorOr allocate_buffers(); + Vector& get_temp_buffer(u8 plane); + Vector& get_output_buffer(u8 plane); + /* (8.4) Probability Adaptation Process */ u8 merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor); u8 merge_probs(int const* tree, int index, u8* probs, u8* counts, u8 count_sat, u8 max_update_factor); @@ -33,16 +46,101 @@ private: u8 adapt_prob(u8 prob, u8 counts[2]); /* (8.5) Prediction Processes */ - DecoderErrorOr predict_intra(size_t plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index); - DecoderErrorOr predict_inter(size_t plane, u32 x, u32 y, u32 w, u32 h, u32 block_index); + // (8.5.1) Intra prediction process + DecoderErrorOr predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index); + + // (8.5.1) Inter prediction process + DecoderErrorOr predict_inter(u8 plane, u32 x, u32 y, u32 w, u32 h, u32 block_index); /* (8.6) Reconstruction and Dequantization */ - 
DecoderErrorOr reconstruct(size_t plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size); + + u16 dc_q(u8 b); + u16 ac_q(u8 b); + // Returns the quantizer index for the current block + u8 get_qindex(); + // Returns the quantizer value for the dc coefficient for a particular plane + u16 get_dc_quant(u8 plane); + // Returns the quantizer value for the ac coefficient for a particular plane + u16 get_ac_quant(u8 plane); + + // (8.6.2) Reconstruct process + DecoderErrorOr reconstruct(u8 plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size); + + // (8.7) Inverse transform process + DecoderErrorOr inverse_transform_2d(Vector& dequantized, u8 log2_of_block_size); + + // (8.7.1) 1D Transforms + // (8.7.1.1) Butterfly functions + + inline i32 cos64(u8 angle); + inline i32 sin64(u8 angle); + // The function B( a, b, angle, 0 ) performs a butterfly rotation. + inline void butterfly_rotation_in_place(Vector& data, size_t index_a, size_t index_b, u8 angle, bool flip); + // The function H( a, b, 0 ) performs a Hadamard rotation. + inline void hadamard_rotation_in_place(Vector& data, size_t index_a, size_t index_b, bool flip); + // The function SB( a, b, angle, 0 ) performs a butterfly rotation. + // Spec defines the source as array T, and the destination array as S. + template + inline void butterfly_rotation(Vector& source, Vector& destination, size_t index_a, size_t index_b, u8 angle, bool flip); + // The function SH( a, b ) performs a Hadamard rotation and rounding. + // Spec defines the source array as S, and the destination array as T. + template + inline void hadamard_rotation(Vector& source, Vector& destination, size_t index_a, size_t index_b); + + template + inline i32 round_2(T value, u8 bits); + + // Checks whether the value is representable by a signed integer with (8 + bit_depth) bits. 
+ inline bool check_intermediate_bounds(Intermediate value); + + // (8.7.1.10) This process does an in-place Walsh-Hadamard transform of the array T (of length 4). + inline DecoderErrorOr inverse_walsh_hadamard_transform(Vector& data, u8 log2_of_block_size, u8 shift); + + // (8.7.1.2) Inverse DCT array permutation process + inline DecoderErrorOr inverse_discrete_cosine_transform_array_permutation(Vector& data, u8 log2_of_block_size); + // (8.7.1.3) Inverse DCT process + inline DecoderErrorOr inverse_discrete_cosine_transform(Vector& data, u8 log2_of_block_size); + + // (8.7.1.4) This process performs the in-place permutation of the array T of length 2 n which is required as the first step of + // the inverse ADST. + inline void inverse_asymmetric_discrete_sine_transform_input_array_permutation(Vector& data, Vector& temp, u8 log2_of_block_size); + // (8.7.1.5) This process performs the in-place permutation of the array T of length 2 n which is required before the final + // step of the inverse ADST. + inline void inverse_asymmetric_discrete_sine_transform_output_array_permutation(Vector& data, Vector& temp, u8 log2_of_block_size); + + // (8.7.1.6) This process does an in-place transform of the array T to perform an inverse ADST. + inline void inverse_asymmetric_discrete_sine_transform_4(Vector& data); + // (8.7.1.7) This process does an in-place transform of the array T using a higher precision array S for intermediate + // results. + inline DecoderErrorOr inverse_asymmetric_discrete_sine_transform_8(Vector& data); + // (8.7.1.8) This process does an in-place transform of the array T using a higher precision array S for intermediate + // results. + inline DecoderErrorOr inverse_asymmetric_discrete_sine_transform_16(Vector& data); + // (8.7.1.9) This process performs an in-place inverse ADST process on the array T of size 2 n for 2 ≤ n ≤ 4. 
+ inline DecoderErrorOr inverse_asymmetric_discrete_sine_transform(Vector& data, u8 log2_of_block_size); /* (8.10) Reference Frame Update Process */ DecoderErrorOr update_reference_frames(); NonnullOwnPtr m_parser; + + struct { + // FIXME: We may be able to consolidate some of these to reduce memory consumption. + Vector dequantized; + Vector row_or_column; + + // predict_intra + Vector above_row; + Vector left_column; + Vector predicted_samples; + + // transforms (dct, adst) + Vector transform_temp; + Vector adst_temp; + + Vector intermediate[3]; + Vector output[3]; + } m_buffers; }; } diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp index 3de4ab0dbab..64d68d8ff07 100644 --- a/Userland/Libraries/LibVideo/VP9/Parser.cpp +++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp @@ -5,10 +5,13 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include "Parser.h" -#include "Decoder.h" -#include "Utilities.h" #include +#include +#include + +#include "Decoder.h" +#include "Parser.h" +#include "Utilities.h" namespace Video::VP9 { @@ -23,22 +26,22 @@ Parser::Parser(Decoder& decoder) Parser::~Parser() { - cleanup_tile_allocations(); - free(m_prev_segment_ids); } void Parser::cleanup_tile_allocations() { - free(m_skips); - free(m_tx_sizes); - free(m_mi_sizes); - free(m_y_modes); - free(m_segment_ids); - free(m_ref_frames); - free(m_interp_filters); - free(m_mvs); - free(m_sub_mvs); - free(m_sub_modes); + // FIXME: Is this necessary? Data should be truncated and + // overwritten by the next tile. 
+ m_skips.clear_with_capacity(); + m_tx_sizes.clear_with_capacity(); + m_mi_sizes.clear_with_capacity(); + m_y_modes.clear_with_capacity(); + m_segment_ids.clear_with_capacity(); + m_ref_frames.clear_with_capacity(); + m_interp_filters.clear_with_capacity(); + m_mvs.clear_with_capacity(); + m_sub_mvs.clear_with_capacity(); + m_sub_modes.clear_with_capacity(); } /* (6.1) */ @@ -63,6 +66,8 @@ DecoderErrorOr Parser::parse_frame(ByteBuffer const& frame_data) dbgln("Finished reading compressed header"); TRY_READ(m_bit_stream->exit_bool()); + TRY(m_decoder.allocate_buffers()); + TRY(decode_tiles()); TRY(refresh_probs()); @@ -335,11 +340,11 @@ DecoderErrorOr Parser::loop_filter_params() DecoderErrorOr Parser::quantization_params() { - auto base_q_idx = TRY_READ(m_bit_stream->read_f8()); - auto delta_q_y_dc = TRY(read_delta_q()); - auto delta_q_uv_dc = TRY(read_delta_q()); - auto delta_q_uv_ac = TRY(read_delta_q()); - m_lossless = base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 && delta_q_uv_ac == 0; + m_base_q_idx = TRY_READ(m_bit_stream->read_f8()); + m_delta_q_y_dc = TRY(read_delta_q()); + m_delta_q_uv_dc = TRY(read_delta_q()); + m_delta_q_uv_ac = TRY(read_delta_q()); + m_lossless = m_base_q_idx == 0 && m_delta_q_y_dc == 0 && m_delta_q_uv_dc == 0 && m_delta_q_uv_ac == 0; return {}; } @@ -441,9 +446,8 @@ void Parser::setup_past_independence() } } m_segmentation_abs_or_delta_update = false; - if (m_prev_segment_ids) - free(m_prev_segment_ids); - m_prev_segment_ids = static_cast(kmalloc_array(m_mi_rows, m_mi_cols)); + m_prev_segment_ids.clear_with_capacity(); + m_prev_segment_ids.resize_and_keep_capacity(m_mi_rows * m_mi_cols); m_loop_filter_delta_enabled = true; m_loop_filter_ref_deltas[IntraFrame] = 1; m_loop_filter_ref_deltas[LastFrame] = 0; @@ -561,8 +565,8 @@ DecoderErrorOr Parser::read_coef_probs() auto max_l = (k == 0) ? 
3 : 6; for (auto l = 0; l < max_l; l++) { for (auto m = 0; m < 3; m++) { - auto& coef_probs = m_probability_tables->coef_probs()[tx_size]; - coef_probs[i][j][k][l][m] = TRY(diff_update_prob(coef_probs[i][j][k][l][m])); + auto& prob = m_probability_tables->coef_probs()[tx_size][i][j][k][l][m]; + prob = TRY(diff_update_prob(prob)); } } } @@ -748,30 +752,28 @@ void Parser::setup_compound_reference_mode() } } -void Parser::allocate_tile_data() +DecoderErrorOr Parser::allocate_tile_data() { auto dimensions = m_mi_rows * m_mi_cols; - if (dimensions == m_allocated_dimensions) - return; cleanup_tile_allocations(); - m_skips = static_cast(kmalloc_array(dimensions, sizeof(bool))); - m_tx_sizes = static_cast(kmalloc_array(dimensions, sizeof(TXSize))); - m_mi_sizes = static_cast(kmalloc_array(dimensions, sizeof(u32))); - m_y_modes = static_cast(kmalloc_array(dimensions, sizeof(u8))); - m_segment_ids = static_cast(kmalloc_array(dimensions, sizeof(u8))); - m_ref_frames = static_cast(kmalloc_array(dimensions, 2, sizeof(ReferenceFrame))); - m_interp_filters = static_cast(kmalloc_array(dimensions, sizeof(InterpolationFilter))); - m_mvs = static_cast(kmalloc_array(dimensions, 2, sizeof(MV))); - m_sub_mvs = static_cast(kmalloc_array(dimensions, 8, sizeof(MV))); - m_sub_modes = static_cast(kmalloc_array(dimensions, 4, sizeof(IntraMode))); - m_allocated_dimensions = dimensions; + DECODER_TRY_ALLOC(m_skips.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_tx_sizes.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_mi_sizes.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_y_modes.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_segment_ids.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_ref_frames.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_interp_filters.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_mvs.try_resize_and_keep_capacity(dimensions)); + 
DECODER_TRY_ALLOC(m_sub_mvs.try_resize_and_keep_capacity(dimensions)); + DECODER_TRY_ALLOC(m_sub_modes.try_resize_and_keep_capacity(dimensions)); + return {}; } DecoderErrorOr Parser::decode_tiles() { auto tile_cols = 1 << m_tile_cols_log2; auto tile_rows = 1 << m_tile_rows_log2; - allocate_tile_data(); + TRY(allocate_tile_data()); clear_above_context(); for (auto tile_row = 0; tile_row < tile_rows; tile_row++) { for (auto tile_col = 0; tile_col < tile_cols; tile_col++) { @@ -826,9 +828,7 @@ DecoderErrorOr Parser::decode_tile() { for (auto row = m_mi_row_start; row < m_mi_row_end; row += 8) { clear_left_context(); - m_row = row; for (auto col = m_mi_col_start; col < m_mi_col_end; col += 8) { - m_col = col; TRY(decode_partition(row, col, Block_64x64)); } } @@ -845,14 +845,16 @@ void Parser::clear_left_context() DecoderErrorOr Parser::decode_partition(u32 row, u32 col, u8 block_subsize) { if (row >= m_mi_rows || col >= m_mi_cols) - return DecoderError::corrupted("Row or column were outside valid ranges"sv); + return {}; m_block_subsize = block_subsize; m_num_8x8 = num_8x8_blocks_wide_lookup[block_subsize]; auto half_block_8x8 = m_num_8x8 >> 1; m_has_rows = (row + half_block_8x8) < m_mi_rows; m_has_cols = (col + half_block_8x8) < m_mi_cols; - + m_row = row; + m_col = col; auto partition = TRY_READ(m_tree_parser->parse_tree(SyntaxElementType::Partition)); + auto subsize = subsize_lookup[partition][block_subsize]; if (subsize < Block_8x8 || partition == PartitionNone) { TRY(decode_block(row, col, subsize)); @@ -871,15 +873,22 @@ DecoderErrorOr Parser::decode_partition(u32 row, u32 col, u8 block_subsize TRY(decode_partition(row + half_block_8x8, col + half_block_8x8, subsize)); } if (block_subsize == Block_8x8 || partition != PartitionSplit) { + auto above_context = 15 >> b_width_log2_lookup[subsize]; + auto left_context = 15 >> b_height_log2_lookup[subsize]; for (size_t i = 0; i < m_num_8x8; i++) { - m_above_partition_context[col + i] = 15 >> 
b_width_log2_lookup[subsize]; - m_left_partition_context[row + i] = 15 >> b_width_log2_lookup[subsize]; + m_above_partition_context[col + i] = above_context; + m_left_partition_context[row + i] = left_context; } } return {}; } -DecoderErrorOr Parser::decode_block(u32 row, u32 col, u8 subsize) +size_t Parser::get_image_index(u32 row, u32 column) +{ + return row * m_mi_cols + column; +} + +DecoderErrorOr Parser::decode_block(u32 row, u32 col, BlockSubsize subsize) { m_mi_row = row; m_mi_col = col; @@ -893,25 +902,24 @@ DecoderErrorOr Parser::decode_block(u32 row, u32 col, u8 subsize) m_skip = true; for (size_t y = 0; y < num_8x8_blocks_high_lookup[subsize]; y++) { for (size_t x = 0; x < num_8x8_blocks_wide_lookup[subsize]; x++) { - auto pos = (row + y) * m_mi_cols + (col + x); + auto pos = get_image_index(row + y, col + x); m_skips[pos] = m_skip; m_tx_sizes[pos] = m_tx_size; m_mi_sizes[pos] = m_mi_size; m_y_modes[pos] = m_y_mode; m_segment_ids[pos] = m_segment_id; for (size_t ref_list = 0; ref_list < 2; ref_list++) - m_ref_frames[(pos * 2) + ref_list] = m_ref_frame[ref_list]; + m_ref_frames[pos][ref_list] = m_ref_frame[ref_list]; if (m_is_inter) { m_interp_filters[pos] = m_interp_filter; for (size_t ref_list = 0; ref_list < 2; ref_list++) { - auto pos_with_ref_list = (pos * 2 + ref_list) * sizeof(MV); - m_mvs[pos_with_ref_list] = m_block_mvs[ref_list][3]; + m_mvs[pos][ref_list] = m_block_mvs[ref_list][3]; for (size_t b = 0; b < 4; b++) - m_sub_mvs[pos_with_ref_list * 4 + b * sizeof(MV)] = m_block_mvs[ref_list][b]; + m_sub_mvs[pos][ref_list][b] = m_block_mvs[ref_list][b]; } } else { for (size_t b = 0; b < 4; b++) - m_sub_modes[pos * 4 + b] = static_cast(m_block_sub_modes[b]); + m_sub_modes[pos][b] = static_cast(m_block_sub_modes[b]); } } } @@ -998,10 +1006,10 @@ DecoderErrorOr Parser::read_tx_size(bool allow_select) DecoderErrorOr Parser::inter_frame_mode_info() { - m_left_ref_frame[0] = m_available_l ? 
m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1)] : IntraFrame; - m_above_ref_frame[0] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col] : IntraFrame; - m_left_ref_frame[1] = m_available_l ? m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1) + 1] : None; - m_above_ref_frame[1] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col + 1] : None; + m_left_ref_frame[0] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][0] : IntraFrame; + m_above_ref_frame[0] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][0] : IntraFrame; + m_left_ref_frame[1] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][1] : None; + m_above_ref_frame[1] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][1] : None; m_left_intra = m_left_ref_frame[0] <= IntraFrame; m_above_intra = m_above_ref_frame[0] <= IntraFrame; m_left_single = m_left_ref_frame[1] <= None; @@ -1234,10 +1242,23 @@ DecoderErrorOr Parser::read_mv_component(u8) return (mv_sign ? -1 : 1) * static_cast(mag); } +Gfx::Point Parser::get_decoded_point_for_plane(u8 column, u8 row, u8 plane) +{ + if (plane == 0) + return { column * 8, row * 8 }; + return { (column * 8) >> m_subsampling_x, (row * 8) >> m_subsampling_y }; +} + +Gfx::Size Parser::get_decoded_size_for_plane(u8 plane) +{ + auto point = get_decoded_point_for_plane(m_mi_cols, m_mi_rows, plane); + return { point.x(), point.y() }; +} + DecoderErrorOr Parser::residual() { auto block_size = m_mi_size < Block_8x8 ? Block_8x8 : static_cast(m_mi_size); - for (size_t plane = 0; plane < 3; plane++) { + for (u8 plane = 0; plane < 3; plane++) { auto tx_size = (plane > 0) ? 
get_uv_tx_size() : m_tx_size; auto step = 1 << tx_size; auto plane_size = get_plane_block_size(block_size, plane); @@ -1274,10 +1295,8 @@ DecoderErrorOr Parser::residual() TRY(m_decoder.reconstruct(plane, start_x, start_y, tx_size)); } } - auto above_sub_context = m_above_nonzero_context[plane]; - auto left_sub_context = m_left_nonzero_context[plane]; - above_sub_context.resize_and_keep_capacity((start_x >> 2) + step); - left_sub_context.resize_and_keep_capacity((start_y >> 2) + step); + auto& above_sub_context = m_above_nonzero_context[plane]; + auto& left_sub_context = m_left_nonzero_context[plane]; for (auto i = 0; i < step; i++) { above_sub_context[(start_x >> 2) + i] = non_zero; left_sub_context[(start_y >> 2) + i] = non_zero; @@ -1378,7 +1397,7 @@ DecoderErrorOr Parser::read_coef(Token token) { auto cat = extra_bits[token][0]; auto num_extra = extra_bits[token][1]; - auto coef = extra_bits[token][2]; + u32 coef = extra_bits[token][2]; if (token == DctValCat6) { for (size_t e = 0; e < (u8)(m_bit_depth - 8); e++) { auto high_bit = TRY_READ(m_bit_stream->read_bool(255)); diff --git a/Userland/Libraries/LibVideo/VP9/Parser.h b/Userland/Libraries/LibVideo/VP9/Parser.h index 50a6dd059f4..dae569757fb 100644 --- a/Userland/Libraries/LibVideo/VP9/Parser.h +++ b/Userland/Libraries/LibVideo/VP9/Parser.h @@ -40,7 +40,7 @@ private: /* Utilities */ void clear_context(Vector& context, size_t size); void clear_context(Vector>& context, size_t outer_size, size_t inner_size); - void allocate_tile_data(); + DecoderErrorOr allocate_tile_data(); void cleanup_tile_allocations(); /* (6.1) Frame Syntax */ @@ -94,7 +94,7 @@ private: DecoderErrorOr decode_tile(); void clear_left_context(); DecoderErrorOr decode_partition(u32 row, u32 col, u8 block_subsize); - DecoderErrorOr decode_block(u32 row, u32 col, u8 subsize); + DecoderErrorOr decode_block(u32 row, u32 col, BlockSubsize subsize); DecoderErrorOr mode_info(); DecoderErrorOr intra_frame_mode_info(); DecoderErrorOr 
intra_segment_id(); @@ -123,6 +123,10 @@ private: DecoderErrorOr find_best_ref_mvs(int ref_list); DecoderErrorOr append_sub8x8_mvs(u8 block, u8 ref_list); DecoderErrorOr use_mv_hp(MV const& delta_mv); + size_t get_image_index(u32 row, u32 column); + + Gfx::Point get_decoded_point_for_plane(u8 row, u8 column, u8 plane); + Gfx::Size get_decoded_size_for_plane(u8 plane); u8 m_profile { 0 }; u8 m_frame_to_show_map_index { 0 }; @@ -131,8 +135,8 @@ private: u8 m_loop_filter_level { 0 }; u8 m_loop_filter_sharpness { 0 }; bool m_loop_filter_delta_enabled { false }; - FrameType m_frame_type; - FrameType m_last_frame_type; + FrameType m_frame_type { FrameType::KeyFrame }; + FrameType m_last_frame_type { FrameType::KeyFrame }; bool m_show_frame { false }; bool m_error_resilient_mode { false }; bool m_frame_is_intra { false }; @@ -157,7 +161,11 @@ private: u32 m_mi_rows { 0 }; u32 m_sb64_cols { 0 }; u32 m_sb64_rows { 0 }; - InterpolationFilter m_interpolation_filter; + InterpolationFilter m_interpolation_filter { 0xf }; + u8 m_base_q_idx { 0 }; + i8 m_delta_q_y_dc { 0 }; + i8 m_delta_q_uv_dc { 0 }; + i8 m_delta_q_uv_ac { 0 }; bool m_lossless { false }; u8 m_segmentation_tree_probs[7]; u8 m_segmentation_pred_prob[3]; @@ -184,17 +192,24 @@ private: u32 m_mi_col_end { 0 }; u32 m_mi_row { 0 }; u32 m_mi_col { 0 }; - u32 m_mi_size { 0 }; + BlockSubsize m_mi_size { 0 }; bool m_available_u { false }; bool m_available_l { false }; u8 m_segment_id { 0 }; + // FIXME: Should this be an enum? + // skip equal to 0 indicates that there may be some transform coefficients to read for this block; skip equal to 1 + // indicates that there are no transform coefficients. + // + // skip may be set to 0 even if transform blocks contain immediate end of block markers. bool m_skip { false }; u8 m_num_8x8 { 0 }; bool m_has_rows { false }; bool m_has_cols { false }; TXSize m_max_tx_size { TX_4x4 }; u8 m_block_subsize { 0 }; + // The row to use for getting partition tree probability lookups. 
u32 m_row { 0 }; + // The column to use for getting partition tree probability lookups. u32 m_col { 0 }; TXSize m_tx_size { TX_4x4 }; ReferenceFrame m_ref_frame[2]; @@ -228,19 +243,18 @@ private: ReferenceFrame m_comp_fixed_ref; ReferenceFrame m_comp_var_ref[2]; MV m_block_mvs[2][4]; - u8* m_prev_segment_ids { nullptr }; + Vector m_prev_segment_ids; - u32 m_allocated_dimensions { 0 }; - bool* m_skips { nullptr }; - TXSize* m_tx_sizes { nullptr }; - u32* m_mi_sizes { nullptr }; - u8* m_y_modes { nullptr }; - u8* m_segment_ids { nullptr }; - ReferenceFrame* m_ref_frames { nullptr }; - InterpolationFilter* m_interp_filters { nullptr }; - MV* m_mvs { nullptr }; - MV* m_sub_mvs { nullptr }; - IntraMode* m_sub_modes { nullptr }; + Vector m_skips; + Vector m_tx_sizes; + Vector m_mi_sizes; + Vector m_y_modes; + Vector m_segment_ids; + Vector> m_ref_frames; + Vector m_interp_filters; + Vector> m_mvs; + Vector, 2>> m_sub_mvs; + Vector> m_sub_modes; OwnPtr m_bit_stream; OwnPtr m_probability_tables; diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp index a568168c677..83c2a9f63a0 100644 --- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp @@ -212,17 +212,17 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node) u32 above_mode, left_mode; if (m_decoder.m_mi_size >= Block_8x8) { above_mode = AVAIL_U - ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2] + ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2] : DcPred; left_mode = AVAIL_L - ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1] + ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1] : DcPred; } else { if (m_idy) { above_mode = m_decoder.m_block_sub_modes[m_idx]; } else { above_mode = AVAIL_U - ? 
m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2 + m_idx] + ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2 + m_idx] : DcPred; } @@ -230,7 +230,7 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node) left_mode = m_decoder.m_block_sub_modes[m_idy * 2]; } else { left_mode = AVAIL_L - ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1 + m_idy * 2] + ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1 + m_idy * 2] : DcPred; } } @@ -544,12 +544,16 @@ u8 TreeParser::calculate_tx_size_probability(u8 node) { auto above = m_decoder.m_max_tx_size; auto left = m_decoder.m_max_tx_size; - auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col; - if (AVAIL_U && !m_decoder.m_skips[u_pos]) - above = m_decoder.m_tx_sizes[u_pos]; - auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1; - if (AVAIL_L && !m_decoder.m_skips[l_pos]) - left = m_decoder.m_tx_sizes[l_pos]; + if (AVAIL_U) { + auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col; + if (!m_decoder.m_skips[u_pos]) + above = m_decoder.m_tx_sizes[u_pos]; + } + if (AVAIL_L) { + auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1; + if (!m_decoder.m_skips[l_pos]) + left = m_decoder.m_tx_sizes[l_pos]; + } if (!AVAIL_L) left = above; if (!AVAIL_U) @@ -582,20 +586,14 @@ u8 TreeParser::calculate_interp_filter_probability(u8 node) return m_decoder.m_probability_tables->interp_filter_probs()[m_ctx][node]; } -u8 TreeParser::calculate_token_probability(u8 node) +void TreeParser::set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos) { - auto prob = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][min(2, 1 + node)]; - if (node < 2) - return prob; - 
auto x = (prob - 1) / 2; - auto& pareto_table = m_decoder.m_probability_tables->pareto_table(); - if (prob & 1) - return pareto_table[x][node - 2]; - return (pareto_table[x][node - 2] + pareto_table[x + 1][node - 2]) >> 1; -} + m_band = band; + m_c = c; + m_plane = plane; + m_tx_size = tx_size; + m_pos = pos; -u8 TreeParser::calculate_more_coefs_probability() -{ if (m_c == 0) { auto sx = m_plane > 0 ? m_decoder.m_subsampling_x : 0; auto sy = m_plane > 0 ? m_decoder.m_subsampling_y : 0; @@ -618,7 +616,7 @@ u8 TreeParser::calculate_more_coefs_probability() auto n = 4 << m_tx_size; auto i = m_pos / n; auto j = m_pos % n; - auto a = (i - 1) * n + j; + auto a = i > 0 ? (i - 1) * n + j : 0; auto a2 = i * n + j - 1; if (i > 0 && j > 0) { if (m_decoder.m_tx_type == DCT_ADST) { @@ -640,9 +638,25 @@ u8 TreeParser::calculate_more_coefs_probability() } m_ctx = (1 + m_decoder.m_token_cache[neighbor_0] + m_decoder.m_token_cache[neighbor_1]) >> 1; } +} + +u8 TreeParser::calculate_more_coefs_probability() +{ return m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][0]; } +u8 TreeParser::calculate_token_probability(u8 node) +{ + auto prob = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][min(2, 1 + node)]; + if (node < 2) + return prob; + auto x = (prob - 1) / 2; + auto& pareto_table = m_decoder.m_probability_tables->pareto_table(); + if (prob & 1) + return pareto_table[x][node - 2]; + return (pareto_table[x][node - 2] + pareto_table[x + 1][node - 2]) >> 1; +} + void TreeParser::count_syntax_element(SyntaxElementType type, int value) { switch (type) { diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h index 87df3a4501b..ba0bf6f987f 100644 --- a/Userland/Libraries/LibVideo/VP9/TreeParser.h +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h @@ -57,14 +57,7 @@ public: m_idy = idy; } - void set_tokens_variables(u8 band, 
u32 c, u32 plane, TXSize tx_size, u32 pos) - { - m_band = band; - m_c = c; - m_plane = plane; - m_tx_size = tx_size; - m_pos = pos; - } + void set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos); void set_start_x_and_y(u32 start_x, u32 start_y) { diff --git a/Userland/Libraries/LibVideo/VP9/Utilities.cpp b/Userland/Libraries/LibVideo/VP9/Utilities.cpp deleted file mode 100644 index 70336efc6da..00000000000 --- a/Userland/Libraries/LibVideo/VP9/Utilities.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2021, Hunter Salyer - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include "Utilities.h" - -namespace Video::VP9 { - -u8 clip_3(u8 x, u8 y, u8 z) -{ - return clamp(z, x, y); -} - -u8 round_2(u8 x, u8 n) -{ - return (x + (1 << (n - 1))) >> n; -} - -} diff --git a/Userland/Libraries/LibVideo/VP9/Utilities.h b/Userland/Libraries/LibVideo/VP9/Utilities.h index 0d64125fa6e..08b073791de 100644 --- a/Userland/Libraries/LibVideo/VP9/Utilities.h +++ b/Userland/Libraries/LibVideo/VP9/Utilities.h @@ -11,7 +11,35 @@ namespace Video::VP9 { -u8 clip_3(u8 x, u8 y, u8 z); -u8 round_2(u8 x, u8 n); +// FIXME: Once everything is working, replace this with plain clamp +// since parameter order is different +template +T clip_3(T x, T y, T z) +{ + return clamp(z, x, y); +} + +template +u16 clip_1(u8 bit_depth, T x) +{ + if (x < 0) { + return 0u; + } + const T max = (1u << bit_depth) - 1u; + if (x > max) + return max; + return x; +} + +template +inline T brev(C bit_count, T value) +{ + T result = 0; + for (C i = 0; i < bit_count; i++) { + auto bit = (value >> i) & 1; + result |= bit << (bit_count - 1 - i); + } + return result; +} }