From 074f771b596d0275c818184f24036ba9d3e3af0a Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Sat, 8 Oct 2022 18:36:57 -0500 Subject: [PATCH] LibVideo: Add VideoFrame class for decoded video frames The class is virtual and has one subclass, SubsampledYUVFrame, which is used by the VP9 decoder to return a single frame. The output_to_bitmap(Bitmap&) function can be used to set pixels on an existing bitmap of the correct size to the RGB values that should be displayed. The to_bitmap() function will allocate a new bitmap and fill it using output_to_bitmap. This new class also implements bilinear scaling of the subsampled U and V planes so that subsampled videos' colors will appear smoother. --- Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp | 2 +- Tests/LibVideo/TestVP9Decode.cpp | 2 +- Userland/Applications/VideoPlayer/main.cpp | 42 +++------ Userland/Libraries/LibVideo/CMakeLists.txt | 1 + Userland/Libraries/LibVideo/VP9/Decoder.cpp | 67 +++++++++------ Userland/Libraries/LibVideo/VP9/Decoder.h | 19 ++--- Userland/Libraries/LibVideo/VideoFrame.cpp | 95 +++++++++++++++++++++ Userland/Libraries/LibVideo/VideoFrame.h | 86 +++++++++++++++++++ 8 files changed, 244 insertions(+), 70 deletions(-) create mode 100644 Userland/Libraries/LibVideo/VideoFrame.cpp create mode 100644 Userland/Libraries/LibVideo/VideoFrame.h diff --git a/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp index 4827a1f9aa8..506b8c4ab54 100644 --- a/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp @@ -10,7 +10,7 @@ extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size) { Video::VP9::Decoder vp9_decoder; - if (auto decode_error = vp9_decoder.decode({ data, size }); decode_error.is_error()) + if (auto decode_error = vp9_decoder.receive_sample({ data, size }); decode_error.is_error()) return -1; return 0; } diff --git a/Tests/LibVideo/TestVP9Decode.cpp b/Tests/LibVideo/TestVP9Decode.cpp index 15462b8f1e2..edcb5558901 100644 --- 
a/Tests/LibVideo/TestVP9Decode.cpp +++ b/Tests/LibVideo/TestVP9Decode.cpp @@ -29,7 +29,7 @@ static void decode_video(StringView path, size_t expected_frame_count) continue; for (frame_index = 0; frame_index < block.frames().size(); frame_index++) { - MUST(vp9_decoder.decode(block.frames()[frame_index])); + MUST(vp9_decoder.receive_sample(block.frames()[frame_index])); frame_count++; } } diff --git a/Userland/Applications/VideoPlayer/main.cpp b/Userland/Applications/VideoPlayer/main.cpp index 069b173012d..5ebe2773976 100644 --- a/Userland/Applications/VideoPlayer/main.cpp +++ b/Userland/Applications/VideoPlayer/main.cpp @@ -75,47 +75,29 @@ ErrorOr serenity_main(Main::Arguments arguments) if (!optional_sample.has_value()) return; - auto result = vp9_decoder.decode(optional_sample.release_value()); + auto result = vp9_decoder.receive_sample(optional_sample.release_value()); if (result.is_error()) { outln("Error decoding frame {}: {}", frame_number, result.error().string_literal()); return; } - // FIXME: This method of output is temporary and should be replaced with an image struct - // containing the planes and their sizes. Ideally, this struct would be interpreted - // by some color conversion library and then passed to something (GL?) for output. 
- auto const& output_y = vp9_decoder.get_output_buffer_for_plane(0); - auto const& output_u = vp9_decoder.get_output_buffer_for_plane(1); - auto const& output_v = vp9_decoder.get_output_buffer_for_plane(2); - auto y_size = vp9_decoder.get_y_plane_size(); - auto uv_subsampling_y = vp9_decoder.get_uv_subsampling_y(); - auto uv_subsampling_x = vp9_decoder.get_uv_subsampling_x(); - Gfx::IntSize uv_size { y_size.width() >> uv_subsampling_x, y_size.height() >> uv_subsampling_y }; - auto cicp = vp9_decoder.get_cicp_color_space(); + auto frame_result = vp9_decoder.get_decoded_frame(); + if (frame_result.is_error()) { + outln("Error retrieving frame {}: {}", frame_number, frame_result.error().string_literal()); + return; + } + auto frame = frame_result.release_value(); + + auto& cicp = frame->cicp(); video_track.color_format.replace_code_points_if_specified(cicp); cicp.default_code_points_if_unspecified(Video::ColorPrimaries::BT709, Video::TransferCharacteristics::BT709, Video::MatrixCoefficients::BT709); - auto color_converter_result = Video::ColorConverter::create(vp9_decoder.get_bit_depth(), cicp); - if (color_converter_result.is_error()) { - outln("Cannot convert video colors: {}", color_converter_result.release_error().string_literal()); + auto convert_result = frame->output_to_bitmap(image); + if (convert_result.is_error()) { + outln("Error creating bitmap for frame {}: {}", frame_number, convert_result.error().string_literal()); return; } - auto color_converter = color_converter_result.release_value(); - - for (auto y_row = 0u; y_row < video_track.pixel_height; y_row++) { - auto uv_row = y_row >> uv_subsampling_y; - - for (auto y_column = 0u; y_column < video_track.pixel_width; y_column++) { - auto uv_column = y_column >> uv_subsampling_x; - - auto y = output_y[y_row * y_size.width() + y_column]; - auto u = output_u[uv_row * uv_size.width() + uv_column]; - auto v = output_v[uv_row * uv_size.width() + uv_column]; - - image->set_pixel(y_column, y_row, 
color_converter.convert_yuv_to_full_range_rgb(y, u, v)); - } - } image_widget->set_bitmap(image); image_widget->update(); diff --git a/Userland/Libraries/LibVideo/CMakeLists.txt b/Userland/Libraries/LibVideo/CMakeLists.txt index deffad34beb..c45c1803b41 100644 --- a/Userland/Libraries/LibVideo/CMakeLists.txt +++ b/Userland/Libraries/LibVideo/CMakeLists.txt @@ -3,6 +3,7 @@ set(SOURCES Color/ColorPrimaries.cpp Color/TransferCharacteristics.cpp MatroskaReader.cpp + VideoFrame.cpp VP9/BitStream.cpp VP9/Decoder.cpp VP9/Parser.cpp diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp index 4d8b9fa950c..b899d3e050d 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp +++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp @@ -19,7 +19,7 @@ Decoder::Decoder() { } -DecoderErrorOr Decoder::decode(Span chunk_data) +DecoderErrorOr Decoder::receive_sample(Span chunk_data) { auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data); @@ -42,9 +42,9 @@ DecoderErrorOr Decoder::decode(Span chunk_data) return {}; } -DecoderErrorOr Decoder::decode(ByteBuffer const& chunk_data) +DecoderErrorOr Decoder::receive_sample(ByteBuffer const& chunk_data) { - return decode(chunk_data.span()); + return receive_sample(chunk_data.span()); } void Decoder::dump_frame_info() @@ -57,7 +57,7 @@ inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride) return row * stride + column; } -DecoderErrorOr Decoder::decode_frame(Span frame_data) +DecoderErrorOr Decoder::decode_frame(Span frame_data) { // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax // tables include function calls indicating when the block decode processes should be triggered. 
@@ -130,27 +130,7 @@ Vector& Decoder::get_output_buffer(u8 plane) return m_buffers.output[plane]; } -Vector const& Decoder::get_output_buffer_for_plane(u8 plane) const -{ - return m_buffers.output[plane]; -} - -Gfx::Size Decoder::get_y_plane_size() -{ - return m_parser->get_decoded_size_for_plane(0); -} - -bool Decoder::get_uv_subsampling_y() -{ - return m_parser->m_subsampling_y; -} - -bool Decoder::get_uv_subsampling_x() -{ - return m_parser->m_subsampling_x; -} - -CodingIndependentCodePoints Decoder::get_cicp_color_space() +inline CodingIndependentCodePoints Decoder::get_cicp_color_space() /* Declared in the private section of Decoder.h by this patch. */ { ColorPrimaries color_primaries; TransferCharacteristics transfer_characteristics; @@ -209,9 +189,42 @@ CodingIndependentCodePoints Decoder::get_cicp_color_space() return { color_primaries, transfer_characteristics, matrix_coefficients, m_parser->m_color_range }; } -u8 Decoder::get_bit_depth() +DecoderErrorOr> Decoder::get_decoded_frame() /* Copies the frame_width x frame_height area of the three decoder output buffers into newly allocated arrays and returns them wrapped in a SubsampledYUVFrame. */ { - return m_parser->m_bit_depth; + size_t decoded_y_width = m_parser->m_mi_cols * 8; /* Row stride, in samples, used below to index the decoder's luma output buffer. */ + Gfx::Size output_y_size = { + m_parser->m_frame_width, + m_parser->m_frame_height, + }; + auto decoded_uv_width = decoded_y_width >> m_parser->m_subsampling_x; /* Row stride used below for both chroma output buffers. */ + Gfx::Size output_uv_size = { + output_y_size.width() >> m_parser->m_subsampling_x, + output_y_size.height() >> m_parser->m_subsampling_y, + }; /* NOTE(review): these right shifts round down, so with subsampling an odd frame width or height leaves the last luma column or row without a chroma sample of its own. */ + Array, 3> output_buffers = { + DECODER_TRY_ALLOC(FixedArray::try_create(output_y_size.width() * output_y_size.height())), + DECODER_TRY_ALLOC(FixedArray::try_create(output_uv_size.width() * output_uv_size.height())), + DECODER_TRY_ALLOC(FixedArray::try_create(output_uv_size.width() * output_uv_size.height())), + }; + for (u8 plane = 0; plane < 3; plane++) { + auto& buffer = output_buffers[plane]; + auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width; + auto output_size = plane == 0 ? 
output_y_size : output_uv_size; + auto const& decoded_buffer = get_output_buffer(plane); + + for (u32 row = 0; row < output_size.height(); row++) { + memcpy( + buffer.data() + row * output_size.width(), + decoded_buffer.data() + row * decoded_width, + output_size.width() * sizeof(*buffer.data())); /* Copies one row: source rows are decoded_width samples apart, destination rows are output_size.width() samples apart. */ + } + } + + return DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame( + { output_y_size.width(), output_y_size.height() }, + m_parser->m_bit_depth, get_cicp_color_space(), + m_parser->m_subsampling_x, m_parser->m_subsampling_y, + output_buffers[0], output_buffers[1], output_buffers[2]))); /* The SubsampledYUVFrame constructor moves from the three arrays it receives by reference. */ } u8 Decoder::merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor) diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.h b/Userland/Libraries/LibVideo/VP9/Decoder.h index 50b31716a8a..bde1dd671f0 100644 --- a/Userland/Libraries/LibVideo/VP9/Decoder.h +++ b/Userland/Libraries/LibVideo/VP9/Decoder.h @@ -9,9 +9,11 @@ #include #include +#include #include #include #include +#include #include "Parser.h" @@ -23,23 +25,16 @@ class Decoder { public: Decoder(); /* (8.1) General */ - DecoderErrorOr decode(Span); - DecoderErrorOr decode(ByteBuffer const&); + DecoderErrorOr receive_sample(Span); /* Replaces decode(Span), removed above. */ + DecoderErrorOr receive_sample(ByteBuffer const&); /* Replaces decode(ByteBuffer const&), removed above. */ void dump_frame_info(); - // FIXME: These functions should be replaced by a struct that contains - // all the information needed to display a frame. 
- Vector const& get_output_buffer_for_plane(u8 plane) const; - Gfx::Size get_y_plane_size(); - bool get_uv_subsampling_y(); - bool get_uv_subsampling_x(); - CodingIndependentCodePoints get_cicp_color_space(); - u8 get_bit_depth(); + DecoderErrorOr> get_decoded_frame(); private: typedef i32 Intermediate; - DecoderErrorOr decode_frame(Span); + DecoderErrorOr decode_frame(Span); DecoderErrorOr allocate_buffers(); Vector& get_temp_buffer(u8 plane); @@ -139,6 +134,8 @@ private: /* (8.10) Reference Frame Update Process */ DecoderErrorOr update_reference_frames(); + inline CodingIndependentCodePoints get_cicp_color_space(); + NonnullOwnPtr m_parser; struct {
diff --git a/Userland/Libraries/LibVideo/VideoFrame.cpp b/Userland/Libraries/LibVideo/VideoFrame.cpp
new file mode 100644
index 00000000000..1eef94d8d77
--- /dev/null
+++ b/Userland/Libraries/LibVideo/VideoFrame.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+// NOTE(review): The include targets and template arguments in this file were stripped when the
+// patch was extracted; they are reconstructed from the names used below. Confirm before applying.
+#include <AK/FixedArray.h>
+#include <AK/NonnullOwnPtr.h>
+#include <LibGfx/Bitmap.h>
+#include <LibVideo/Color/ColorConverter.h>
+
+#include "VideoFrame.h"
+
+namespace Video {
+
+// Builds a frame whose planes are FixedArrays created from the given spans.
+//
+// output_to_bitmap() expects plane_y to hold width * height samples, and plane_u and plane_v to each
+// hold (width >> subsampling_horizontal) * (height >> subsampling_vertical) samples, all stored row
+// by row without padding.
+ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
+    Gfx::IntSize size,
+    u8 bit_depth, CodingIndependentCodePoints cicp,
+    bool subsampling_horizontal, bool subsampling_vertical,
+    Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v)
+{
+    auto plane_y_array = TRY(FixedArray<u16>::try_create(plane_y));
+    auto plane_u_array = TRY(FixedArray<u16>::try_create(plane_u));
+    auto plane_v_array = TRY(FixedArray<u16>::try_create(plane_v));
+    return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(size, bit_depth, cicp, subsampling_horizontal, subsampling_vertical, plane_y_array, plane_u_array, plane_v_array));
+}
+
+// Converts the frame to RGB and sets one pixel of the bitmap per luma sample, so the bitmap must be
+// at least width() x height() pixels.
+//
+// Chroma is upsampled bilinearly. Each output row first receives chroma at the columns that carry
+// a chroma sample (averaged with the next chroma row when the output row lies between two of
+// them), then the columns in between are set to the average of their two neighbors. Rows and
+// columns past the last chroma sample repeat the nearest one.
+DecoderErrorOr<void> SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap)
+{
+    size_t width = this->width();
+    size_t height = this->height();
+    auto u_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
+    auto v_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
+
+    auto converter = TRY(ColorConverter::create(bit_depth(), cicp()));
+
+    if (width == 0 || height == 0)
+        return {};
+
+    size_t uv_width = width >> m_subsampling_horizontal;
+    size_t uv_height = height >> m_subsampling_vertical;
+    bool has_chroma = uv_width > 0 && uv_height > 0;
+
+    // A subsampled frame that is one pixel wide or tall has empty chroma planes. Give every pixel
+    // mid-range chroma in that case rather than reading samples that do not exist.
+    if (!has_chroma) {
+        auto neutral_chroma = static_cast<u16>(bit_depth() > 0 ? 1u << (bit_depth() - 1) : 0u);
+        for (size_t column = 0; column < width; column++) {
+            u_sample_row[column] = neutral_chroma;
+            v_sample_row[column] = neutral_chroma;
+        }
+    }
+
+    // The output column that the last chroma sample of each chroma row lands on.
+    size_t last_sited_column = has_chroma ? (uv_width - 1) << m_subsampling_horizontal : 0;
+
+    for (size_t row = 0; row < height; row++) {
+        if (has_chroma) {
+            // Interpolate vertically. Both chroma row indices are clamped to the last chroma row,
+            // because the bottom output rows can lie past it when the height is odd.
+            size_t uv_row = min(row >> m_subsampling_vertical, uv_height - 1);
+            bool is_between_uv_rows = m_subsampling_vertical && (row & 1) != 0;
+            size_t next_uv_row = is_between_uv_rows ? min(uv_row + 1, uv_height - 1) : uv_row;
+
+            for (size_t uv_column = 0; uv_column < uv_width; uv_column++) {
+                size_t column = uv_column << m_subsampling_horizontal;
+                size_t index = uv_row * uv_width + uv_column;
+                size_t next_index = next_uv_row * uv_width + uv_column;
+                u_sample_row[column] = (m_plane_u[index] + m_plane_u[next_index]) >> 1;
+                v_sample_row[column] = (m_plane_v[index] + m_plane_v[next_index]) >> 1;
+            }
+
+            // Interpolate horizontally between the columns written above.
+            if (m_subsampling_horizontal) {
+                for (size_t column = 1; column < last_sited_column; column += 2) {
+                    u_sample_row[column] = (u_sample_row[column - 1] + u_sample_row[column + 1]) >> 1;
+                    v_sample_row[column] = (v_sample_row[column - 1] + v_sample_row[column + 1]) >> 1;
+                }
+            }
+
+            // Columns to the right of the last chroma sample have no neighbor to interpolate
+            // with, so they repeat it. Without horizontal subsampling this loop does nothing.
+            for (size_t column = last_sited_column + 1; column < width; column++) {
+                u_sample_row[column] = u_sample_row[last_sited_column];
+                v_sample_row[column] = v_sample_row[last_sited_column];
+            }
+        }
+
+        for (size_t column = 0; column < width; column++) {
+            auto y_sample = m_plane_y[row * width + column];
+            auto u_sample = u_sample_row[column];
+            auto v_sample = v_sample_row[column];
+
+            bitmap.set_pixel(Gfx::IntPoint(column, row), converter.convert_yuv_to_full_range_rgb(y_sample, u_sample, v_sample));
+        }
+    }
+
+    return {};
+}
+
+}
diff --git a/Userland/Libraries/LibVideo/VideoFrame.h b/Userland/Libraries/LibVideo/VideoFrame.h
new file mode 100644
index 00000000000..049d88a1218
--- /dev/null
+++ b/Userland/Libraries/LibVideo/VideoFrame.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+// NOTE(review): The include targets and template arguments in this file were stripped when the
+// patch was extracted; they are reconstructed from the names used below. Confirm before applying.
+#include <AK/FixedArray.h>
+#include <AK/NonnullOwnPtr.h>
+#include <AK/NonnullRefPtr.h>
+#include <AK/Span.h>
+#include <LibGfx/Bitmap.h>
+#include <LibGfx/Size.h>
+#include <LibVideo/Color/CodingIndependentCodePoints.h>
+#include <LibVideo/DecoderError.h>
+
+namespace Video {
+
+// A decoded video frame that knows how to write itself into an RGB bitmap.
+class VideoFrame {
+
+public:
+    virtual ~VideoFrame() = default;
+
+    // Sets the pixels of an existing bitmap to this frame's colors.
+    virtual DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) = 0;
+
+    // Allocates a BGRx8888 bitmap of this frame's size and fills it with output_to_bitmap().
+    virtual DecoderErrorOr<NonnullRefPtr<Gfx::Bitmap>> to_bitmap()
+    {
+        auto bitmap = DECODER_TRY_ALLOC(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, m_size));
+        TRY(output_to_bitmap(*bitmap));
+        return bitmap;
+    }
+
+    Gfx::IntSize size() const { return m_size; }
+    size_t width() const { return size().width(); }
+    size_t height() const { return size().height(); }
+
+    u8 bit_depth() const { return m_bit_depth; }
+    // Returns a mutable reference, so changes made through it are stored in the frame.
+    CodingIndependentCodePoints& cicp() { return m_cicp; }
+
+protected:
+    VideoFrame(Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp)
+        : m_size(size)
+        , m_bit_depth(bit_depth)
+        , m_cicp(cicp)
+    {
+    }
+
+    Gfx::IntSize m_size;
+    u8 m_bit_depth;
+    CodingIndependentCodePoints m_cicp;
+};
+
+// A frame stored as one full-resolution luma plane and two chroma planes that may be halved in
+// width and/or height.
+class SubsampledYUVFrame : public VideoFrame {
+
+public:
+    static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
+        Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp,
+        bool subsampling_horizontal, bool subsampling_vertical,
+        Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v);
+
+    // Takes over the contents of the three plane arrays: each one is moved from.
+    SubsampledYUVFrame(
+        Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp,
+        bool subsampling_horizontal, bool subsampling_vertical,
+        FixedArray<u16>& plane_y, FixedArray<u16>& plane_u, FixedArray<u16>& plane_v)
+        : VideoFrame(size, bit_depth, cicp)
+        , m_subsampling_horizontal(subsampling_horizontal)
+        , m_subsampling_vertical(subsampling_vertical)
+        , m_plane_y(move(plane_y))
+        , m_plane_u(move(plane_u))
+        , m_plane_v(move(plane_v))
+    {
+    }
+
+    DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) override;
+
+protected:
+    bool m_subsampling_horizontal;
+    bool m_subsampling_vertical;
+    FixedArray<u16> m_plane_y;
+    FixedArray<u16> m_plane_u;
+    FixedArray<u16> m_plane_v;
+};
+
+}