LibVideo: Add VideoFrame class for decoded video frames

The class is abstract and currently has one subclass, SubsampledYUVFrame, which the VP9 decoder uses to return a single decoded frame. The output_to_bitmap(Bitmap&) function sets the pixels of an existing bitmap of the correct size to the RGB values that should be displayed. The to_bitmap() function allocates a new bitmap and fills it using output_to_bitmap(). The new class also implements bilinear scaling of the subsampled U and V planes, so that the colors of subsampled videos appear smoother.
Author: https://github.com/Zaggy1024 Commit: https://github.com/SerenityOS/serenity/commit/074f771b59 Pull-request: https://github.com/SerenityOS/serenity/pull/15851
2024-11-22 07:30:19 +00:00 · 2022-10-08 18:36:57 -05:00 · 2022-10-08 18:36:57 -05:00 · 074f771b59 · 2024-07-17 06:54:15 +09:00
commit 074f771b59
parent 30189186e9
8 changed files with 244 additions and 70 deletions
--- a/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp
+++ b/Meta/Lagom/Fuzzers/FuzzVP9Decoder.cpp
@ -10,7 +10,7 @@
 extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size)
 {
    Video::VP9::Decoder vp9_decoder;
-    if (auto decode_error = vp9_decoder.decode({ data, size }); decode_error.is_error())
+    if (auto decode_error = vp9_decoder.receive_sample({ data, size }); decode_error.is_error())
        return -1;
    return 0;
 }
--- a/Tests/LibVideo/TestVP9Decode.cpp
+++ b/Tests/LibVideo/TestVP9Decode.cpp
@ -29,7 +29,7 @@ static void decode_video(StringView path, size_t expected_frame_count)
                continue;

            for (frame_index = 0; frame_index < block.frames().size(); frame_index++) {
-                MUST(vp9_decoder.decode(block.frames()[frame_index]));
+                MUST(vp9_decoder.receive_sample(block.frames()[frame_index]));
                frame_count++;
            }
        }
--- a/Userland/Applications/VideoPlayer/main.cpp
+++ b/Userland/Applications/VideoPlayer/main.cpp
@ -75,47 +75,29 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
        if (!optional_sample.has_value())
            return;

-        auto result = vp9_decoder.decode(optional_sample.release_value());
+        auto result = vp9_decoder.receive_sample(optional_sample.release_value());

        if (result.is_error()) {
            outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
            return;
        }

-        // FIXME: This method of output is temporary and should be replaced with an image struct
-        //        containing the planes and their sizes. Ideally, this struct would be interpreted
-        //        by some color conversion library and then passed to something (GL?) for output.
-        auto const& output_y = vp9_decoder.get_output_buffer_for_plane(0);
-        auto const& output_u = vp9_decoder.get_output_buffer_for_plane(1);
-        auto const& output_v = vp9_decoder.get_output_buffer_for_plane(2);
-        auto y_size = vp9_decoder.get_y_plane_size();
-        auto uv_subsampling_y = vp9_decoder.get_uv_subsampling_y();
-        auto uv_subsampling_x = vp9_decoder.get_uv_subsampling_x();
-        Gfx::IntSize uv_size { y_size.width() >> uv_subsampling_x, y_size.height() >> uv_subsampling_y };
-        auto cicp = vp9_decoder.get_cicp_color_space();
+        auto frame_result = vp9_decoder.get_decoded_frame();
+        if (frame_result.is_error()) {
+            outln("Error retrieving frame {}: {}", frame_number, frame_result.error().string_literal());
+            return;
+        }
+        auto frame = frame_result.release_value();
+
+        auto& cicp = frame->cicp();
        video_track.color_format.replace_code_points_if_specified(cicp);
        cicp.default_code_points_if_unspecified(Video::ColorPrimaries::BT709, Video::TransferCharacteristics::BT709, Video::MatrixCoefficients::BT709);

-        auto color_converter_result = Video::ColorConverter::create(vp9_decoder.get_bit_depth(), cicp);
-        if (color_converter_result.is_error()) {
-            outln("Cannot convert video colors: {}", color_converter_result.release_error().string_literal());
+        auto convert_result = frame->output_to_bitmap(image);
+        if (convert_result.is_error()) {
+            outln("Error creating bitmap for frame {}: {}", frame_number, convert_result.error().string_literal());
            return;
        }
-        auto color_converter = color_converter_result.release_value();
-
-        for (auto y_row = 0u; y_row < video_track.pixel_height; y_row++) {
-            auto uv_row = y_row >> uv_subsampling_y;
-
-            for (auto y_column = 0u; y_column < video_track.pixel_width; y_column++) {
-                auto uv_column = y_column >> uv_subsampling_x;
-
-                auto y = output_y[y_row * y_size.width() + y_column];
-                auto u = output_u[uv_row * uv_size.width() + uv_column];
-                auto v = output_v[uv_row * uv_size.width() + uv_column];
-
-                image->set_pixel(y_column, y_row, color_converter.convert_yuv_to_full_range_rgb(y, u, v));
-            }
-        }

        image_widget->set_bitmap(image);
        image_widget->update();
--- a/Userland/Libraries/LibVideo/CMakeLists.txt
+++ b/Userland/Libraries/LibVideo/CMakeLists.txt
@ -3,6 +3,7 @@ set(SOURCES
    Color/ColorPrimaries.cpp
    Color/TransferCharacteristics.cpp
    MatroskaReader.cpp
+    VideoFrame.cpp
    VP9/BitStream.cpp
    VP9/Decoder.cpp
    VP9/Parser.cpp
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@ -19,7 +19,7 @@ Decoder::Decoder()
 {
 }

-DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
+DecoderErrorOr<void> Decoder::receive_sample(Span<u8 const> chunk_data)
 {
    auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);

@ -42,9 +42,9 @@ DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
    return {};
 }

-DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
+DecoderErrorOr<void> Decoder::receive_sample(ByteBuffer const& chunk_data)
 {
-    return decode(chunk_data.span());
+    return receive_sample(chunk_data.span());
 }

 void Decoder::dump_frame_info()
@ -57,7 +57,7 @@ inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
    return row * stride + column;
 }

-DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
+DecoderErrorOr<void> Decoder::decode_frame(Span<u8 const> frame_data)
 {
    // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
    // tables include function calls indicating when the block decode processes should be triggered.
@ -130,27 +130,7 @@ Vector<u16>& Decoder::get_output_buffer(u8 plane)
    return m_buffers.output[plane];
 }

-Vector<u16> const& Decoder::get_output_buffer_for_plane(u8 plane) const
-{
-    return m_buffers.output[plane];
-}
-
-Gfx::Size<size_t> Decoder::get_y_plane_size()
-{
-    return m_parser->get_decoded_size_for_plane(0);
-}
-
-bool Decoder::get_uv_subsampling_y()
-{
-    return m_parser->m_subsampling_y;
-}
-
-bool Decoder::get_uv_subsampling_x()
-{
-    return m_parser->m_subsampling_x;
-}
-
-CodingIndependentCodePoints Decoder::get_cicp_color_space()
+inline CodingIndependentCodePoints Decoder::get_cicp_color_space()
 {
    ColorPrimaries color_primaries;
    TransferCharacteristics transfer_characteristics;
@ -209,9 +189,42 @@ CodingIndependentCodePoints Decoder::get_cicp_color_space()
    return { color_primaries, transfer_characteristics, matrix_coefficients, m_parser->m_color_range };
 }

-u8 Decoder::get_bit_depth()
+// Copies the most recently decoded planes out of the decoder's output buffers
+// into a newly allocated SubsampledYUVFrame and returns it.
+//
+// The copy is done row by row because the decoder's buffers use a row stride
+// (decoded_y_width / decoded_uv_width) that may be wider than the visible
+// frame (m_frame_width x m_frame_height). Any allocation failure is returned
+// as a decoder error through DECODER_TRY_ALLOC.
+DecoderErrorOr<NonnullOwnPtr<VideoFrame>> Decoder::get_decoded_frame()
 {
-    return m_parser->m_bit_depth;
+    // Row stride of the decoded luma buffer.
+    // NOTE(review): presumably m_mi_cols counts 8-pixel-wide units, making this
+    // the frame width rounded up to a multiple of 8 - confirm against the parser.
+    size_t decoded_y_width = m_parser->m_mi_cols * 8;
+    Gfx::Size<size_t> output_y_size = {
+        m_parser->m_frame_width,
+        m_parser->m_frame_height,
+    };
+    auto decoded_uv_width = decoded_y_width >> m_parser->m_subsampling_x;
+    // NOTE(review): the shifts below truncate, so an odd luma width/height with
+    // subsampling enabled yields a chroma plane that does not cover the last
+    // luma column/row. SubsampledYUVFrame::output_to_bitmap() derives its
+    // chroma size with the same shift, so the two must be changed together.
+    Gfx::Size<size_t> output_uv_size = {
+        output_y_size.width() >> m_parser->m_subsampling_x,
+        output_y_size.height() >> m_parser->m_subsampling_y,
+    };
+    Array<FixedArray<u16>, 3> output_buffers = {
+        DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_y_size.width() * output_y_size.height())),
+        DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_uv_size.width() * output_uv_size.height())),
+        DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_uv_size.width() * output_uv_size.height())),
+    };
+    // Plane 0 is copied at luma size; planes 1 and 2 are copied at chroma size.
+    for (u8 plane = 0; plane < 3; plane++) {
+        auto& buffer = output_buffers[plane];
+        auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width;
+        auto output_size = plane == 0 ? output_y_size : output_uv_size;
+        auto const& decoded_buffer = get_output_buffer(plane);
+
+        // Copy only the visible part of each row: the source advances by the
+        // decoded stride, the destination by the tightly packed output width.
+        for (u32 row = 0; row < output_size.height(); row++) {
+            memcpy(
+                buffer.data() + row * output_size.width(),
+                decoded_buffer.data() + row * decoded_width,
+                output_size.width() * sizeof(*buffer.data()));
+        }
+    }
+
+    // The SubsampledYUVFrame constructor takes the arrays by non-const
+    // reference and moves out of them, so output_buffers must not be used
+    // after this point.
+    return DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(
+        { output_y_size.width(), output_y_size.height() },
+        m_parser->m_bit_depth, get_cicp_color_space(),
+        m_parser->m_subsampling_x, m_parser->m_subsampling_y,
+        output_buffers[0], output_buffers[1], output_buffers[2])));
 }

 u8 Decoder::merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor)
--- a/Userland/Libraries/LibVideo/VP9/Decoder.h
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.h
@ -9,9 +9,11 @@

 #include <AK/ByteBuffer.h>
 #include <AK/Error.h>
+#include <AK/NonnullOwnPtr.h>
 #include <AK/Span.h>
 #include <LibVideo/Color/CodingIndependentCodePoints.h>
 #include <LibVideo/DecoderError.h>
+#include <LibVideo/VideoFrame.h>

 #include "Parser.h"

@ -23,23 +25,16 @@ class Decoder {
 public:
    Decoder();
    /* (8.1) General */
-    DecoderErrorOr<void> decode(Span<const u8>);
-    DecoderErrorOr<void> decode(ByteBuffer const&);
+    DecoderErrorOr<void> receive_sample(Span<u8 const>);
+    DecoderErrorOr<void> receive_sample(ByteBuffer const&);
    void dump_frame_info();

-    // FIXME: These functions should be replaced by a struct that contains
-    //        all the information needed to display a frame.
-    Vector<u16> const& get_output_buffer_for_plane(u8 plane) const;
-    Gfx::Size<size_t> get_y_plane_size();
-    bool get_uv_subsampling_y();
-    bool get_uv_subsampling_x();
-    CodingIndependentCodePoints get_cicp_color_space();
-    u8 get_bit_depth();
+    DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame();

 private:
    typedef i32 Intermediate;

-    DecoderErrorOr<void> decode_frame(Span<const u8>);
+    DecoderErrorOr<void> decode_frame(Span<u8 const>);

    DecoderErrorOr<void> allocate_buffers();
    Vector<Intermediate>& get_temp_buffer(u8 plane);
@ -139,6 +134,8 @@ private:
    /* (8.10) Reference Frame Update Process */
    DecoderErrorOr<void> update_reference_frames();

+    inline CodingIndependentCodePoints get_cicp_color_space();
+
    NonnullOwnPtr<Parser> m_parser;

    struct {
--- a/Userland/Libraries/LibVideo/VideoFrame.cpp
+++ b/Userland/Libraries/LibVideo/VideoFrame.cpp
@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Format.h>
+#include <AK/NonnullOwnPtr.h>
+#include <AK/OwnPtr.h>
+#include <LibVideo/Color/ColorConverter.h>
+
+#include "VideoFrame.h"
+
+namespace Video {
+
+// Creates a heap-allocated SubsampledYUVFrame from three caller-provided planes.
+//
+// A FixedArray is created from each span (presumably copying its contents, so
+// the caller keeps ownership of the input memory - confirm against FixedArray).
+// Any failure while creating the arrays or allocating the frame is returned
+// as an error instead of a frame.
+ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
+    Gfx::IntSize size,
+    u8 bit_depth, CodingIndependentCodePoints cicp,
+    bool subsampling_horizontal, bool subsampling_vertical,
+    Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v)
+{
+    auto luma_samples = TRY(FixedArray<u16>::try_create(plane_y));
+    auto chroma_u_samples = TRY(FixedArray<u16>::try_create(plane_u));
+    auto chroma_v_samples = TRY(FixedArray<u16>::try_create(plane_v));
+
+    // The constructor moves the arrays out of these locals.
+    auto* frame = new (nothrow) SubsampledYUVFrame(
+        size, bit_depth, cicp,
+        subsampling_horizontal, subsampling_vertical,
+        luma_samples, chroma_u_samples, chroma_v_samples);
+    return adopt_nonnull_own_or_enomem(frame);
+}
+
+// Converts this frame to RGB and writes it into `bitmap`.
+//
+// set_pixel() is called for every (column, row) inside width() x height(), so
+// the caller must pass a bitmap that is at least as large as the frame.
+//
+// The U and V planes are upsampled to luma resolution with linear
+// interpolation: first vertically between two chroma rows, then horizontally
+// between two chroma columns. Luma positions that have no chroma sample after
+// them (the right/bottom edge, including odd frame dimensions whose chroma
+// planes were truncated by the subsampling shift) reuse the nearest available
+// chroma sample.
+//
+// Returns an error if the temporary sample rows cannot be allocated or if no
+// color converter can be created for this frame's bit depth and CICP.
+DecoderErrorOr<void> SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap)
+{
+    size_t width = this->width();
+    size_t height = this->height();
+    auto u_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
+    auto v_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
+    size_t uv_width = width >> m_subsampling_horizontal;
+    size_t uv_height = height >> m_subsampling_vertical;
+
+    auto converter = TRY(ColorConverter::create(bit_depth(), cicp()));
+
+    // An empty frame has nothing to output. Returning here also keeps the
+    // index arithmetic below free of unsigned underflow.
+    if (width == 0 || height == 0)
+        return {};
+
+    // A frame that is a single pixel wide/tall in a subsampled direction has
+    // empty chroma planes. Use mid-range (colorless) chroma for it rather than
+    // indexing into the empty planes.
+    bool has_chroma = uv_width > 0 && uv_height > 0;
+    if (!has_chroma) {
+        u16 neutral_chroma = bit_depth() > 0 ? static_cast<u16>(1u << (bit_depth() - 1)) : 0;
+        for (size_t column = 0; column < width; column++) {
+            u_sample_row[column] = neutral_chroma;
+            v_sample_row[column] = neutral_chroma;
+        }
+    }
+
+    // The right-most luma column that is co-located with a chroma sample.
+    size_t last_sampled_column = has_chroma ? ((uv_width - 1) << m_subsampling_horizontal) : 0;
+
+    for (size_t row = 0; row < height; row++) {
+        if (has_chroma) {
+            // Clamp to the last chroma row: for odd heights the final luma row
+            // maps one row past the end of the (truncated) chroma planes.
+            size_t uv_row = row >> m_subsampling_vertical;
+            if (uv_row >= uv_height)
+                uv_row = uv_height - 1;
+
+            // Linearly interpolate the UV samples vertically first.
+            // This will write all UV samples that are located on the Y sample as well,
+            // so we only need to interpolate horizontally between UV samples in the next
+            // step.
+            if ((row & m_subsampling_vertical) == 0) {
+                for (size_t uv_column = 0; uv_column < uv_width; uv_column++) {
+                    size_t column = uv_column << m_subsampling_horizontal;
+                    size_t index = uv_row * uv_width + uv_column;
+                    u_sample_row[column] = m_plane_u[index];
+                    v_sample_row[column] = m_plane_v[index];
+                }
+            } else {
+                // The sampled columns still hold the values of uv_row from the
+                // previous luma row; average them with the next chroma row.
+                // At the bottom edge there is no next row, so the current row
+                // is averaged with itself, which leaves it unchanged.
+                size_t next_uv_row = uv_row + 1 < uv_height ? uv_row + 1 : uv_row;
+                for (size_t uv_column = 0; uv_column < uv_width; uv_column++) {
+                    size_t column = uv_column << m_subsampling_horizontal;
+                    size_t index = next_uv_row * uv_width + uv_column;
+                    u_sample_row[column] = (u_sample_row[column] + m_plane_u[index]) >> 1;
+                    v_sample_row[column] = (v_sample_row[column] + m_plane_v[index]) >> 1;
+                }
+            }
+
+            // Fill in the columns to the right of the last chroma sample, which
+            // the loops above never write. Without horizontal subsampling every
+            // column has its own sample and this loop does not run.
+            for (size_t column = last_sampled_column + 1; column < width; column++) {
+                u_sample_row[column] = u_sample_row[last_sampled_column];
+                v_sample_row[column] = v_sample_row[last_sampled_column];
+            }
+
+            // Interpolate the samples horizontally. Only the odd columns are
+            // written, so the sampled (even) columns stay intact for the
+            // vertical interpolation of the next row.
+            if (m_subsampling_horizontal) {
+                for (size_t column = 1; column + 1 < width; column += 2) {
+                    u_sample_row[column] = (u_sample_row[column - 1] + u_sample_row[column + 1]) >> 1;
+                    v_sample_row[column] = (v_sample_row[column - 1] + v_sample_row[column + 1]) >> 1;
+                }
+            }
+        }
+
+        for (size_t column = 0; column < width; column++) {
+            auto y_sample = m_plane_y[row * width + column];
+            auto u_sample = u_sample_row[column];
+            auto v_sample = v_sample_row[column];
+
+            bitmap.set_pixel(Gfx::IntPoint(column, row), converter.convert_yuv_to_full_range_rgb(y_sample, u_sample, v_sample));
+        }
+    }
+
+    return {};
+}
+
+}
--- a/Userland/Libraries/LibVideo/VideoFrame.h
+++ b/Userland/Libraries/LibVideo/VideoFrame.h
@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+#include <AK/Concepts.h>
+#include <AK/FixedArray.h>
+#include <LibGfx/Bitmap.h>
+#include <LibGfx/Size.h>
+#include <LibVideo/Color/CodingIndependentCodePoints.h>
+#include <LibVideo/DecoderError.h>
+
+namespace Video {
+
+// Abstract base class for a single decoded video frame.
+//
+// It stores the properties shared by all frame layouts (size, bit depth and
+// the CICP color description) and leaves the conversion to displayable pixels
+// to subclasses through output_to_bitmap().
+class VideoFrame {
+
+public:
+    virtual ~VideoFrame() = default;
+
+    // Writes this frame's pixels into an existing bitmap. The bitmap is
+    // expected to already have the size of this frame.
+    virtual DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) = 0;
+
+    // Allocates a new BGRx8888 bitmap with this frame's size and fills it
+    // using output_to_bitmap(). Returns an error if the allocation or the
+    // conversion fails.
+    virtual DecoderErrorOr<NonnullRefPtr<Gfx::Bitmap>> to_bitmap()
+    {
+        auto bitmap = DECODER_TRY_ALLOC(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, m_size));
+        TRY(output_to_bitmap(bitmap));
+        return bitmap;
+    }
+
+    // The accessors are const so that they can be used through a const frame.
+    Gfx::IntSize size() const { return m_size; }
+    // NOTE: the stored size uses signed integers; width() and height()
+    // convert them to size_t, so the size is expected to be non-negative.
+    size_t width() const { return size().width(); }
+    size_t height() const { return size().height(); }
+
+    u8 bit_depth() const { return m_bit_depth; }
+
+    // The non-const overload returns a mutable reference so that callers can
+    // adjust the code points (for example, to fill in unspecified values)
+    // before the frame is converted.
+    CodingIndependentCodePoints& cicp() { return m_cicp; }
+    CodingIndependentCodePoints const& cicp() const { return m_cicp; }
+
+protected:
+    VideoFrame(Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp)
+        : m_size(size)
+        , m_bit_depth(bit_depth)
+        , m_cicp(cicp)
+    {
+    }
+
+    Gfx::IntSize m_size;
+    u8 m_bit_depth;
+    CodingIndependentCodePoints m_cicp;
+};
+
+// A VideoFrame stored as three planes (Y, U and V) of 16-bit samples, where
+// the U and V planes may be subsampled horizontally and/or vertically
+// relative to the Y plane.
+class SubsampledYUVFrame : public VideoFrame {
+
+public:
+    // Creates a frame from the given planes; the spans are turned into
+    // FixedArrays owned by the new frame. Returns an error on failure.
+    static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
+        Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp,
+        bool subsampling_horizontal, bool subsampling_vertical,
+        Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v);
+
+    // Constructs a frame that takes ownership of the given planes.
+    // NOTE: the planes are taken by non-const lvalue reference and are moved
+    // from, so the caller's arrays must not be used after this call.
+    SubsampledYUVFrame(
+        Gfx::IntSize size,
+        u8 bit_depth, CodingIndependentCodePoints cicp,
+        bool subsampling_horizontal, bool subsampling_vertical,
+        FixedArray<u16>& plane_y, FixedArray<u16>& plane_u, FixedArray<u16>& plane_v)
+        : VideoFrame(size, bit_depth, cicp)
+        , m_subsampling_horizontal(subsampling_horizontal)
+        , m_subsampling_vertical(subsampling_vertical)
+        , m_plane_y(move(plane_y))
+        , m_plane_u(move(plane_u))
+        , m_plane_v(move(plane_v))
+    {
+    }
+
+    // Converts the planes to RGB and writes the result into `bitmap`.
+    DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) override;
+
+protected:
+    // Whether the U and V planes are subsampled in each direction. These are
+    // used as shift amounts (0 or 1) when mapping Y coordinates to UV ones.
+    bool m_subsampling_horizontal;
+    bool m_subsampling_vertical;
+    // Sample storage for each plane. The Y plane is indexed as
+    // row * width() + column by output_to_bitmap().
+    FixedArray<u16> m_plane_y;
+    FixedArray<u16> m_plane_u;
+    FixedArray<u16> m_plane_v;
+};
+
+}