VideoPlayer: Display frames from the VP9 decoder

For testing purposes, the output buffer is taken directly from the
decoder and displayed in an image widget.

The first keyframe can be displayed, but the second will not decode,
so VideoPlayer will stop at frame 0 for now.

This implements a BT.709 YCbCr to RGB conversion in VideoPlayer, but
that should be moved to a library for handling color space conversion.
This commit is contained in:
Zaggy1024 2022-09-16 04:07:52 -05:00 committed by Andrew Kaster
parent 1514004cd5
commit 85fd56cf48
Notes: sideshowbarker 2024-07-17 14:33:07 +09:00
2 changed files with 75 additions and 18 deletions

View file

@@ -25,31 +25,87 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
auto const& track = optional_track.value();
auto const video_track = track.video_track().value();
auto image = Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, Gfx::IntSize(video_track.pixel_height, video_track.pixel_width)).release_value_but_fixme_should_propagate_errors();
auto image = TRY(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, Gfx::IntSize(video_track.pixel_width, video_track.pixel_height)));
auto main_widget = TRY(window->try_set_main_widget<GUI::Widget>());
main_widget->set_fill_with_background_color(true);
main_widget->set_layout<GUI::VerticalBoxLayout>();
auto& image_widget = main_widget->add<GUI::ImageWidget>();
image_widget.set_bitmap(image);
image_widget.set_fixed_size(video_track.pixel_height, video_track.pixel_width);
TRY(main_widget->try_add_child(image_widget));
auto image_widget = TRY(main_widget->try_add<GUI::ImageWidget>());
Video::VP9::Decoder vp9_decoder;
for (auto const& cluster : document->clusters()) {
for (auto const& block : cluster.blocks()) {
if (block.track_number() != track.track_number())
continue;
size_t cluster_index = 0;
size_t block_index = 0;
size_t frame_index = 0;
auto frame_number = 0u;
auto const& frame = block.frame(0);
dbgln("Reading frame 0 from block @ {}", block.timestamp());
auto result = vp9_decoder.decode_frame(frame);
vp9_decoder.dump_frame_info();
if (result.is_error()) {
outln("Error: {}", result.error().string_literal());
return 1;
auto get_next_sample = [&]() -> Optional<ByteBuffer> {
for (; cluster_index < document->clusters().size(); cluster_index++) {
for (; block_index < document->clusters()[cluster_index].blocks().size(); block_index++) {
auto const& candidate_block = document->clusters()[cluster_index].blocks()[block_index];
if (candidate_block.track_number() != track.track_number())
continue;
if (frame_index < candidate_block.frames().size())
return candidate_block.frame(frame_index);
frame_index = 0;
}
block_index = 0;
}
return {};
};
auto display_next_frame = [&]() {
auto optional_sample = get_next_sample();
if (!optional_sample.has_value())
return;
auto result = vp9_decoder.decode_frame(optional_sample.release_value());
if (result.is_error()) {
outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
return;
}
// FIXME: This method of output is temporary and should be replaced with an image struct
// containing the planes and their sizes. Ideally, this struct would be interpreted
// by some color conversion library and then passed to something (GL?) for output.
auto const& output_y = vp9_decoder.get_output_buffer_for_plane(0);
auto const& output_u = vp9_decoder.get_output_buffer_for_plane(1);
auto const& output_v = vp9_decoder.get_output_buffer_for_plane(2);
auto y_size = vp9_decoder.get_y_plane_size();
auto uv_subsampling_y = vp9_decoder.get_uv_subsampling_y();
auto uv_subsampling_x = vp9_decoder.get_uv_subsampling_x();
Gfx::IntSize uv_size { y_size.width() >> uv_subsampling_x, y_size.height() >> uv_subsampling_y };
for (auto y_row = 0u; y_row < video_track.pixel_height; y_row++) {
auto uv_row = y_row >> uv_subsampling_y;
for (auto y_column = 0u; y_column < video_track.pixel_width; y_column++) {
auto uv_column = y_column >> uv_subsampling_x;
auto y = output_y[y_row * y_size.width() + y_column];
auto cb = output_u[uv_row * uv_size.width() + uv_column];
auto cr = output_v[uv_row * uv_size.width() + uv_column];
// Convert from Rec.709 YCbCr to RGB.
auto r_float = floorf(clamp(y + (cr - 128) * 219.0f / 224.0f * 1.5748f, 0, 255));
auto g_float = floorf(clamp(y + (cb - 128) * 219.0f / 224.0f * -0.0722f * 1.8556f / 0.7152f + (cr - 128) * 219.0f / 224.0f * -0.2126f * 1.5748f / 0.7152f, 0, 255));
auto b_float = floorf(clamp(y + (cb - 128) * 219.0f / 224.0f * 1.8556f, 0, 255));
auto r = static_cast<u8>(r_float);
auto g = static_cast<u8>(g_float);
auto b = static_cast<u8>(b_float);
image->set_pixel(y_column, y_row, Gfx::Color(r, g, b));
}
}
}
image_widget->set_bitmap(image);
image_widget->update();
frame_index++;
frame_number++;
};
display_next_frame();
window->show();
return app->exec();

View file

@@ -122,7 +122,8 @@ public:
bool discardable() const { return m_discardable; }
void set_discardable(bool discardable) { m_discardable = discardable; }
u64 frame_count() const { return m_frames.size(); }
ByteBuffer const& frame(size_t index) const { return m_frames.at(index); }
Vector<ByteBuffer> const& frames() const { return m_frames; }
ByteBuffer const& frame(size_t index) const { return frames()[index]; }
void add_frame(ByteBuffer frame) { m_frames.append(move(frame)); }
private: