LibVideo: Add VideoFrame class for decoded video frames

The class is abstract and has one subclass, SubsampledYUVFrame, which
is used by the VP9 decoder to return a single frame. The
output_to_bitmap(Bitmap&) function can be used to set pixels on an
existing bitmap of the correct size to the RGB values that
should be displayed. The to_bitmap() function will allocate a new bitmap
and fill it using output_to_bitmap.

This new class also implements bilinear scaling of the subsampled U and
V planes so that subsampled videos' colors will appear smoother.
This commit is contained in:
Zaggy1024 2022-10-08 18:36:57 -05:00 committed by Andreas Kling
parent 30189186e9
commit 074f771b59
Notes: sideshowbarker 2024-07-17 06:54:15 +09:00
8 changed files with 244 additions and 70 deletions

View file

@ -10,7 +10,7 @@
extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size)
{
Video::VP9::Decoder vp9_decoder;
if (auto decode_error = vp9_decoder.decode({ data, size }); decode_error.is_error())
if (auto decode_error = vp9_decoder.receive_sample({ data, size }); decode_error.is_error())
return -1;
return 0;
}

View file

@ -29,7 +29,7 @@ static void decode_video(StringView path, size_t expected_frame_count)
continue;
for (frame_index = 0; frame_index < block.frames().size(); frame_index++) {
MUST(vp9_decoder.decode(block.frames()[frame_index]));
MUST(vp9_decoder.receive_sample(block.frames()[frame_index]));
frame_count++;
}
}

View file

@ -75,47 +75,29 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
if (!optional_sample.has_value())
return;
auto result = vp9_decoder.decode(optional_sample.release_value());
auto result = vp9_decoder.receive_sample(optional_sample.release_value());
if (result.is_error()) {
outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
return;
}
// FIXME: This method of output is temporary and should be replaced with an image struct
// containing the planes and their sizes. Ideally, this struct would be interpreted
// by some color conversion library and then passed to something (GL?) for output.
auto const& output_y = vp9_decoder.get_output_buffer_for_plane(0);
auto const& output_u = vp9_decoder.get_output_buffer_for_plane(1);
auto const& output_v = vp9_decoder.get_output_buffer_for_plane(2);
auto y_size = vp9_decoder.get_y_plane_size();
auto uv_subsampling_y = vp9_decoder.get_uv_subsampling_y();
auto uv_subsampling_x = vp9_decoder.get_uv_subsampling_x();
Gfx::IntSize uv_size { y_size.width() >> uv_subsampling_x, y_size.height() >> uv_subsampling_y };
auto cicp = vp9_decoder.get_cicp_color_space();
auto frame_result = vp9_decoder.get_decoded_frame();
if (frame_result.is_error()) {
outln("Error retrieving frame {}: {}", frame_number, frame_result.error().string_literal());
return;
}
auto frame = frame_result.release_value();
auto& cicp = frame->cicp();
video_track.color_format.replace_code_points_if_specified(cicp);
cicp.default_code_points_if_unspecified(Video::ColorPrimaries::BT709, Video::TransferCharacteristics::BT709, Video::MatrixCoefficients::BT709);
auto color_converter_result = Video::ColorConverter::create(vp9_decoder.get_bit_depth(), cicp);
if (color_converter_result.is_error()) {
outln("Cannot convert video colors: {}", color_converter_result.release_error().string_literal());
auto convert_result = frame->output_to_bitmap(image);
if (convert_result.is_error()) {
outln("Error creating bitmap for frame {}: {}", frame_number, convert_result.error().string_literal());
return;
}
auto color_converter = color_converter_result.release_value();
for (auto y_row = 0u; y_row < video_track.pixel_height; y_row++) {
auto uv_row = y_row >> uv_subsampling_y;
for (auto y_column = 0u; y_column < video_track.pixel_width; y_column++) {
auto uv_column = y_column >> uv_subsampling_x;
auto y = output_y[y_row * y_size.width() + y_column];
auto u = output_u[uv_row * uv_size.width() + uv_column];
auto v = output_v[uv_row * uv_size.width() + uv_column];
image->set_pixel(y_column, y_row, color_converter.convert_yuv_to_full_range_rgb(y, u, v));
}
}
image_widget->set_bitmap(image);
image_widget->update();

View file

@ -3,6 +3,7 @@ set(SOURCES
Color/ColorPrimaries.cpp
Color/TransferCharacteristics.cpp
MatroskaReader.cpp
VideoFrame.cpp
VP9/BitStream.cpp
VP9/Decoder.cpp
VP9/Parser.cpp

View file

@ -19,7 +19,7 @@ Decoder::Decoder()
{
}
DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
DecoderErrorOr<void> Decoder::receive_sample(Span<u8 const> chunk_data)
{
auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
@ -42,9 +42,9 @@ DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
return {};
}
DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
DecoderErrorOr<void> Decoder::receive_sample(ByteBuffer const& chunk_data)
{
return decode(chunk_data.span());
return receive_sample(chunk_data.span());
}
void Decoder::dump_frame_info()
@ -57,7 +57,7 @@ inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
return row * stride + column;
}
DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
DecoderErrorOr<void> Decoder::decode_frame(Span<u8 const> frame_data)
{
// 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
// tables include function calls indicating when the block decode processes should be triggered.
@ -130,27 +130,7 @@ Vector<u16>& Decoder::get_output_buffer(u8 plane)
return m_buffers.output[plane];
}
Vector<u16> const& Decoder::get_output_buffer_for_plane(u8 plane) const
{
return m_buffers.output[plane];
}
Gfx::Size<size_t> Decoder::get_y_plane_size()
{
return m_parser->get_decoded_size_for_plane(0);
}
bool Decoder::get_uv_subsampling_y()
{
return m_parser->m_subsampling_y;
}
bool Decoder::get_uv_subsampling_x()
{
return m_parser->m_subsampling_x;
}
CodingIndependentCodePoints Decoder::get_cicp_color_space()
inline CodingIndependentCodePoints Decoder::get_cicp_color_space()
{
ColorPrimaries color_primaries;
TransferCharacteristics transfer_characteristics;
@ -209,9 +189,42 @@ CodingIndependentCodePoints Decoder::get_cicp_color_space()
return { color_primaries, transfer_characteristics, matrix_coefficients, m_parser->m_color_range };
}
u8 Decoder::get_bit_depth()
DecoderErrorOr<NonnullOwnPtr<VideoFrame>> Decoder::get_decoded_frame()
{
return m_parser->m_bit_depth;
size_t decoded_y_width = m_parser->m_mi_cols * 8;
Gfx::Size<size_t> output_y_size = {
m_parser->m_frame_width,
m_parser->m_frame_height,
};
auto decoded_uv_width = decoded_y_width >> m_parser->m_subsampling_x;
Gfx::Size<size_t> output_uv_size = {
output_y_size.width() >> m_parser->m_subsampling_x,
output_y_size.height() >> m_parser->m_subsampling_y,
};
Array<FixedArray<u16>, 3> output_buffers = {
DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_y_size.width() * output_y_size.height())),
DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_uv_size.width() * output_uv_size.height())),
DECODER_TRY_ALLOC(FixedArray<u16>::try_create(output_uv_size.width() * output_uv_size.height())),
};
for (u8 plane = 0; plane < 3; plane++) {
auto& buffer = output_buffers[plane];
auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width;
auto output_size = plane == 0 ? output_y_size : output_uv_size;
auto const& decoded_buffer = get_output_buffer(plane);
for (u32 row = 0; row < output_size.height(); row++) {
memcpy(
buffer.data() + row * output_size.width(),
decoded_buffer.data() + row * decoded_width,
output_size.width() * sizeof(*buffer.data()));
}
}
return DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(
{ output_y_size.width(), output_y_size.height() },
m_parser->m_bit_depth, get_cicp_color_space(),
m_parser->m_subsampling_x, m_parser->m_subsampling_y,
output_buffers[0], output_buffers[1], output_buffers[2])));
}
u8 Decoder::merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor)

View file

@ -9,9 +9,11 @@
#include <AK/ByteBuffer.h>
#include <AK/Error.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/Span.h>
#include <LibVideo/Color/CodingIndependentCodePoints.h>
#include <LibVideo/DecoderError.h>
#include <LibVideo/VideoFrame.h>
#include "Parser.h"
@ -23,23 +25,16 @@ class Decoder {
public:
Decoder();
/* (8.1) General */
DecoderErrorOr<void> decode(Span<const u8>);
DecoderErrorOr<void> decode(ByteBuffer const&);
DecoderErrorOr<void> receive_sample(Span<u8 const>);
DecoderErrorOr<void> receive_sample(ByteBuffer const&);
void dump_frame_info();
// FIXME: These functions should be replaced by a struct that contains
// all the information needed to display a frame.
Vector<u16> const& get_output_buffer_for_plane(u8 plane) const;
Gfx::Size<size_t> get_y_plane_size();
bool get_uv_subsampling_y();
bool get_uv_subsampling_x();
CodingIndependentCodePoints get_cicp_color_space();
u8 get_bit_depth();
DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame();
private:
typedef i32 Intermediate;
DecoderErrorOr<void> decode_frame(Span<const u8>);
DecoderErrorOr<void> decode_frame(Span<u8 const>);
DecoderErrorOr<void> allocate_buffers();
Vector<Intermediate>& get_temp_buffer(u8 plane);
@ -139,6 +134,8 @@ private:
/* (8.10) Reference Frame Update Process */
DecoderErrorOr<void> update_reference_frames();
inline CodingIndependentCodePoints get_cicp_color_space();
NonnullOwnPtr<Parser> m_parser;
struct {

View file

@ -0,0 +1,95 @@
/*
* Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/OwnPtr.h>
#include <LibVideo/Color/ColorConverter.h>
#include "VideoFrame.h"
namespace Video {
// Fallible factory for SubsampledYUVFrame.
//
// Each of the three plane spans is handed to FixedArray<u16>::try_create() (presumably producing
// an owned copy of the samples - confirm against AK/FixedArray.h), and the resulting arrays are
// then moved into the new frame by its constructor.
//
// Returns an error if any of the plane arrays or the frame object itself cannot be allocated.
ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
    Gfx::IntSize size,
    u8 bit_depth, CodingIndependentCodePoints cicp,
    bool subsampling_horizontal, bool subsampling_vertical,
    Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v)
{
    auto luma_samples = TRY(FixedArray<u16>::try_create(plane_y));
    auto chroma_u_samples = TRY(FixedArray<u16>::try_create(plane_u));
    auto chroma_v_samples = TRY(FixedArray<u16>::try_create(plane_v));

    // The constructor takes the arrays by reference and moves out of them, so the locals above
    // must not be used after this point.
    auto* frame = new (nothrow) SubsampledYUVFrame(
        size,
        bit_depth, cicp,
        subsampling_horizontal, subsampling_vertical,
        luma_samples, chroma_u_samples, chroma_v_samples);
    return adopt_nonnull_own_or_enomem(frame);
}
// Converts this frame to RGB and writes one pixel per luma sample into `bitmap`.
//
// The U and V planes are upsampled to luma resolution one output row at a time: samples are first
// interpolated vertically into a pair of row buffers, then horizontally within those buffers.
// The chroma planes are addressed with a stride of `width >> m_subsampling_horizontal`.
//
// `bitmap` is written at every (column, row) inside this frame's size; this function does not
// itself check that the bitmap is large enough.
//
// Returns an error if the row buffers cannot be allocated or if ColorConverter::create() fails.
DecoderErrorOr<void> SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap)
{
    size_t width = this->width();
    size_t height = this->height();

    // Nothing to draw. Returning early also keeps the `height - 1` arithmetic below from wrapping.
    if (width == 0 || height == 0)
        return {};

    auto u_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
    auto v_sample_row = DECODER_TRY_ALLOC(FixedArray<u16>::try_create(width));
    size_t uv_width = width >> m_subsampling_horizontal;

    // Number of complete chroma rows that are actually stored. This is derived from the plane
    // sizes rather than from `height`, so that a frame with an odd height whose chroma planes
    // were sized by rounding down never gets indexed past its last chroma row.
    size_t uv_sample_count = m_plane_u.size() < m_plane_v.size() ? m_plane_u.size() : m_plane_v.size();
    size_t uv_height = uv_width > 0 ? uv_sample_count / uv_width : 0;

    auto converter = TRY(ColorConverter::create(bit_depth(), cicp()));

    for (size_t row = 0; row < height; row++) {
        // If there are no chroma samples at all, the row buffers keep their initial contents.
        if (uv_width > 0 && uv_height > 0) {
            size_t uv_row = row >> m_subsampling_vertical;

            // A luma row lies between two chroma rows when the frame is vertically subsampled,
            // the row index is odd, and a following chroma row exists to blend with.
            bool is_between_uv_rows = m_subsampling_vertical
                && (row & 1) != 0
                && row != height - 1
                && uv_row + 1 < uv_height;

            if (!is_between_uv_rows) {
                // This luma row is co-sited with a chroma row (or has no chroma row after it):
                // load the samples directly, clamping to the last stored chroma row.
                if (uv_row >= uv_height)
                    uv_row = uv_height - 1;
                for (size_t uv_column = 0; uv_column < uv_width; uv_column++) {
                    size_t column = uv_column << m_subsampling_horizontal;
                    size_t index = uv_row * uv_width + uv_column;
                    u_sample_row[column] = m_plane_u[index];
                    v_sample_row[column] = m_plane_v[index];
                }
            } else {
                // The row buffers still hold the previous chroma row at the sampled columns
                // (horizontal interpolation below only rewrites the columns in between), so
                // average them with the next chroma row.
                for (size_t uv_column = 0; uv_column < uv_width; uv_column++) {
                    size_t column = uv_column << m_subsampling_horizontal;
                    size_t index = (uv_row + 1) * uv_width + uv_column;
                    u_sample_row[column] = (u_sample_row[column] + m_plane_u[index]) >> 1;
                    v_sample_row[column] = (v_sample_row[column] + m_plane_v[index]) >> 1;
                }
            }

            // Replicate the last sampled column into any columns to its right that the loops
            // above did not write. Without horizontal subsampling every column was written and
            // this loop does nothing, so real samples are never overwritten.
            size_t last_sampled_column = (uv_width - 1) << m_subsampling_horizontal;
            for (size_t column = last_sampled_column + 1; column < width; column++) {
                u_sample_row[column] = u_sample_row[last_sampled_column];
                v_sample_row[column] = v_sample_row[last_sampled_column];
            }
        }

        // Interpolate the samples horizontally: every odd column becomes the average of its
        // two neighbours.
        if (m_subsampling_horizontal) {
            for (size_t column = 1; column + 1 < width; column += 2) {
                u_sample_row[column] = (u_sample_row[column - 1] + u_sample_row[column + 1]) >> 1;
                v_sample_row[column] = (v_sample_row[column - 1] + v_sample_row[column + 1]) >> 1;
            }
        }

        for (size_t column = 0; column < width; column++) {
            auto y_sample = m_plane_y[row * width + column];
            auto u_sample = u_sample_row[column];
            auto v_sample = v_sample_row[column];

            bitmap.set_pixel(Gfx::IntPoint(column, row), converter.convert_yuv_to_full_range_rgb(y_sample, u_sample, v_sample));
        }
    }

    return {};
}
}

View file

@ -0,0 +1,86 @@
/*
* Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/ByteBuffer.h>
#include <AK/Concepts.h>
#include <AK/FixedArray.h>
#include <LibGfx/Bitmap.h>
#include <LibGfx/Size.h>
#include <LibVideo/Color/CodingIndependentCodePoints.h>
#include <LibVideo/DecoderError.h>
namespace Video {
// Abstract base class for a single decoded video frame.
//
// A frame stores its pixel size, its bit depth and its coding-independent code points (CICP).
// Subclasses implement output_to_bitmap() to write the frame's pixels into a bitmap.
class VideoFrame {
public:
    virtual ~VideoFrame() = default;

    // Writes this frame's pixels into the given, already allocated bitmap.
    virtual DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) = 0;

    // Allocates a new BGRx8888 bitmap of this frame's size and fills it using output_to_bitmap().
    // Returns an error if the bitmap cannot be created or if output_to_bitmap() fails.
    virtual DecoderErrorOr<NonnullRefPtr<Gfx::Bitmap>> to_bitmap()
    {
        auto bitmap = DECODER_TRY_ALLOC(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, m_size));
        TRY(output_to_bitmap(bitmap));
        return bitmap;
    }

    // Read-only accessors are const-qualified so that they can be used through a const frame.
    Gfx::IntSize size() const { return m_size; }
    size_t width() const { return size().width(); }
    size_t height() const { return size().height(); }

    u8 bit_depth() const { return m_bit_depth; }

    // The mutable overload hands out a reference to the stored code points, so callers can
    // modify them in place.
    CodingIndependentCodePoints& cicp() { return m_cicp; }
    CodingIndependentCodePoints const& cicp() const { return m_cicp; }

protected:
    VideoFrame(Gfx::IntSize size,
        u8 bit_depth, CodingIndependentCodePoints cicp)
        : m_size(size)
        , m_bit_depth(bit_depth)
        , m_cicp(cicp)
    {
    }

    Gfx::IntSize m_size;
    u8 m_bit_depth;
    CodingIndependentCodePoints m_cicp;
};
// A VideoFrame stored as three planes (Y, U and V) of 16-bit samples, where the U and V planes
// may be subsampled horizontally and/or vertically, as indicated by the two subsampling flags.
class SubsampledYUVFrame : public VideoFrame {
public:
    // Fallible factory: builds the frame's plane arrays from the given spans and returns a
    // heap-allocated frame, or an error if an allocation fails.
    static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
        Gfx::IntSize size,
        u8 bit_depth, CodingIndependentCodePoints cicp,
        bool subsampling_horizontal, bool subsampling_vertical,
        Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v);

    // NOTE: Although the planes are taken by lvalue reference, they are moved into the frame,
    //       so the caller's arrays must not be used after this constructor returns.
    SubsampledYUVFrame(
        Gfx::IntSize size,
        u8 bit_depth, CodingIndependentCodePoints cicp,
        bool subsampling_horizontal, bool subsampling_vertical,
        FixedArray<u16>& plane_y, FixedArray<u16>& plane_u, FixedArray<u16>& plane_v)
        : VideoFrame(size, bit_depth, cicp)
        , m_subsampling_horizontal(subsampling_horizontal)
        , m_subsampling_vertical(subsampling_vertical)
        , m_plane_y(move(plane_y))
        , m_plane_u(move(plane_u))
        , m_plane_v(move(plane_v))
    {
    }

    // Writes this frame's pixels into the given bitmap; see the definition in VideoFrame.cpp.
    DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) override;

protected:
    // Whether the U and V planes are subsampled along each axis.
    bool m_subsampling_horizontal;
    bool m_subsampling_vertical;

    // Sample storage for each plane, owned by the frame.
    FixedArray<u16> m_plane_y;
    FixedArray<u16> m_plane_u;
    FixedArray<u16> m_plane_v;
};
}