LibMedia: Store YUV planes as byte arrays with no padding for 8-bit

This should halve the size of frames in memory for frames with 8-bit
color components, which is the majority of videos.

Calculation of the size of subsampled planes has also been consolidated
into a struct. There are likely some places that will still need to
change over to this, but it should prevent issues due to differing
handling of rounding/ceiling.
This commit is contained in:
Zaggy1024 2024-06-20 21:50:54 -05:00 committed by Andrew Kaster
parent b49d3dcf6f
commit 40fe0cb9d5
Notes: sideshowbarker 2024-07-17 02:08:15 +09:00
7 changed files with 206 additions and 77 deletions

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2024, Gregory Bertilson <zaggy1024@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibGfx/Size.h>
namespace Media {
struct Subsampling {
public:
Subsampling(bool x, bool y)
: m_x(x)
, m_y(y)
{
}
Subsampling() = default;
bool x() const { return m_x; }
bool y() const { return m_y; }
static u32 subsampled_size(bool subsampled, u32 size)
{
u32 subsampled_as_int = static_cast<u32>(subsampled);
return (size + subsampled_as_int) >> subsampled_as_int;
}
template<Integral T>
Gfx::Size<T> subsampled_size(Gfx::Size<T> size) const
{
return {
subsampled_size(x(), size.width()),
subsampled_size(y(), size.height())
};
}
private:
bool m_x = false;
bool m_y = false;
};
}

View file

@ -15,6 +15,7 @@
#include <LibGfx/Size.h>
#include <LibMedia/Color/CodingIndependentCodePoints.h>
#include <LibMedia/DecoderError.h>
#include <LibMedia/Subsampling.h>
#include "BooleanDecoder.h"
#include "ContextStorage.h"
@ -126,8 +127,8 @@ public:
{
if (uv) {
return {
y_size_to_uv_size(color_config.subsampling_y, blocks_to_pixels(columns())),
y_size_to_uv_size(color_config.subsampling_y, blocks_to_pixels(rows())),
Subsampling::subsampled_size(color_config.subsampling_y, blocks_to_pixels(columns())),
Subsampling::subsampled_size(color_config.subsampling_y, blocks_to_pixels(rows())),
};
}
return {

View file

@ -66,8 +66,17 @@ DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
// This is handled by update_reference_frames.
// 4. The output process as specified in section 8.9 is invoked.
if (frame_context.shows_a_frame())
TRY(create_video_frame(frame_context));
if (frame_context.shows_a_frame()) {
switch (frame_context.color_config.bit_depth) {
case 8:
TRY(create_video_frame<u8>(frame_context));
break;
case 10:
case 12:
TRY(create_video_frame<u16>(frame_context));
break;
}
}
// 5. The reference frame update process as specified in section 8.10 is invoked.
TRY(update_reference_frames(frame_context));
@ -133,6 +142,7 @@ inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& fram
return { color_primaries, transfer_characteristics, matrix_coefficients, frame_context.color_config.color_range };
}
template<typename T>
DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_context)
{
// (8.9) Output process
@ -146,37 +156,27 @@ DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_conte
// sizes, as the spec seems to prefer that the halved sizes be ceiled.
u32 decoded_y_width = frame_context.decoded_size(false).width();
auto decoded_uv_width = frame_context.decoded_size(true).width();
Gfx::Size<u32> output_y_size = frame_context.size();
auto subsampling_x = frame_context.color_config.subsampling_x;
auto subsampling_y = frame_context.color_config.subsampling_y;
Gfx::Size<u32> output_uv_size = {
y_size_to_uv_size(subsampling_x, output_y_size.width()),
y_size_to_uv_size(subsampling_y, output_y_size.height()),
};
Array<FixedArray<u16>, 3> output_buffers = {
DECODER_TRY_ALLOC(FixedArray<u16>::create(output_y_size.width() * output_y_size.height())),
DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())),
DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())),
};
for (u8 plane = 0; plane < 3; plane++) {
auto& buffer = output_buffers[plane];
Subsampling subsampling { frame_context.color_config.subsampling_x, frame_context.color_config.subsampling_y };
auto output_y_size = frame_context.size().to_type<size_t>();
auto output_uv_size = subsampling.subsampled_size(output_y_size);
auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(
{ output_y_size.width(), output_y_size.height() },
frame_context.color_config.bit_depth, get_cicp_color_space(frame_context),
subsampling));
for (u32 plane = 0; plane < 3; plane++) {
auto* buffer = frame->get_plane_data<T>(plane);
auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width;
auto output_size = plane == 0 ? output_y_size : output_uv_size;
auto const& decoded_buffer = get_output_buffer(plane);
auto const* decoded_buffer = get_output_buffer(plane).data();
for (u32 row = 0; row < output_size.height(); row++) {
memcpy(
buffer.data() + row * output_size.width(),
decoded_buffer.data() + row * decoded_width,
output_size.width() * sizeof(*buffer.data()));
for (u32 column = 0; column < output_size.width(); column++)
buffer[row * output_size.width() + column] = static_cast<T>(decoded_buffer[row * decoded_width + column]);
}
}
auto frame = DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(
{ output_y_size.width(), output_y_size.height() },
frame_context.color_config.bit_depth, get_cicp_color_space(frame_context),
subsampling_x, subsampling_y,
move(output_buffers[0]), move(output_buffers[1]), move(output_buffers[2]))));
m_video_frame_queue.enqueue(move(frame));
return {};
@ -907,7 +907,7 @@ DecoderErrorOr<void> Decoder::predict_inter_block(u8 plane, BlockContext const&
// A variable ref specifying the reference frame contents is set equal to FrameStore[ refIdx ].
auto& reference_frame_buffer = reference_frame.frame_planes[plane];
auto reference_frame_width = y_size_to_uv_size(subsampling_x, reference_frame.size.width()) + MV_BORDER * 2;
auto reference_frame_width = Subsampling::subsampled_size(subsampling_x, reference_frame.size.width()) + MV_BORDER * 2;
// The variable lastX is set equal to ( (RefFrameWidth[ refIdx ] + subX) >> subX) - 1.
// The variable lastY is set equal to ( (RefFrameHeight[ refIdx ] + subY) >> subY) - 1.
@ -1941,8 +1941,8 @@ DecoderErrorOr<void> Decoder::update_reference_frames(FrameContext const& frame_
auto height = frame_context.size().height();
auto stride = frame_context.decoded_size(plane > 0).width();
if (plane > 0) {
width = y_size_to_uv_size(frame_context.color_config.subsampling_x, width);
height = y_size_to_uv_size(frame_context.color_config.subsampling_y, height);
width = Subsampling::subsampled_size(frame_context.color_config.subsampling_x, width);
height = Subsampling::subsampled_size(frame_context.color_config.subsampling_y, height);
}
auto const& original_buffer = get_output_buffer(plane);

View file

@ -42,6 +42,7 @@ private:
static constexpr size_t maximum_transform_size = 32ULL * 32ULL;
DecoderErrorOr<void> decode_frame(ReadonlyBytes);
template<typename T>
DecoderErrorOr<void> create_video_frame(FrameContext const&);
DecoderErrorOr<void> allocate_buffers(FrameContext const&);

View file

@ -130,9 +130,4 @@ inline u8 transform_size_to_sub_blocks(TransformSize transform_size)
return 1 << transform_size;
}
inline u32 y_size_to_uv_size(bool subsampled, u32 size)
{
return (size + subsampled) >> subsampled;
}
}

View file

@ -15,17 +15,60 @@ namespace Media {
// Allocates a frame whose Y, U and V planes are sized for `size`, `bit_depth`
// and `subsampling`. The plane contents are left uninitialized.
//
// Each component occupies one byte when bit_depth <= 8 and two bytes
// otherwise; rows are stored back to back with no padding.
//
// Returns an error if any plane buffer or the frame object itself cannot be
// allocated. Every buffer allocated up to that point is released before the
// error is returned, so a failure does not leak.
ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
    Gfx::Size<u32> size,
    u8 bit_depth, CodingIndependentCodePoints cicp,
    Subsampling subsampling)
{
    VERIFY(bit_depth < 16);
    size_t component_size = bit_depth > 8 ? sizeof(u16) : sizeof(u8);
    // posix_memalign() needs an alignment that is a power of two and a multiple of sizeof(void*).
    size_t alignment_size = max(component_size, sizeof(void*));

    auto y_data_size = size.to_type<size_t>().area() * component_size;
    auto uv_data_size = subsampling.subsampled_size(size).to_type<size_t>().area() * component_size;

    // Index 0 is Y, 1 is U, 2 is V. Entries stay null until their allocation
    // succeeds, so free_buffers() is safe to call at any point.
    u8* buffers[3] = { nullptr, nullptr, nullptr };
    size_t const buffer_sizes[3] = { y_data_size, uv_data_size, uv_data_size };
    auto free_buffers = [&] {
        for (auto* buffer : buffers)
            free(buffer);
    };

    for (size_t plane = 0; plane < 3; plane++) {
        // Allocate into a temporary so a failed call can never leave a stale
        // pointer in `buffers`.
        void* buffer = nullptr;
        auto result = posix_memalign(&buffer, alignment_size, buffer_sizes[plane]);
        if (result != 0) {
            free_buffers();
            return Error::from_errno(result);
        }
        buffers[plane] = static_cast<u8*>(buffer);
    }

    auto frame = adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(size, bit_depth, cicp, subsampling, buffers[0], buffers[1], buffers[2]));
    // If the frame object could not be allocated, nothing took ownership of the buffers.
    if (frame.is_error())
        free_buffers();
    return frame;
}
template<u32 subsampling_horizontal>
ALWAYS_INLINE void interpolate_row(u32 const row, u32 const width, u16 const* plane_u, u16 const* plane_v, u16* __restrict__ u_row, u16* __restrict__ v_row)
// Creates a frame through try_create() and fills its three planes by copying
// from the supplied byte spans.
//
// Each span must hold at least as many bytes as the matching plane needs
// (checked with VERIFY); any extra trailing bytes are ignored.
ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create_from_data(
    Gfx::Size<u32> size,
    u8 bit_depth, CodingIndependentCodePoints cicp,
    Subsampling subsampling,
    ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data)
{
    auto frame = TRY(try_create(size, bit_depth, cicp, subsampling));

    // One byte per component up to 8 bits, two bytes above that.
    size_t const bytes_per_component = bit_depth > 8 ? sizeof(u16) : sizeof(u8);
    auto const luma_area = size.to_type<size_t>().area();
    auto const chroma_area = subsampling.subsampled_size(size).to_type<size_t>().area();
    size_t const y_data_size = luma_area * bytes_per_component;
    size_t const uv_data_size = chroma_area * bytes_per_component;

    VERIFY(y_data.size() >= y_data_size);
    VERIFY(u_data.size() >= uv_data_size);
    VERIFY(v_data.size() >= uv_data_size);

    auto fill_plane = [](u8* destination, ReadonlyBytes source, size_t byte_count) {
        memcpy(destination, source.data(), byte_count);
    };
    fill_plane(frame->m_y_buffer, y_data, y_data_size);
    fill_plane(frame->m_u_buffer, u_data, uv_data_size);
    fill_plane(frame->m_v_buffer, v_data, uv_data_size);

    return frame;
}
// Releases the Y, U and V plane buffers with free().
SubsampledYUVFrame::~SubsampledYUVFrame()
{
    u8* const plane_buffers[] = { m_y_buffer, m_u_buffer, m_v_buffer };
    for (auto* plane_buffer : plane_buffers)
        free(plane_buffer);
}
template<u32 subsampling_horizontal, typename T>
ALWAYS_INLINE void interpolate_row(u32 const row, u32 const width, T const* plane_u, T const* plane_v, T* __restrict__ u_row, T* __restrict__ v_row)
{
// OPTIMIZATION: __restrict__ allows some load eliminations because the planes and the rows will not alias.
@ -57,15 +100,15 @@ ALWAYS_INLINE void interpolate_row(u32 const row, u32 const width, u16 const* pl
}
}
template<u32 subsampling_horizontal, u32 subsampling_vertical, typename Convert>
ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_subsampled(Convert convert, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
template<u32 subsampling_horizontal, u32 subsampling_vertical, typename T, typename Convert>
ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_subsampled(Convert convert, u32 const width, u32 const height, T const* plane_y, T const* plane_u, T const* plane_v, Gfx::Bitmap& bitmap)
{
VERIFY(bitmap.width() >= 0);
VERIFY(bitmap.height() >= 0);
VERIFY(static_cast<u32>(bitmap.width()) == width);
VERIFY(static_cast<u32>(bitmap.height()) == height);
auto temporary_buffer = DECODER_TRY_ALLOC(FixedArray<u16>::create(static_cast<size_t>(width) * 4));
auto temporary_buffer = DECODER_TRY_ALLOC(FixedArray<T>::create(static_cast<size_t>(width) * 4));
// Above rows
auto* u_row_a = temporary_buffer.span().slice(static_cast<size_t>(width) * 0, width).data();
@ -77,14 +120,14 @@ ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_subsampled(Convert convert,
u32 const vertical_step = 1 << subsampling_vertical;
interpolate_row<subsampling_horizontal>(0, width, plane_u.data(), plane_v.data(), u_row_a, v_row_a);
interpolate_row<subsampling_horizontal>(0, width, plane_u, plane_v, u_row_a, v_row_a);
// Do interpolation for all inner rows.
u32 const rows_end = height - subsampling_vertical;
for (u32 row = 0; row < rows_end; row += vertical_step) {
// Horizontally scale the row if subsampled.
auto uv_row = row >> subsampling_vertical;
interpolate_row<subsampling_horizontal>(uv_row, width, plane_u.data(), plane_v.data(), u_row_b, v_row_b);
interpolate_row<subsampling_horizontal>(uv_row, width, plane_u, plane_v, u_row_b, v_row_b);
// If subsampled vertically, vertically interpolate the middle row between the above and below rows.
if constexpr (subsampling_vertical != 0) {
@ -129,46 +172,60 @@ ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_subsampled(Convert convert,
return {};
}
template<u32 subsampling_horizontal, u32 subsampling_vertical>
static ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_selecting_converter(CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
template<u32 subsampling_horizontal, u32 subsampling_vertical, typename T>
static ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_selecting_converter(CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, void* plane_y_data, void* plane_u_data, void* plane_v_data, Gfx::Bitmap& bitmap)
{
auto const* plane_y = reinterpret_cast<T const*>(plane_y_data);
auto const* plane_u = reinterpret_cast<T const*>(plane_u_data);
auto const* plane_v = reinterpret_cast<T const*>(plane_v_data);
constexpr auto output_cicp = CodingIndependentCodePoints(ColorPrimaries::BT709, TransferCharacteristics::SRGB, MatrixCoefficients::BT709, VideoFullRangeFlag::Full);
if (bit_depth == 8 && cicp.transfer_characteristics() == output_cicp.transfer_characteristics() && cicp.color_primaries() == output_cicp.color_primaries() && cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
switch (cicp.matrix_coefficients()) {
case MatrixCoefficients::BT709:
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT709, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](T y, T u, T v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT709, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
case MatrixCoefficients::BT601:
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT601, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([](T y, T u, T v) { return ColorConverter::convert_simple_yuv_to_rgb<MatrixCoefficients::BT601, VideoFullRangeFlag::Studio>(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
default:
break;
}
}
auto converter = TRY(ColorConverter::create(bit_depth, cicp, output_cicp));
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([&](u16 y, u16 u, u16 v) { return converter.convert_yuv(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
return convert_to_bitmap_subsampled<subsampling_horizontal, subsampling_vertical>([&](T y, T u, T v) { return converter.convert_yuv(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap);
}
static DecoderErrorOr<void> convert_to_bitmap_selecting_subsampling(bool subsampling_horizontal, bool subsampling_vertical, CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray<u16> const& plane_y, FixedArray<u16> const& plane_u, FixedArray<u16> const& plane_v, Gfx::Bitmap& bitmap)
template<u32 subsampling_horizontal, u32 subsampling_vertical>
static ALWAYS_INLINE DecoderErrorOr<void> convert_to_bitmap_selecting_bit_depth(CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, void* plane_y_data, void* plane_u_data, void* plane_v_data, Gfx::Bitmap& bitmap)
{
if (subsampling_horizontal && subsampling_vertical) {
return convert_to_bitmap_selecting_converter<true, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
if (bit_depth <= 8) {
return convert_to_bitmap_selecting_converter<subsampling_horizontal, subsampling_vertical, u8>(cicp, bit_depth, width, height, plane_y_data, plane_u_data, plane_v_data, bitmap);
}
if (subsampling_horizontal && !subsampling_vertical) {
return convert_to_bitmap_selecting_converter<true, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
return convert_to_bitmap_selecting_converter<subsampling_horizontal, subsampling_vertical, u16>(cicp, bit_depth, width, height, plane_y_data, plane_u_data, plane_v_data, bitmap);
}
static DecoderErrorOr<void> convert_to_bitmap_selecting_subsampling(Subsampling subsampling, CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, void* plane_y, void* plane_u, void* plane_v, Gfx::Bitmap& bitmap)
{
if (subsampling.x() && subsampling.y()) {
return convert_to_bitmap_selecting_bit_depth<true, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
}
if (!subsampling_horizontal && subsampling_vertical) {
return convert_to_bitmap_selecting_converter<false, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
if (subsampling.x() && !subsampling.y()) {
return convert_to_bitmap_selecting_bit_depth<true, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
}
return convert_to_bitmap_selecting_converter<false, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
if (!subsampling.x() && subsampling.y()) {
return convert_to_bitmap_selecting_bit_depth<false, true>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
}
return convert_to_bitmap_selecting_bit_depth<false, false>(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap);
}
DecoderErrorOr<void> SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap)
{
return convert_to_bitmap_selecting_subsampling(m_subsampling_horizontal, m_subsampling_vertical, cicp(), bit_depth(), width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap);
return convert_to_bitmap_selecting_subsampling(m_subsampling, cicp(), bit_depth(), width(), height(), m_y_buffer, m_u_buffer, m_v_buffer, bitmap);
}
}

View file

@ -13,6 +13,7 @@
#include <LibMedia/Color/CodingIndependentCodePoints.h>
#include "DecoderError.h"
#include "Subsampling.h"
namespace Media {
@ -56,31 +57,59 @@ public:
static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
Gfx::Size<u32> size,
u8 bit_depth, CodingIndependentCodePoints cicp,
bool subsampling_horizontal, bool subsampling_vertical,
Span<u16> plane_y, Span<u16> plane_u, Span<u16> plane_v);
Subsampling subsampling);
static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create_from_data(
Gfx::Size<u32> size,
u8 bit_depth, CodingIndependentCodePoints cicp,
Subsampling subsampling,
ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data);
SubsampledYUVFrame(
Gfx::Size<u32> size,
u8 bit_depth, CodingIndependentCodePoints cicp,
bool subsampling_horizontal, bool subsampling_vertical,
FixedArray<u16>&& plane_y, FixedArray<u16>&& plane_u, FixedArray<u16>&& plane_v)
Subsampling subsampling,
u8* plane_y_data, u8* plane_u_data, u8* plane_v_data)
: VideoFrame(size, bit_depth, cicp)
, m_subsampling_horizontal(subsampling_horizontal)
, m_subsampling_vertical(subsampling_vertical)
, m_plane_y(move(plane_y))
, m_plane_u(move(plane_u))
, m_plane_v(move(plane_v))
, m_subsampling(subsampling)
, m_y_buffer(plane_y_data)
, m_u_buffer(plane_u_data)
, m_v_buffer(plane_v_data)
{
VERIFY(m_y_buffer != nullptr);
VERIFY(m_u_buffer != nullptr);
VERIFY(m_v_buffer != nullptr);
}
~SubsampledYUVFrame();
DecoderErrorOr<void> output_to_bitmap(Gfx::Bitmap& bitmap) override;
// Returns the untyped byte buffer for a plane: 0 is Y, 1 is U, 2 is V.
// Any other index is a programming error and trips VERIFY_NOT_REACHED().
u8* get_raw_plane_data(u32 plane)
{
    if (plane == 0)
        return m_y_buffer;
    if (plane == 1)
        return m_u_buffer;
    if (plane == 2)
        return m_v_buffer;
    VERIFY_NOT_REACHED();
}
// Returns the buffer for `plane` (0 = Y, 1 = U, 2 = V) typed as the frame's
// component type.
//
// T must be u8 for frames with bit_depth() <= 8 and u16 for deeper frames.
// Any other T is rejected at compile time, and a u8/u16 choice that does not
// match the frame's bit depth trips the VERIFY at runtime.
template<typename T>
T* get_plane_data(u32 plane)
{
    // Without this, a frame deeper than 8 bits would accept any non-u8 T and
    // hand back a pointer with the wrong element size.
    static_assert(IsSame<T, u8> || IsSame<T, u16>, "Plane components are stored as either u8 or u16");
    VERIFY((IsSame<T, u8>) == (bit_depth() <= 8));
    return reinterpret_cast<T*>(get_raw_plane_data(plane));
}
protected:
bool m_subsampling_horizontal;
bool m_subsampling_vertical;
FixedArray<u16> m_plane_y;
FixedArray<u16> m_plane_u;
FixedArray<u16> m_plane_v;
Subsampling m_subsampling;
u8* m_y_buffer = nullptr;
u8* m_u_buffer = nullptr;
u8* m_v_buffer = nullptr;
};
}