LibMedia: Give frame timestamps to FFmpeg decoders

H.264 in Matroska can have blocks with unordered timestamps. Unless
these are passed to the FFmpeg decoder as presentation timestamps,
it will not return frames in chronological order.

VideoFrame now carries a timestamp, which the PlaybackManager uses
instead of assuming that a decoded frame has the same timestamp as
the sample returned by the demuxer.
Zaggy1024 authored 2024-06-19 18:29:12 -05:00, committed by Andrew Kaster
parent 084cf68dd5
commit f6a4973578
Notes: sideshowbarker 2024-07-17 02:22:23 +09:00
11 changed files with 42 additions and 26 deletions
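To illustrate the reordering mechanism the message describes, here is a minimal sketch (not code from this commit) of the FFmpeg decode loop it enables: libavcodec buffers frames internally and returns them in presentation order, keyed by the pts supplied on each packet. codec_context, sample, and timestamp are stand-ins for the decoder's members and arguments.

    // Feed one compressed sample, tagged with its container timestamp.
    AVPacket* packet = av_packet_alloc();
    packet->data = const_cast<u8*>(sample.data());
    packet->size = static_cast<int>(sample.size());
    packet->pts = timestamp.to_microseconds(); // decode order in...
    avcodec_send_packet(codec_context, packet);

    // Drain any frames the codec is ready to emit.
    AVFrame* frame = av_frame_alloc();
    while (avcodec_receive_frame(codec_context, frame) == 0) {
        // ...presentation order out: frame->pts is the reordered timestamp.
        auto presentation_time = Duration::from_microseconds(frame->pts);
    }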


@@ -11,6 +11,6 @@ extern "C" int LLVMFuzzerTestOneInput(u8 const* data, size_t size)
 {
     AK::set_debug_enabled(false);
     Media::Video::VP9::Decoder vp9_decoder;
-    (void)vp9_decoder.receive_sample({ data, size });
+    (void)vp9_decoder.receive_sample(Duration::zero(), { data, size });
     return 0;
 }


@@ -37,7 +37,7 @@ static inline void decode_video(StringView path, size_t expected_frame_count, T
         auto block = block_result.release_value();
         for (auto const& frame : block.frames()) {
-            MUST(decoder->receive_sample(frame));
+            MUST(decoder->receive_sample(block.timestamp(), frame));
             while (true) {
                 auto frame_result = decoder->get_decoded_frame();
                 if (frame_result.is_error()) {


@@ -35,7 +35,7 @@ TEST_CASE(vp9_malformed_frame)
     for (auto test_input : test_inputs) {
         auto file = MUST(Core::MappedFile::map(test_input));
         Media::Video::VP9::Decoder vp9_decoder;
-        auto maybe_decoder_error = vp9_decoder.receive_sample(file->bytes());
+        auto maybe_decoder_error = vp9_decoder.receive_sample(Duration::zero(), file->bytes());
         EXPECT(maybe_decoder_error.is_error());
     }
 }


@@ -101,12 +101,14 @@ FFmpegVideoDecoder::~FFmpegVideoDecoder()
     avcodec_free_context(&m_codec_context);
 }

-DecoderErrorOr<void> FFmpegVideoDecoder::receive_sample(ReadonlyBytes sample)
+DecoderErrorOr<void> FFmpegVideoDecoder::receive_sample(Duration timestamp, ReadonlyBytes sample)
 {
     VERIFY(sample.size() < NumericLimits<int>::max());

     m_packet->data = const_cast<u8*>(sample.data());
     m_packet->size = static_cast<int>(sample.size());
+    m_packet->pts = timestamp.to_microseconds();
+    m_packet->dts = m_packet->pts;

     auto result = avcodec_send_packet(m_codec_context, m_packet);
     switch (result) {
@@ -187,7 +189,8 @@ DecoderErrorOr<NonnullOwnPtr<VideoFrame>> FFmpegVideoDecoder::get_decoded_frame(
     auto size = Gfx::Size<u32> { m_frame->width, m_frame->height };

-    auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(size, bit_depth, cicp, subsampling));
+    auto timestamp = Duration::from_microseconds(m_frame->pts);
+    auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(timestamp, size, bit_depth, cicp, subsampling));

     for (u32 plane = 0; plane < 3; plane++) {
         VERIFY(m_frame->linesize[plane] != 0);
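A note on the unit choice above: during decoding, libavcodec reorders timestamps but does not rescale them, so the microsecond value placed on m_packet->pts comes back unchanged on m_frame->pts. As a round-trip sketch, with some_timestamp standing in for any sample's Duration:

    m_packet->pts = some_timestamp.to_microseconds();
    // ... avcodec_send_packet() / avcodec_receive_frame() ...
    auto recovered = Duration::from_microseconds(m_frame->pts); // matches the Duration given for this frame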


@@ -19,7 +19,7 @@ public:
     FFmpegVideoDecoder(AVCodecContext* codec_context, AVPacket* packet, AVFrame* frame);
     ~FFmpegVideoDecoder();

-    DecoderErrorOr<void> receive_sample(ReadonlyBytes sample) override;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes sample) override;
     DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() override;

 private:


@@ -215,7 +215,7 @@ void PlaybackManager::decode_and_queue_one_sample()
         auto sample = sample_result.release_value();

         // Submit the sample to the decoder.
-        auto decode_result = m_decoder->receive_sample(sample.data());
+        auto decode_result = m_decoder->receive_sample(sample.timestamp(), sample.data());
         if (decode_result.is_error()) {
             item_to_enqueue = FrameQueueItem::error_marker(decode_result.release_error(), sample.timestamp());
             break;
@@ -262,9 +262,9 @@ void PlaybackManager::decode_and_queue_one_sample()
             auto bitmap_result = decoded_frame->to_bitmap();

             if (bitmap_result.is_error())
-                item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), sample.timestamp());
+                item_to_enqueue = FrameQueueItem::error_marker(bitmap_result.release_error(), decoded_frame->timestamp());
             else
-                item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), sample.timestamp());
+                item_to_enqueue = FrameQueueItem::frame(bitmap_result.release_value(), decoded_frame->timestamp());
             break;
         }
     }
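This is where the change becomes visible: with a codec that reorders frames, the sample fed in (decode order) and the frame coming out (presentation order) generally carry different timestamps. A hypothetical B-frame pattern:

    // Samples submitted (decode order):  pts = 0 ms, 100 ms,  33 ms,  66 ms
    // Frames returned (presentation):    pts = 0 ms,  33 ms,  66 ms, 100 ms

Tagging each FrameQueueItem with decoded_frame->timestamp() rather than sample.timestamp() keeps the queue in presentation order.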


@@ -25,12 +25,12 @@ Decoder::Decoder()
 {
 }

-DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data)
+DecoderErrorOr<void> Decoder::receive_sample(Duration timestamp, ReadonlyBytes chunk_data)
 {
     auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);

     if (superframe_sizes.is_empty()) {
-        return decode_frame(chunk_data);
+        return decode_frame(timestamp, chunk_data);
     }

     size_t offset = 0;
@@ -41,14 +41,14 @@ DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data)
         if (checked_size.has_overflow() || checked_size.value() > chunk_data.size())
             return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Superframe size invalid"sv);
         auto frame_data = chunk_data.slice(offset, superframe_size);
-        TRY(decode_frame(frame_data));
+        TRY(decode_frame(timestamp, frame_data));
         offset = checked_size.value();
     }

     return {};
 }

-DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
+DecoderErrorOr<void> Decoder::decode_frame(Duration timestamp, ReadonlyBytes frame_data)
 {
     // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
     //    tables include function calls indicating when the block decode processes should be triggered.
@@ -69,11 +69,11 @@ DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
     if (frame_context.shows_a_frame()) {
         switch (frame_context.color_config.bit_depth) {
         case 8:
-            TRY(create_video_frame<u8>(frame_context));
+            TRY(create_video_frame<u8>(timestamp, frame_context));
             break;
         case 10:
         case 12:
-            TRY(create_video_frame<u16>(frame_context));
+            TRY(create_video_frame<u16>(timestamp, frame_context));
             break;
         }
     }
@@ -143,7 +143,7 @@ inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& fram
 }

 template<typename T>
-DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_context)
+DecoderErrorOr<void> Decoder::create_video_frame(Duration timestamp, FrameContext const& frame_context)
 {
     // (8.9) Output process
@@ -162,6 +162,7 @@ DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_conte
     auto output_uv_size = subsampling.subsampled_size(output_y_size);

     auto frame = DECODER_TRY_ALLOC(SubsampledYUVFrame::try_create(
+        timestamp,
         { output_y_size.width(), output_y_size.height() },
         frame_context.color_config.bit_depth, get_cicp_color_space(frame_context),
         subsampling));
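Every frame within a VP9 superframe inherits the container block's timestamp here. That is sound because, as the shows_a_frame() check above reflects, a superframe typically carries invisible reference frames plus at most one displayed frame, so only one VideoFrame per block reaches the output.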


@@ -28,7 +28,7 @@ public:
     Decoder();
     ~Decoder() override { }

     /* (8.1) General */
-    DecoderErrorOr<void> receive_sample(ReadonlyBytes) override;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes) override;

     DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() override;
@@ -41,9 +41,9 @@ private:
     // Based on the maximum for TXSize.
     static constexpr size_t maximum_transform_size = 32ULL * 32ULL;

-    DecoderErrorOr<void> decode_frame(ReadonlyBytes);
+    DecoderErrorOr<void> decode_frame(Duration timestamp, ReadonlyBytes);
     template<typename T>
-    DecoderErrorOr<void> create_video_frame(FrameContext const&);
+    DecoderErrorOr<void> create_video_frame(Duration timestamp, FrameContext const&);

     DecoderErrorOr<void> allocate_buffers(FrameContext const&);
     Vector<u16>& get_output_buffer(u8 plane);


@@ -8,6 +8,7 @@

 #include <AK/ByteBuffer.h>
 #include <AK/NonnullOwnPtr.h>
+#include <AK/Time.h>

 #include "DecoderError.h"
@@ -17,8 +18,8 @@ class VideoDecoder {
 public:
     virtual ~VideoDecoder() {};

-    virtual DecoderErrorOr<void> receive_sample(ReadonlyBytes sample) = 0;
-    DecoderErrorOr<void> receive_sample(ByteBuffer const& sample) { return receive_sample(sample.span()); }
+    virtual DecoderErrorOr<void> receive_sample(Duration timestamp, ReadonlyBytes sample) = 0;
+    DecoderErrorOr<void> receive_sample(Duration timestamp, ByteBuffer const& sample) { return receive_sample(timestamp, sample.span()); }

     virtual DecoderErrorOr<NonnullOwnPtr<VideoFrame>> get_decoded_frame() = 0;
 };
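A hypothetical caller of the updated interface, showing how a timestamp flows through it; demuxer and present_frame() are stand-ins, not APIs from this patch:

    auto sample = TRY(demuxer.get_next_sample());
    TRY(decoder->receive_sample(sample.timestamp(), sample.data()));

    while (true) {
        auto frame_result = decoder->get_decoded_frame();
        if (frame_result.is_error())
            break; // commonly a "needs more input" error until more samples arrive
        auto frame = frame_result.release_value();
        // Present using the frame's own (reordered) timestamp, not the sample's.
        present_frame(frame->to_bitmap(), frame->timestamp());
    }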


@@ -13,6 +13,7 @@
 namespace Media {

 ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
+    Duration timestamp,
     Gfx::Size<u32> size,
     u8 bit_depth, CodingIndependentCodePoints cicp,
     Subsampling subsampling)
@@ -35,16 +36,17 @@ ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create(
     auto* u_buffer = TRY(alloc_buffer(uv_data_size));
     auto* v_buffer = TRY(alloc_buffer(uv_data_size));

-    return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer));
+    return adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(timestamp, size, bit_depth, cicp, subsampling, y_buffer, u_buffer, v_buffer));
 }

 ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> SubsampledYUVFrame::try_create_from_data(
+    Duration timestamp,
     Gfx::Size<u32> size,
     u8 bit_depth, CodingIndependentCodePoints cicp,
     Subsampling subsampling,
     ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data)
 {
-    auto frame = TRY(try_create(size, bit_depth, cicp, subsampling));
+    auto frame = TRY(try_create(timestamp, size, bit_depth, cicp, subsampling));

     size_t component_size = bit_depth > 8 ? sizeof(u16) : sizeof(u8);
     auto y_data_size = size.to_type<size_t>().area() * component_size;


@@ -8,6 +8,7 @@

 #include <AK/ByteBuffer.h>
 #include <AK/FixedArray.h>
+#include <AK/Time.h>
 #include <LibGfx/Bitmap.h>
 #include <LibGfx/Size.h>
 #include <LibMedia/Color/CodingIndependentCodePoints.h>
@@ -30,6 +31,8 @@ public:
         return bitmap;
     }

+    inline Duration timestamp() const { return m_timestamp; }
+
     inline Gfx::Size<u32> size() const { return m_size; }
     inline u32 width() const { return size().width(); }
     inline u32 height() const { return size().height(); }
@@ -38,14 +41,17 @@ public:
     inline CodingIndependentCodePoints& cicp() { return m_cicp; }

 protected:
-    VideoFrame(Gfx::Size<u32> size,
+    VideoFrame(Duration timestamp,
+        Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp)
-        : m_size(size)
+        : m_timestamp(timestamp)
+        , m_size(size)
         , m_bit_depth(bit_depth)
         , m_cicp(cicp)
     {
     }

+    Duration m_timestamp;
     Gfx::Size<u32> m_size;
     u8 m_bit_depth;
     CodingIndependentCodePoints m_cicp;
@@ -55,22 +61,25 @@ class SubsampledYUVFrame : public VideoFrame {
 public:
     static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling);

     static ErrorOr<NonnullOwnPtr<SubsampledYUVFrame>> try_create_from_data(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling,
         ReadonlyBytes y_data, ReadonlyBytes u_data, ReadonlyBytes v_data);

     SubsampledYUVFrame(
+        Duration timestamp,
         Gfx::Size<u32> size,
         u8 bit_depth, CodingIndependentCodePoints cicp,
         Subsampling subsampling,
         u8* plane_y_data, u8* plane_u_data, u8* plane_v_data)
-        : VideoFrame(size, bit_depth, cicp)
+        : VideoFrame(timestamp, size, bit_depth, cicp)
         , m_subsampling(subsampling)
         , m_y_buffer(plane_y_data)
         , m_u_buffer(plane_u_data)