From f6830eaf73e3a18d4a386f6f164de5ff70d76cd3 Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Sun, 13 Nov 2022 19:28:56 -0600 Subject: [PATCH] LibVideo: Implement Matroska Cues for faster keyframe lookup This implements the fastest seeking mode available for tracks with cues using an array of cue points for each track. It approximates the index based on the seeking timestamp and then finds the last cue point at or before the timestamp. The approximation assumes that cues will be on a regular interval, which I don't believe is always the case, but it should at least be faster than iterating the whole set of cue points each time. Cues are stored per track, but most videos will only have cue points for the video track(s) that are present. For now, this assumes that it should only seek based on the cue points for the selected track. To seek audio in a video file, we should copy the seeked iterator over to the audio track's iterator after seeking is complete. The iterator will then skip to the next audio block. 
--- .../LibVideo/Containers/Matroska/Document.h | 28 +++ .../LibVideo/Containers/Matroska/Reader.cpp | 225 +++++++++++++++++- .../LibVideo/Containers/Matroska/Reader.h | 11 + .../Libraries/LibVideo/PlaybackManager.cpp | 2 +- Userland/Utilities/matroska.cpp | 24 ++ 5 files changed, 288 insertions(+), 2 deletions(-) diff --git a/Userland/Libraries/LibVideo/Containers/Matroska/Document.h b/Userland/Libraries/LibVideo/Containers/Matroska/Document.h index 195647fc1ab..eab1174431b 100644 --- a/Userland/Libraries/LibVideo/Containers/Matroska/Document.h +++ b/Userland/Libraries/LibVideo/Containers/Matroska/Document.h @@ -203,4 +203,32 @@ private: Time m_timestamp { Time::zero() }; }; +class CueTrackPosition { +public: + u64 track_number() const { return m_track_number; } + void set_track_number(u64 track_number) { m_track_number = track_number; } + size_t cluster_position() const { return m_cluster_position; } + void set_cluster_position(size_t cluster_position) { m_cluster_position = cluster_position; } + size_t block_offset() const { return m_block_offset; } + void set_block_offset(size_t block_offset) { m_block_offset = block_offset; } + +private: + u64 m_track_number { 0 }; + size_t m_cluster_position { 0 }; + size_t m_block_offset { 0 }; +}; + +class CuePoint { +public: + Time timestamp() const { return m_timestamp; } + void set_timestamp(Time timestamp) { m_timestamp = timestamp; } + OrderedHashMap& track_positions() { return m_track_positions; } + OrderedHashMap const& track_positions() const { return m_track_positions; } + Optional position_for_track(u64 track_number) const { return m_track_positions.get(track_number); } + +private: + Time m_timestamp = Time::min(); + OrderedHashMap m_track_positions; +}; + } diff --git a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp index abc4f0f9582..1529a9f4644 100644 --- a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp +++ 
b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp @@ -66,6 +66,17 @@ constexpr u32 BIT_DEPTH_ID = 0x6264; constexpr u32 SIMPLE_BLOCK_ID = 0xA3; constexpr u32 TIMESTAMP_ID = 0xE7; +// Cues +constexpr u32 CUES_ID = 0x1C53BB6B; +constexpr u32 CUE_POINT_ID = 0xBB; +constexpr u32 CUE_TIME_ID = 0xB3; +constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7; +constexpr u32 CUE_TRACK_ID = 0xF7; +constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1; +constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0; +constexpr u32 CUE_CODEC_STATE_ID = 0xEA; +constexpr u32 CUE_REFERENCE_ID = 0xDB; + DecoderErrorOr Reader::from_file(StringView path) { auto mapped_file = DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path)); @@ -625,6 +636,175 @@ DecoderErrorOr Reader::create_sample_iterator(u64 track_number) return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position); } +static DecoderErrorOr parse_cue_track_position(Streamer& streamer) +{ + CueTrackPosition track_position; + + bool had_cluster_position = false; + + TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + switch (element_id) { + case CUE_TRACK_ID: + track_position.set_track_number(TRY_READ(streamer.read_u64())); + dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions track number {}", track_position.track_number()); + break; + case CUE_CLUSTER_POSITION_ID: + track_position.set_cluster_position(TRY_READ(streamer.read_u64())); + dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions cluster position {}", track_position.cluster_position()); + had_cluster_position = true; + break; + case CUE_RELATIVE_POSITION_ID: + track_position.set_block_offset(TRY_READ(streamer.read_u64())); + dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions relative position {}", track_position.block_offset()); + break; + case CUE_CODEC_STATE_ID: + // Mandatory in spec, but not present in files? 
0 means use TrackEntry's codec state. + // FIXME: Do something with this value. + dbgln_if(MATROSKA_DEBUG, "Found CodecState, skipping"); + TRY_READ(streamer.read_unknown_element()); + break; + case CUE_REFERENCE_ID: + return DecoderError::not_implemented(); + default: + TRY_READ(streamer.read_unknown_element()); + break; + } + + return IterationDecision::Continue; + })); + + if (track_position.track_number() == 0) + return DecoderError::corrupted("Track number was not present or 0"sv); + + if (!had_cluster_position) + return DecoderError::corrupted("Cluster was missing the cluster position"sv); + + return track_position; +} + +static DecoderErrorOr parse_cue_point(Streamer& streamer, u64 timestamp_scale) +{ + CuePoint cue_point; + + TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + switch (element_id) { + case CUE_TIME_ID: { + // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element: + // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks. + // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html: + // > For such elements, the timestamp value is stored directly in nanoseconds. 
+ // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees: + // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941 + // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416 + // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392 + // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^( + auto timestamp = Time::from_nanoseconds(static_cast(TRY_READ(streamer.read_u64()) * timestamp_scale)); + cue_point.set_timestamp(timestamp); + dbgln_if(MATROSKA_DEBUG, "Read CuePoint timestamp {}ms", cue_point.timestamp().to_milliseconds()); + break; + } + case CUE_TRACK_POSITIONS_ID: { + auto track_position = TRY_READ(parse_cue_track_position(streamer)); + DECODER_TRY_ALLOC(cue_point.track_positions().try_set(track_position.track_number(), track_position)); + break; + } + default: + TRY_READ(streamer.read_unknown_element()); + break; + } + + return IterationDecision::Continue; + })); + + if (cue_point.timestamp().is_negative()) + return DecoderError::corrupted("CuePoint was missing a timestamp"sv); + + if (cue_point.track_positions().is_empty()) + return DecoderError::corrupted("CuePoint was missing track positions"sv); + + return cue_point; +} + +DecoderErrorOr Reader::parse_cues(Streamer& streamer) +{ + m_cues.clear(); + + TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + switch (element_id) { + case CUE_POINT_ID: { + auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale())); + + // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now, + // but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead. 
+ + for (auto track_position_entry : cue_point.track_positions()) { + if (!m_cues.contains(track_position_entry.key)) + DECODER_TRY_ALLOC(m_cues.try_set(track_position_entry.key, Vector())); + Vector& cue_points_for_track = m_cues.get(track_position_entry.key).release_value(); + cue_points_for_track.append(cue_point); + } + break; + } + default: + return DecoderError::format(DecoderErrorCategory::Corrupted, "Unknown Cues child ID {:#010x}", element_id); + } + + return IterationDecision::Continue; + })); + + return {}; +} + +DecoderErrorOr Reader::ensure_cues_are_parsed() +{ + if (m_cues_have_been_parsed) + return {}; + auto position = TRY(find_first_top_level_element_with_id("Cues"sv, CUES_ID)); + if (!position.has_value()) + return DecoderError::corrupted("No Tracks element found"sv); + Streamer streamer { m_data }; + TRY_READ(streamer.seek_to_position(position.release_value())); + TRY(parse_cues(streamer)); + m_cues_have_been_parsed = true; + return {}; +} + +DecoderErrorOr Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Time const& timestamp) +{ + auto const& cue_points = MUST(cue_points_for_track(iterator.m_track.track_number())).release_value(); + + // Take a guess at where in the cues the timestamp will be and correct from there. 
+ auto duration = TRY(segment_information()).duration(); + size_t index = 0; + if (duration.has_value()) + index = clamp(((timestamp.to_nanoseconds() * cue_points.size()) / TRY(segment_information()).duration()->to_nanoseconds()), 0, cue_points.size() - 1); + + CuePoint const* prev_cue_point = &cue_points[index]; + + if (prev_cue_point->timestamp() == timestamp) { + TRY(iterator.seek_to_cue_point(*prev_cue_point)); + return {}; + } + + if (prev_cue_point->timestamp() > timestamp) { + while (index > 0 && prev_cue_point->timestamp() > timestamp) + prev_cue_point = &cue_points[--index]; + TRY(iterator.seek_to_cue_point(*prev_cue_point)); + return {}; + } + + while (index < cue_points.size()) { + auto const& cue_point = cue_points[index++]; + if (cue_point.timestamp() > timestamp) + break; + prev_cue_point = &cue_point; + index++; + } + + TRY(iterator.seek_to_cue_point(*prev_cue_point)); + return {}; +} + static DecoderErrorOr find_keyframe_before_timestamp(SampleIterator& iterator, Time const& timestamp) { #if MATROSKA_DEBUG @@ -662,9 +842,27 @@ static DecoderErrorOr find_keyframe_before_timestamp(SampleIterator& itera return false; } +DecoderErrorOr Reader::has_cues_for_track(u64 track_number) +{ + TRY(ensure_cues_are_parsed()); + return m_cues.contains(track_number); +} + DecoderErrorOr Reader::seek_to_random_access_point(SampleIterator& iterator, Time timestamp) { - // FIXME: Use Cues to look these up if the element is present. + if (iterator.m_last_timestamp == timestamp) + return {}; + + if (TRY(has_cues_for_track(iterator.m_track.track_number()))) { + auto seeked_iterator = iterator; + TRY(seek_to_cue_for_timestamp(seeked_iterator, timestamp)); + VERIFY(seeked_iterator.m_last_timestamp <= timestamp); + + // We only need to seek to a keyframe if it's not faster to continue from the current position. 
+ if (timestamp < iterator.m_last_timestamp || seeked_iterator.m_last_timestamp > iterator.m_last_timestamp) + iterator = seeked_iterator; + return {}; + } // FIXME: This could cache the keyframes it finds. Is it worth doing? Probably not, most files will have Cues :^) if (timestamp < iterator.last_timestamp() || iterator.last_timestamp().is_negative()) { @@ -683,6 +881,12 @@ DecoderErrorOr Reader::seek_to_random_access_point(SampleIterator& iterato return {}; } +DecoderErrorOr const&>> Reader::cue_points_for_track(u64 track_number) +{ + TRY(ensure_cues_are_parsed()); + return m_cues.get(track_number); +} + DecoderErrorOr SampleIterator::next_block() { if (m_position >= m_data.size()) @@ -726,6 +930,25 @@ DecoderErrorOr SampleIterator::next_block() return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv); } +DecoderErrorOr SampleIterator::seek_to_cue_point(CuePoint const& cue_point) +{ + // This is a private function. The position getter can return optional, but the caller should already know that this track has a position. 
+ auto const& cue_position = cue_point.position_for_track(m_track.track_number()).release_value(); + Streamer streamer { m_data }; + TRY_READ(streamer.seek_to_position(cue_position.cluster_position())); + + auto element_id = TRY_READ(streamer.read_variable_size_integer(false)); + if (element_id != CLUSTER_ELEMENT_ID) + return DecoderError::corrupted("Cue point's cluster position didn't point to a cluster"sv); + + m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale)); + dbgln_if(MATROSKA_DEBUG, "SampleIterator set to cue point at timestamp {}ms", m_current_cluster->timestamp().to_milliseconds()); + + m_position = streamer.position() + cue_position.block_offset(); + m_last_timestamp = cue_point.timestamp(); + return {}; +} + ErrorOr Streamer::read_string() { auto string_length = TRY(read_variable_size_integer()); diff --git a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h index 69907846ca3..b1c6b955bd0 100644 --- a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h +++ b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h @@ -41,6 +41,8 @@ public: DecoderErrorOr create_sample_iterator(u64 track_number); DecoderErrorOr seek_to_random_access_point(SampleIterator&, Time); + DecoderErrorOr const&>> cue_points_for_track(u64 track_number); + DecoderErrorOr has_cues_for_track(u64 track_number); private: Reader(ReadonlyBytes data) @@ -55,6 +57,10 @@ private: DecoderErrorOr ensure_tracks_are_parsed(); DecoderErrorOr parse_tracks(Streamer&); + DecoderErrorOr parse_cues(Streamer&); + DecoderErrorOr ensure_cues_are_parsed(); + DecoderErrorOr seek_to_cue_for_timestamp(SampleIterator&, Time const&); + RefPtr m_mapped_file; ReadonlyBytes m_data; @@ -69,6 +75,10 @@ private: Optional m_segment_information; OrderedHashMap m_tracks; + + // The vectors must be sorted by timestamp at all times. 
+ HashMap> m_cues; + bool m_cues_have_been_parsed { false }; }; class SampleIterator { @@ -90,6 +100,7 @@ private: } DecoderErrorOr set_position(size_t position); + DecoderErrorOr seek_to_cue_point(CuePoint const& cue_point); RefPtr m_file; ReadonlyBytes m_data; diff --git a/Userland/Libraries/LibVideo/PlaybackManager.cpp b/Userland/Libraries/LibVideo/PlaybackManager.cpp index 22941e11494..17d2c52f33c 100644 --- a/Userland/Libraries/LibVideo/PlaybackManager.cpp +++ b/Userland/Libraries/LibVideo/PlaybackManager.cpp @@ -121,7 +121,7 @@ void PlaybackManager::on_decoder_error(DecoderError error) void PlaybackManager::end_seek() { - dbgln_if(PLAYBACK_MANAGER_DEBUG, "We've finished seeking, reset seek target and play"); + dbgln_if(PLAYBACK_MANAGER_DEBUG, "We've finished seeking, set media time to seek time at {}ms and change status", m_seek_to_media_time.to_milliseconds()); VERIFY(!m_seek_to_media_time.is_negative()); m_last_present_in_media_time = m_seek_to_media_time; m_seek_to_media_time = Time::min(); diff --git a/Userland/Utilities/matroska.cpp b/Userland/Utilities/matroska.cpp index d23013fdf93..f3e3b7bebb3 100644 --- a/Userland/Utilities/matroska.cpp +++ b/Userland/Utilities/matroska.cpp @@ -24,10 +24,12 @@ ErrorOr serenity_main(Main::Arguments arguments) { StringView filename; bool blocks = false; + bool cues = false; u64 track_number = 0; Core::ArgsParser args_parser; args_parser.add_option(blocks, "Print blocks for each track.", "blocks", 'b'); + args_parser.add_option(cues, "Print cue points for each track.", "cues", 'c'); args_parser.add_option(track_number, "Specify a track number to print info for, omit to print all of them.", "track", 't', "tracknumber"); args_parser.add_positional_argument(filename, "The video file to display.", "filename", Core::ArgsParser::Required::Yes); args_parser.parse(arguments); @@ -61,6 +63,28 @@ ErrorOr serenity_main(Main::Arguments arguments) outln("\t\tAudio has {} channels with a bit depth of {}", audio_track.channels, 
audio_track.bit_depth); } + if (cues) { + auto const& cue_points = TRY(reader.cue_points_for_track(track_entry.track_number())); + + if (cue_points.has_value()) { + outln("\tCues points:"); + + for (auto const& cue_point : cue_points.value()) { + outln("\t\tCue point at {}ms:", cue_point.timestamp().to_milliseconds()); + auto const& track_position = cue_point.position_for_track(track_entry.track_number()); + + if (!track_position.has_value()) { + outln("\t\t\tCue point has no positions for this track, this should not happen"); + continue; + } + outln("\t\t\tCluster position {}", track_position->cluster_position()); + outln("\t\t\tBlock offset {}", track_position->block_offset()); + } + } else { + outln("\tNo cue points exist for this track"); + } + } + if (blocks) { outln("\tBlocks:"); auto iterator = TRY(reader.create_sample_iterator(track_entry.track_number()));