Reader.cpp 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. * Copyright (c) 2022-2023, Gregory Bertilson <Zaggy1024@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/Function.h>
  9. #include <AK/Optional.h>
  10. #include <AK/Time.h>
  11. #include <AK/Utf8View.h>
  12. #include <LibCore/MappedFile.h>
  13. #include "Reader.h"
  14. namespace Video::Matroska {
  // Shorthand for DECODER_TRY with the error category fixed to DecoderErrorCategory::Corrupted.
  // It is wrapped around the Streamer reads throughout this file.
  15. #define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
  16. // RFC 8794 - Extensible Binary Meta Language
  17. // https://datatracker.ietf.org/doc/html/rfc8794
  // EBML_MASTER_ELEMENT_ID is the ID parse_initial_data() requires as the very first element.
  // The CRC-32 and Void IDs are skipped by parse_master_element() before its consumer is called.
  18. constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
  19. constexpr u32 EBML_CRC32_ELEMENT_ID = 0xBF;
  20. constexpr u32 EBML_VOID_ELEMENT_ID = 0xEC;
  21. // Matroska elements' IDs and types are listed at this URL:
  22. // https://www.matroska.org/technical/elements.html
  // IDs matched by parse_initial_data(), parse_ebml_header(), parse_seek_head(),
  // parse_information() and the top-level element lookups in Reader.
  23. constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
  24. constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
  25. constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
  26. constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74;
  27. constexpr u32 SEEK_ELEMENT_ID = 0x4DBB;
  28. constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB;
  29. constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC;
  30. constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
  31. constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
  32. constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
  33. constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
  34. constexpr u32 MUXING_APP_ID = 0x4D80;
  35. constexpr u32 WRITING_APP_ID = 0x5741;
  36. constexpr u32 DURATION_ID = 0x4489;
  37. // Tracks
  // TRACK_ENTRY_ID is matched by Reader::parse_tracks(); the remaining IDs by parse_track_entry().
  38. constexpr u32 TRACK_ENTRY_ID = 0xAE;
  39. constexpr u32 TRACK_NUMBER_ID = 0xD7;
  40. constexpr u32 TRACK_UID_ID = 0x73C5;
  41. constexpr u32 TRACK_TYPE_ID = 0x83;
  42. constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
  43. constexpr u32 TRACK_CODEC_ID = 0x86;
  44. constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
  45. constexpr u32 TRACK_OFFSET_ID = 0x537F;
  46. constexpr u32 TRACK_VIDEO_ID = 0xE0;
  47. constexpr u32 TRACK_AUDIO_ID = 0xE1;
  48. // Video
  // Matched by parse_video_track_information() and parse_video_color_information().
  49. constexpr u32 PIXEL_WIDTH_ID = 0xB0;
  50. constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
  51. constexpr u32 COLOR_ENTRY_ID = 0x55B0;
  52. constexpr u32 PRIMARIES_ID = 0x55BB;
  53. constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA;
  54. constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1;
  55. constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2;
  56. // Audio
  // Matched by parse_audio_track_information().
  57. constexpr u32 CHANNELS_ID = 0x9F;
  58. constexpr u32 BIT_DEPTH_ID = 0x6264;
  59. // Clusters
  // TIMESTAMP_ID is matched by parse_cluster().
  60. constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
  61. constexpr u32 TIMESTAMP_ID = 0xE7;
  62. // Cues
  63. constexpr u32 CUES_ID = 0x1C53BB6B;
  64. constexpr u32 CUE_POINT_ID = 0xBB;
  65. constexpr u32 CUE_TIME_ID = 0xB3;
  66. constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7;
  67. constexpr u32 CUE_TRACK_ID = 0xF7;
  68. constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1;
  69. constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0;
  70. constexpr u32 CUE_CODEC_STATE_ID = 0xEA;
  71. constexpr u32 CUE_REFERENCE_ID = 0xDB;
  72. DecoderErrorOr<Reader> Reader::from_file(StringView path)
  73. {
  74. auto mapped_file = DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path));
  75. return from_mapped_file(move(mapped_file));
  76. }
  77. DecoderErrorOr<Reader> Reader::from_mapped_file(NonnullOwnPtr<Core::MappedFile> mapped_file)
  78. {
  79. auto reader = TRY(from_data(mapped_file->bytes()));
  80. reader.m_mapped_file = make_ref_counted<Core::SharedMappedFile>(move(mapped_file));
  81. return reader;
  82. }
  83. DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
  84. {
  85. Reader reader(data);
  86. TRY(reader.parse_initial_data());
  87. return reader;
  88. }
  89. // Returns the position of the first element that is read from this master element.
  90. static DecoderErrorOr<size_t> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64)> element_consumer)
  91. {
  92. auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
  93. dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);
  94. bool first_element = true;
  95. auto first_element_position = streamer.position();
  96. streamer.push_octets_read();
  97. while (streamer.octets_read() < element_data_size) {
  98. dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
  99. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  100. dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);
  101. if (element_id == EBML_CRC32_ELEMENT_ID) {
  102. // The CRC-32 Element contains a 32-bit Cyclic Redundancy Check value of all the
  103. // Element Data of the Parent Element as stored except for the CRC-32 Element itself.
  104. // When the CRC-32 Element is present, the CRC-32 Element MUST be the first ordered
  105. // EBML Element within its Parent Element for easier reading.
  106. if (!first_element)
  107. return DecoderError::corrupted("CRC32 element must be the first child"sv);
  108. // All Top-Level Elements of an EBML Document that are Master Elements SHOULD include a
  109. // CRC-32 Element as a Child Element. The CRC in use is the IEEE-CRC-32 algorithm as used
  110. // in the [ISO3309] standard and in Section 8.1.1.6.2 of [ITU.V42], with initial value of
  111. // 0xFFFFFFFF. The CRC value MUST be computed on a little-endian bytestream and MUST use
  112. // little-endian storage.
  113. // FIXME: Currently we skip the CRC-32 Element instead of checking it. It may be worth
  114. // verifying the contents of the SeekHead, Segment Info, and Tracks Elements.
  115. // Note that Cluster Elements tend to be quite large, so verifying their integrity
  116. // will result in longer buffering times in streamed contexts, so it may not be
  117. // worth the effort checking those. It would also prevent error correction in
  118. // video codecs from taking effect.
  119. TRY_READ(streamer.read_unknown_element());
  120. continue;
  121. }
  122. if (element_id == EBML_VOID_ELEMENT_ID) {
  123. // Used to void data or to avoid unexpected behaviors when using damaged data.
  124. // The content is discarded. Also used to reserve space in a subelement for later use.
  125. TRY_READ(streamer.read_unknown_element());
  126. continue;
  127. }
  128. auto result = element_consumer(element_id);
  129. if (result.is_error())
  130. return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
  131. if (result.release_value() == IterationDecision::Break)
  132. break;
  133. dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
  134. first_element = false;
  135. }
  136. streamer.pop_octets_read();
  137. return first_element_position;
  138. }
  139. static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
  140. {
  141. EBMLHeader header;
  142. TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  143. switch (element_id) {
  144. case DOCTYPE_ELEMENT_ID:
  145. header.doc_type = TRY_READ(streamer.read_string());
  146. dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", header.doc_type);
  147. break;
  148. case DOCTYPE_VERSION_ELEMENT_ID:
  149. header.doc_type_version = TRY_READ(streamer.read_u64());
  150. dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", header.doc_type_version);
  151. break;
  152. default:
  153. TRY_READ(streamer.read_unknown_element());
  154. }
  155. return IterationDecision::Continue;
  156. }));
  157. return header;
  158. }
  159. DecoderErrorOr<void> Reader::parse_initial_data()
  160. {
  161. Streamer streamer { m_data };
  162. auto first_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  163. dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id);
  164. if (first_element_id != EBML_MASTER_ELEMENT_ID)
  165. return DecoderError::corrupted("First element was not an EBML header"sv);
  166. m_header = TRY(parse_ebml_header(streamer));
  167. dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
  168. auto root_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  169. if (root_element_id != SEGMENT_ELEMENT_ID)
  170. return DecoderError::corrupted("Second element was not a segment element"sv);
  171. m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
  172. m_segment_contents_position = streamer.position();
  173. dbgln_if(true, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, m_data.size() - m_segment_contents_position);
  174. m_segment_contents_size = min(m_segment_contents_size, m_data.size() - m_segment_contents_position);
  175. return {};
  176. }
  177. static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
  178. {
  179. TRY(parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id) -> DecoderErrorOr<IterationDecision> {
  180. if (seek_head_child_id == SEEK_ELEMENT_ID) {
  181. Optional<u64> seek_id;
  182. Optional<u64> seek_position;
  183. TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id) -> DecoderErrorOr<IterationDecision> {
  184. switch (seek_entry_child_id) {
  185. case SEEK_ID_ELEMENT_ID:
  186. seek_id = TRY_READ(streamer.read_u64());
  187. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
  188. break;
  189. case SEEK_POSITION_ELEMENT_ID:
  190. seek_position = TRY_READ(streamer.read_u64());
  191. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
  192. break;
  193. default:
  194. TRY_READ(streamer.read_unknown_element());
  195. }
  196. return IterationDecision::Continue;
  197. }));
  198. if (!seek_id.has_value())
  199. return DecoderError::corrupted("Seek entry is missing the element ID"sv);
  200. if (!seek_position.has_value())
  201. return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
  202. if (seek_id.value() > NumericLimits<u32>::max())
  203. return DecoderError::corrupted("Seek entry's element ID is too large"sv);
  204. dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
  205. // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.
  206. if (table.contains(seek_id.value())) {
  207. dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
  208. return IterationDecision::Continue;
  209. }
  210. DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
  211. } else {
  212. dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
  213. }
  214. return IterationDecision::Continue;
  215. }));
  216. return {};
  217. }
  218. DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
  219. {
  220. dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);
  221. if (m_seek_entries.contains(element_id)) {
  222. dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
  223. return m_seek_entries.get(element_id).release_value();
  224. }
  225. Streamer streamer { m_data };
  226. if (m_last_top_level_element_position != 0)
  227. TRY_READ(streamer.seek_to_position(m_last_top_level_element_position));
  228. else
  229. TRY_READ(streamer.seek_to_position(m_segment_contents_position));
  230. Optional<size_t> position;
  231. while (streamer.position() < m_segment_contents_position + m_segment_contents_size) {
  232. auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  233. auto found_element_position = streamer.position();
  234. dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", found_element_id, found_element_position);
  235. if (found_element_id == SEEK_HEAD_ELEMENT_ID) {
  236. dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
  237. m_seek_entries.clear();
  238. TRY(parse_seek_head(streamer, found_element_position, m_seek_entries));
  239. m_last_top_level_element_position = 0;
  240. if (m_seek_entries.contains(element_id)) {
  241. dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
  242. position = m_seek_entries.get(element_id).release_value();
  243. break;
  244. }
  245. continue;
  246. }
  247. auto result = streamer.read_unknown_element();
  248. if (result.is_error())
  249. return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, result.release_error().string_literal());
  250. m_last_top_level_element_position = streamer.position();
  251. DECODER_TRY_ALLOC(m_seek_entries.try_set(found_element_id, found_element_position));
  252. if (found_element_id == element_id) {
  253. position = found_element_position;
  254. break;
  255. }
  256. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
  257. }
  258. return position;
  259. }
  260. static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
  261. {
  262. SegmentInformation segment_information;
  263. TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  264. switch (element_id) {
  265. case TIMESTAMP_SCALE_ID:
  266. segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64()));
  267. dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", segment_information.timestamp_scale());
  268. break;
  269. case MUXING_APP_ID:
  270. segment_information.set_muxing_app(TRY_READ(streamer.read_string()));
  271. dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", segment_information.muxing_app().as_string());
  272. break;
  273. case WRITING_APP_ID:
  274. segment_information.set_writing_app(TRY_READ(streamer.read_string()));
  275. dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", segment_information.writing_app().as_string());
  276. break;
  277. case DURATION_ID:
  278. segment_information.set_duration_unscaled(TRY_READ(streamer.read_float()));
  279. dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", segment_information.duration_unscaled().value());
  280. break;
  281. default:
  282. TRY_READ(streamer.read_unknown_element());
  283. }
  284. return IterationDecision::Continue;
  285. }));
  286. return segment_information;
  287. }
  288. DecoderErrorOr<SegmentInformation> Reader::segment_information()
  289. {
  290. if (m_segment_information.has_value())
  291. return m_segment_information.value();
  292. auto position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
  293. if (!position.has_value())
  294. return DecoderError::corrupted("No Segment Information element found"sv);
  295. Streamer streamer { m_data };
  296. TRY_READ(streamer.seek_to_position(position.release_value()));
  297. m_segment_information = TRY(parse_information(streamer));
  298. return m_segment_information.value();
  299. }
  300. DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
  301. {
  302. if (!m_tracks.is_empty())
  303. return {};
  304. auto position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
  305. if (!position.has_value())
  306. return DecoderError::corrupted("No Tracks element found"sv);
  307. Streamer streamer { m_data };
  308. TRY_READ(streamer.seek_to_position(position.release_value()));
  309. TRY(parse_tracks(streamer));
  310. return {};
  311. }
  312. static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
  313. {
  314. TrackEntry::ColorFormat color_format {};
  315. TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  316. switch (element_id) {
  317. case PRIMARIES_ID:
  318. color_format.color_primaries = static_cast<ColorPrimaries>(TRY_READ(streamer.read_u64()));
  319. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color_format.color_primaries));
  320. break;
  321. case TRANSFER_CHARACTERISTICS_ID:
  322. color_format.transfer_characteristics = static_cast<TransferCharacteristics>(TRY_READ(streamer.read_u64()));
  323. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color_format.transfer_characteristics));
  324. break;
  325. case MATRIX_COEFFICIENTS_ID:
  326. color_format.matrix_coefficients = static_cast<MatrixCoefficients>(TRY_READ(streamer.read_u64()));
  327. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color_format.matrix_coefficients));
  328. break;
  329. case BITS_PER_CHANNEL_ID:
  330. color_format.bits_per_channel = TRY_READ(streamer.read_u64());
  331. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color_format.bits_per_channel);
  332. break;
  333. default:
  334. TRY_READ(streamer.read_unknown_element());
  335. }
  336. return IterationDecision::Continue;
  337. }));
  338. return color_format;
  339. }
  340. static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
  341. {
  342. TrackEntry::VideoTrack video_track {};
  343. TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  344. switch (element_id) {
  345. case PIXEL_WIDTH_ID:
  346. video_track.pixel_width = TRY_READ(streamer.read_u64());
  347. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video_track.pixel_width);
  348. break;
  349. case PIXEL_HEIGHT_ID:
  350. video_track.pixel_height = TRY_READ(streamer.read_u64());
  351. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video_track.pixel_height);
  352. break;
  353. case COLOR_ENTRY_ID:
  354. video_track.color_format = TRY(parse_video_color_information(streamer));
  355. break;
  356. default:
  357. TRY_READ(streamer.read_unknown_element());
  358. }
  359. return IterationDecision::Continue;
  360. }));
  361. return video_track;
  362. }
  363. static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
  364. {
  365. TrackEntry::AudioTrack audio_track {};
  366. TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  367. switch (element_id) {
  368. case CHANNELS_ID:
  369. audio_track.channels = TRY_READ(streamer.read_u64());
  370. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio_track.channels);
  371. break;
  372. case BIT_DEPTH_ID:
  373. audio_track.bit_depth = TRY_READ(streamer.read_u64());
  374. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio_track.bit_depth);
  375. break;
  376. default:
  377. TRY_READ(streamer.read_unknown_element());
  378. }
  379. return IterationDecision::Continue;
  380. }));
  381. return audio_track;
  382. }
  383. static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
  384. {
  385. TrackEntry track_entry;
  386. TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  387. switch (element_id) {
  388. case TRACK_NUMBER_ID:
  389. track_entry.set_track_number(TRY_READ(streamer.read_u64()));
  390. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry.track_number());
  391. break;
  392. case TRACK_UID_ID:
  393. track_entry.set_track_uid(TRY_READ(streamer.read_u64()));
  394. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry.track_uid());
  395. break;
  396. case TRACK_TYPE_ID:
  397. track_entry.set_track_type(static_cast<TrackEntry::TrackType>(TRY_READ(streamer.read_u64())));
  398. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry.track_type()));
  399. break;
  400. case TRACK_LANGUAGE_ID:
  401. track_entry.set_language(DECODER_TRY_ALLOC(String::from_byte_string(TRY_READ(streamer.read_string()))));
  402. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry.language());
  403. break;
  404. case TRACK_CODEC_ID:
  405. track_entry.set_codec_id(DECODER_TRY_ALLOC(String::from_byte_string(TRY_READ(streamer.read_string()))));
  406. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
  407. break;
  408. case TRACK_TIMESTAMP_SCALE_ID:
  409. track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
  410. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
  411. break;
  412. case TRACK_OFFSET_ID:
  413. track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
  414. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
  415. break;
  416. case TRACK_VIDEO_ID:
  417. track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
  418. break;
  419. case TRACK_AUDIO_ID:
  420. track_entry.set_audio_track(TRY(parse_audio_track_information(streamer)));
  421. break;
  422. default:
  423. TRY_READ(streamer.read_unknown_element());
  424. }
  425. return IterationDecision::Continue;
  426. }));
  427. return track_entry;
  428. }
  429. DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
  430. {
  431. TRY(parse_master_element(streamer, "Tracks"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  432. if (element_id == TRACK_ENTRY_ID) {
  433. auto track_entry = TRY(parse_track_entry(streamer));
  434. dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number());
  435. DECODER_TRY_ALLOC(m_tracks.try_set(track_entry.track_number(), track_entry));
  436. } else {
  437. TRY_READ(streamer.read_unknown_element());
  438. }
  439. return IterationDecision::Continue;
  440. }));
  441. return {};
  442. }
  443. DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
  444. {
  445. TRY(ensure_tracks_are_parsed());
  446. for (auto const& track_entry : m_tracks) {
  447. auto decision = TRY(callback(track_entry.value));
  448. if (decision == IterationDecision::Break)
  449. break;
  450. }
  451. return {};
  452. }
  453. DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
  454. {
  455. return for_each_track([&](TrackEntry const& track_entry) -> DecoderErrorOr<IterationDecision> {
  456. if (track_entry.track_type() != type)
  457. return IterationDecision::Continue;
  458. return callback(track_entry);
  459. });
  460. }
  461. DecoderErrorOr<TrackEntry> Reader::track_for_track_number(u64 track_number)
  462. {
  463. TRY(ensure_tracks_are_parsed());
  464. auto optional_track_entry = m_tracks.get(track_number);
  465. if (!optional_track_entry.has_value())
  466. return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
  467. return optional_track_entry.release_value();
  468. }
  469. DecoderErrorOr<size_t> Reader::track_count()
  470. {
  471. TRY(ensure_tracks_are_parsed());
  472. return m_tracks.size();
  473. }
  474. constexpr size_t get_element_id_size(u32 element_id)
  475. {
  476. return sizeof(element_id) - (count_leading_zeroes(element_id) / 8);
  477. }
  478. static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
  479. {
  480. Optional<u64> timestamp;
  481. auto first_element_position = TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  482. switch (element_id) {
  483. case TIMESTAMP_ID:
  484. timestamp = TRY_READ(streamer.read_u64());
  485. return IterationDecision::Break;
  486. default:
  487. TRY_READ(streamer.read_unknown_element());
  488. }
  489. return IterationDecision::Continue;
  490. }));
  491. if (!timestamp.has_value())
  492. return DecoderError::corrupted("Cluster was missing a timestamp"sv);
  493. if (first_element_position == 0)
  494. return DecoderError::corrupted("Cluster had no children"sv);
  495. dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", first_element_position);
  496. TRY_READ(streamer.seek_to_position(first_element_position));
  497. Cluster cluster;
  498. cluster.set_timestamp(Duration::from_nanoseconds(timestamp.release_value() * timestamp_scale));
  499. return cluster;
  500. }
  501. static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Duration cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
  502. {
  503. Block block;
  504. auto content_size = TRY_READ(streamer.read_variable_size_integer());
  505. auto position_before_track_number = streamer.position();
  506. block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));
  507. // https://www.matroska.org/technical/notes.html
  508. // Block Timestamps:
  509. // The Block Element and SimpleBlock Element store their timestamps as signed integers,
  510. // relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
  511. // timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
  512. // `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
  513. //
  514. // When a CodecDelay Element is set, its value MUST be subtracted from each Block timestamp
  515. // of that track. To get the timestamp in nanoseconds of the first frame in a Block or
  516. // SimpleBlock, the formula becomes:
  517. // `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
  518. Duration timestamp_offset = Duration::from_nanoseconds(static_cast<i64>(static_cast<double>(TRY_READ(streamer.read_i16()) * segment_timestamp_scale) * track.timestamp_scale()));
  519. timestamp_offset -= Duration::from_nanoseconds(static_cast<i64>(track.codec_delay()));
  520. // This is only mentioned in the elements specification under TrackOffset.
  521. // https://www.matroska.org/technical/elements.html
  522. timestamp_offset += Duration::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
  523. block.set_timestamp(cluster_timestamp + timestamp_offset);
  524. auto flags = TRY_READ(streamer.read_octet());
  525. block.set_only_keyframes((flags & (1u << 7u)) != 0);
  526. block.set_invisible((flags & (1u << 3u)) != 0);
  527. block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
  528. block.set_discardable((flags & 1u) != 0);
  529. auto total_frame_content_size = content_size - (streamer.position() - position_before_track_number);
  530. Vector<ReadonlyBytes> frames;
  531. if (block.lacing() == Block::Lacing::EBML) {
  532. auto octets_read_before_frame_sizes = streamer.octets_read();
  533. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  534. Vector<u64> frame_sizes;
  535. frame_sizes.ensure_capacity(frame_count);
  536. u64 frame_size_sum = 0;
  537. u64 previous_frame_size;
  538. auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
  539. frame_sizes.append(first_frame_size);
  540. frame_size_sum += first_frame_size;
  541. previous_frame_size = first_frame_size;
  542. for (int i = 0; i < frame_count - 2; i++) {
  543. auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
  544. u64 frame_size;
  545. // FIXME: x - (-y) == x + y?
  546. if (frame_size_difference < 0)
  547. frame_size = previous_frame_size - (-frame_size_difference);
  548. else
  549. frame_size = previous_frame_size + frame_size_difference;
  550. frame_sizes.append(frame_size);
  551. frame_size_sum += frame_size;
  552. previous_frame_size = frame_size;
  553. }
  554. frame_sizes.append(total_frame_content_size - frame_size_sum - (streamer.octets_read() - octets_read_before_frame_sizes));
  555. for (int i = 0; i < frame_count; i++) {
  556. // FIXME: ReadonlyBytes instead of copying the frame data?
  557. auto current_frame_size = frame_sizes.at(i);
  558. frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
  559. }
  560. } else if (block.lacing() == Block::Lacing::FixedSize) {
  561. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  562. auto individual_frame_size = total_frame_content_size / frame_count;
  563. for (int i = 0; i < frame_count; i++)
  564. frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
  565. } else {
  566. frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
  567. }
  568. block.set_frames(move(frames));
  569. return block;
  570. }
  571. DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
  572. {
  573. auto optional_position = TRY(find_first_top_level_element_with_id("Cluster"sv, CLUSTER_ELEMENT_ID));
  574. if (!optional_position.has_value())
  575. return DecoderError::corrupted("No clusters are present in the segment"sv);
  576. ReadonlyBytes segment_view = m_data.slice(m_segment_contents_position, m_segment_contents_size);
  577. // We need to have the element ID included so that the iterator knows where it is.
  578. auto position = optional_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID) - m_segment_contents_position;
  579. dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);
  580. return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position);
  581. }
  582. static DecoderErrorOr<CueTrackPosition> parse_cue_track_position(Streamer& streamer)
  583. {
  584. CueTrackPosition track_position;
  585. bool had_cluster_position = false;
  586. TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  587. switch (element_id) {
  588. case CUE_TRACK_ID:
  589. track_position.set_track_number(TRY_READ(streamer.read_u64()));
  590. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions track number {}", track_position.track_number());
  591. break;
  592. case CUE_CLUSTER_POSITION_ID:
  593. track_position.set_cluster_position(TRY_READ(streamer.read_u64()));
  594. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions cluster position {}", track_position.cluster_position());
  595. had_cluster_position = true;
  596. break;
  597. case CUE_RELATIVE_POSITION_ID:
  598. track_position.set_block_offset(TRY_READ(streamer.read_u64()));
  599. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions relative position {}", track_position.block_offset());
  600. break;
  601. case CUE_CODEC_STATE_ID:
  602. // Mandatory in spec, but not present in files? 0 means use TrackEntry's codec state.
  603. // FIXME: Do something with this value.
  604. dbgln_if(MATROSKA_DEBUG, "Found CodecState, skipping");
  605. TRY_READ(streamer.read_unknown_element());
  606. break;
  607. case CUE_REFERENCE_ID:
  608. return DecoderError::not_implemented();
  609. default:
  610. TRY_READ(streamer.read_unknown_element());
  611. break;
  612. }
  613. return IterationDecision::Continue;
  614. }));
  615. if (track_position.track_number() == 0)
  616. return DecoderError::corrupted("Track number was not present or 0"sv);
  617. if (!had_cluster_position)
  618. return DecoderError::corrupted("Cluster was missing the cluster position"sv);
  619. return track_position;
  620. }
  621. static DecoderErrorOr<CuePoint> parse_cue_point(Streamer& streamer, u64 timestamp_scale)
  622. {
  623. CuePoint cue_point;
  624. TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  625. switch (element_id) {
  626. case CUE_TIME_ID: {
  627. // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element:
  628. // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks.
  629. // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html:
  630. // > For such elements, the timestamp value is stored directly in nanoseconds.
  631. // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees:
  632. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941
  633. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416
  634. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392
  635. // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^(
  636. auto timestamp = Duration::from_nanoseconds(static_cast<i64>(TRY_READ(streamer.read_u64()) * timestamp_scale));
  637. cue_point.set_timestamp(timestamp);
  638. dbgln_if(MATROSKA_DEBUG, "Read CuePoint timestamp {}ms", cue_point.timestamp().to_milliseconds());
  639. break;
  640. }
  641. case CUE_TRACK_POSITIONS_ID: {
  642. auto track_position = TRY_READ(parse_cue_track_position(streamer));
  643. DECODER_TRY_ALLOC(cue_point.track_positions().try_set(track_position.track_number(), track_position));
  644. break;
  645. }
  646. default:
  647. TRY_READ(streamer.read_unknown_element());
  648. break;
  649. }
  650. return IterationDecision::Continue;
  651. }));
  652. if (cue_point.timestamp().is_negative())
  653. return DecoderError::corrupted("CuePoint was missing a timestamp"sv);
  654. if (cue_point.track_positions().is_empty())
  655. return DecoderError::corrupted("CuePoint was missing track positions"sv);
  656. return cue_point;
  657. }
  658. DecoderErrorOr<void> Reader::parse_cues(Streamer& streamer)
  659. {
  660. m_cues.clear();
  661. TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
  662. switch (element_id) {
  663. case CUE_POINT_ID: {
  664. auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale()));
  665. // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now,
  666. // but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead.
  667. for (auto track_position_entry : cue_point.track_positions()) {
  668. if (!m_cues.contains(track_position_entry.key))
  669. DECODER_TRY_ALLOC(m_cues.try_set(track_position_entry.key, Vector<CuePoint>()));
  670. Vector<CuePoint>& cue_points_for_track = m_cues.get(track_position_entry.key).release_value();
  671. cue_points_for_track.append(cue_point);
  672. }
  673. break;
  674. }
  675. default:
  676. return DecoderError::format(DecoderErrorCategory::Corrupted, "Unknown Cues child ID {:#010x}", element_id);
  677. }
  678. return IterationDecision::Continue;
  679. }));
  680. return {};
  681. }
  682. DecoderErrorOr<void> Reader::ensure_cues_are_parsed()
  683. {
  684. if (m_cues_have_been_parsed)
  685. return {};
  686. auto position = TRY(find_first_top_level_element_with_id("Cues"sv, CUES_ID));
  687. if (!position.has_value())
  688. return DecoderError::corrupted("No Tracks element found"sv);
  689. Streamer streamer { m_data };
  690. TRY_READ(streamer.seek_to_position(position.release_value()));
  691. TRY(parse_cues(streamer));
  692. m_cues_have_been_parsed = true;
  693. return {};
  694. }
  695. DecoderErrorOr<void> Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Duration const& timestamp)
  696. {
  697. auto const& cue_points = MUST(cue_points_for_track(iterator.m_track.track_number())).release_value();
  698. // Take a guess at where in the cues the timestamp will be and correct from there.
  699. auto duration = TRY(segment_information()).duration();
  700. size_t index = 0;
  701. if (duration.has_value())
  702. index = clamp(((timestamp.to_nanoseconds() * cue_points.size()) / TRY(segment_information()).duration()->to_nanoseconds()), 0, cue_points.size() - 1);
  703. CuePoint const* prev_cue_point = &cue_points[index];
  704. dbgln_if(MATROSKA_DEBUG, "Finding Matroska cue points for timestamp {}ms starting from cue at {}ms", timestamp.to_milliseconds(), prev_cue_point->timestamp().to_milliseconds());
  705. if (prev_cue_point->timestamp() == timestamp) {
  706. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  707. return {};
  708. }
  709. if (prev_cue_point->timestamp() > timestamp) {
  710. while (index > 0 && prev_cue_point->timestamp() > timestamp) {
  711. prev_cue_point = &cue_points[--index];
  712. dbgln_if(MATROSKA_DEBUG, "Checking previous cue point {}ms", prev_cue_point->timestamp().to_milliseconds());
  713. }
  714. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  715. return {};
  716. }
  717. while (++index < cue_points.size()) {
  718. auto const& cue_point = cue_points[index];
  719. dbgln_if(MATROSKA_DEBUG, "Checking future cue point {}ms", cue_point.timestamp().to_milliseconds());
  720. if (cue_point.timestamp() > timestamp)
  721. break;
  722. prev_cue_point = &cue_point;
  723. }
  724. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  725. return {};
  726. }
  727. static DecoderErrorOr<void> search_clusters_for_keyframe_before_timestamp(SampleIterator& iterator, Duration const& timestamp)
  728. {
  729. #if MATROSKA_DEBUG
  730. size_t inter_frames_count;
  731. #endif
  732. Optional<SampleIterator> last_keyframe;
  733. while (true) {
  734. SampleIterator rewind_iterator = iterator;
  735. auto block = TRY(iterator.next_block());
  736. if (block.only_keyframes()) {
  737. last_keyframe.emplace(rewind_iterator);
  738. #if MATROSKA_DEBUG
  739. inter_frames_count = 0;
  740. #endif
  741. }
  742. if (block.timestamp() > timestamp)
  743. break;
  744. #if MATROSKA_DEBUG
  745. inter_frames_count++;
  746. #endif
  747. }
  748. if (last_keyframe.has_value()) {
  749. #if MATROSKA_DEBUG
  750. dbgln("Seeked to a keyframe with {} inter frames to skip", inter_frames_count);
  751. #endif
  752. iterator = last_keyframe.release_value();
  753. }
  754. return {};
  755. }
  756. DecoderErrorOr<bool> Reader::has_cues_for_track(u64 track_number)
  757. {
  758. TRY(ensure_cues_are_parsed());
  759. return m_cues.contains(track_number);
  760. }
  761. DecoderErrorOr<SampleIterator> Reader::seek_to_random_access_point(SampleIterator iterator, Duration timestamp)
  762. {
  763. if (TRY(has_cues_for_track(iterator.m_track.track_number()))) {
  764. TRY(seek_to_cue_for_timestamp(iterator, timestamp));
  765. VERIFY(iterator.last_timestamp().has_value());
  766. return iterator;
  767. }
  768. if (!iterator.last_timestamp().has_value() || timestamp < iterator.last_timestamp().value()) {
  769. // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
  770. iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));
  771. TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
  772. return iterator;
  773. }
  774. TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
  775. return iterator;
  776. }
  777. DecoderErrorOr<Optional<Vector<CuePoint> const&>> Reader::cue_points_for_track(u64 track_number)
  778. {
  779. TRY(ensure_cues_are_parsed());
  780. return m_cues.get(track_number);
  781. }
  782. DecoderErrorOr<Block> SampleIterator::next_block()
  783. {
  784. if (m_position >= m_data.size())
  785. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "Still at end of stream :^)"sv);
  786. Streamer streamer { m_data };
  787. TRY_READ(streamer.seek_to_position(m_position));
  788. Optional<Block> block;
  789. while (streamer.has_octet()) {
  790. #if MATROSKA_TRACE_DEBUG
  791. auto element_position = streamer.position();
  792. #endif
  793. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  794. #if MATROSKA_TRACE_DEBUG
  795. dbgln("Iterator found element with ID {:#010x} at offset {} within the segment.", element_id, element_position);
  796. #endif
  797. if (element_id == CLUSTER_ELEMENT_ID) {
  798. dbgln_if(MATROSKA_DEBUG, " Iterator is parsing new cluster.");
  799. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  800. } else if (element_id == SIMPLE_BLOCK_ID) {
  801. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is parsing new block.");
  802. auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
  803. if (candidate_block.track_number() == m_track.track_number())
  804. block = move(candidate_block);
  805. } else {
  806. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is skipping unknown element with ID {:#010x}.", element_id);
  807. TRY_READ(streamer.read_unknown_element());
  808. }
  809. m_position = streamer.position();
  810. if (block.has_value()) {
  811. m_last_timestamp = block->timestamp();
  812. return block.release_value();
  813. }
  814. }
  815. m_current_cluster.clear();
  816. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv);
  817. }
  818. DecoderErrorOr<void> SampleIterator::seek_to_cue_point(CuePoint const& cue_point)
  819. {
  820. // This is a private function. The position getter can return optional, but the caller should already know that this track has a position.
  821. auto const& cue_position = cue_point.position_for_track(m_track.track_number()).release_value();
  822. Streamer streamer { m_data };
  823. TRY_READ(streamer.seek_to_position(cue_position.cluster_position()));
  824. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  825. if (element_id != CLUSTER_ELEMENT_ID)
  826. return DecoderError::corrupted("Cue point's cluster position didn't point to a cluster"sv);
  827. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  828. dbgln_if(MATROSKA_DEBUG, "SampleIterator set to cue point at timestamp {}ms", m_current_cluster->timestamp().to_milliseconds());
  829. m_position = streamer.position() + cue_position.block_offset();
  830. m_last_timestamp = cue_point.timestamp();
  831. return {};
  832. }
  833. ErrorOr<ByteString> Streamer::read_string()
  834. {
  835. auto string_length = TRY(read_variable_size_integer());
  836. if (remaining() < string_length)
  837. return Error::from_string_literal("String length extends past the end of the stream");
  838. auto string_data = data_as_chars();
  839. auto string_value = ByteString(string_data, strnlen(string_data, string_length));
  840. TRY(read_raw_octets(string_length));
  841. return string_value;
  842. }
  843. ErrorOr<u8> Streamer::read_octet()
  844. {
  845. if (!has_octet()) {
  846. dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
  847. return Error::from_string_literal("Stream is out of data");
  848. }
  849. u8 byte = *data();
  850. m_octets_read.last()++;
  851. m_position++;
  852. return byte;
  853. }
  854. ErrorOr<i16> Streamer::read_i16()
  855. {
  856. return (TRY(read_octet()) << 8) | TRY(read_octet());
  857. }
  858. ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
  859. {
  860. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT from offset {:p}", position());
  861. auto length_descriptor = TRY(read_octet());
  862. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
  863. if (length_descriptor == 0)
  864. return Error::from_string_literal("read_variable_size_integer: Length descriptor has no terminating set bit");
  865. size_t length = 0;
  866. while (length < 8) {
  867. if (((length_descriptor >> (8 - length)) & 1) == 1)
  868. break;
  869. length++;
  870. }
  871. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
  872. if (length > 8)
  873. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  874. u64 result;
  875. if (mask_length)
  876. result = length_descriptor & ~(1u << (8 - length));
  877. else
  878. result = length_descriptor;
  879. dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
  880. for (size_t i = 1; i < length; i++) {
  881. u8 next_octet = TRY(read_octet());
  882. dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
  883. result = (result << 8u) | next_octet;
  884. dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
  885. }
  886. return result;
  887. }
  888. ErrorOr<i64> Streamer::read_variable_size_signed_integer()
  889. {
  890. auto length_descriptor = TRY(read_octet());
  891. if (length_descriptor == 0)
  892. return Error::from_string_literal("read_variable_sized_signed_integer: Length descriptor has no terminating set bit");
  893. i64 length = 0;
  894. while (length < 8) {
  895. if (((length_descriptor >> (8 - length)) & 1) == 1)
  896. break;
  897. length++;
  898. }
  899. if (length > 8)
  900. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  901. i64 result = length_descriptor & ~(1u << (8 - length));
  902. for (i64 i = 1; i < length; i++) {
  903. u8 next_octet = TRY(read_octet());
  904. result = (result << 8u) | next_octet;
  905. }
  906. result -= AK::exp2<i64>(length * 7 - 1) - 1;
  907. return result;
  908. }
  909. ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
  910. {
  911. if (remaining() < num_octets)
  912. return Error::from_string_literal("Tried to drop octets past the end of the stream");
  913. ReadonlyBytes result = { data(), num_octets };
  914. m_position += num_octets;
  915. m_octets_read.last() += num_octets;
  916. return result;
  917. }
  918. ErrorOr<u64> Streamer::read_u64()
  919. {
  920. auto integer_length = TRY(read_variable_size_integer());
  921. u64 result = 0;
  922. for (size_t i = 0; i < integer_length; i++) {
  923. result = (result << 8u) + TRY(read_octet());
  924. }
  925. return result;
  926. }
  927. ErrorOr<double> Streamer::read_float()
  928. {
  929. auto length = TRY(read_variable_size_integer());
  930. if (length != 4u && length != 8u)
  931. return Error::from_string_literal("Float size must be 4 or 8 bytes");
  932. union {
  933. u64 value;
  934. float float_value;
  935. double double_value;
  936. } read_data;
  937. read_data.value = 0;
  938. for (size_t i = 0; i < length; i++) {
  939. read_data.value = (read_data.value << 8u) + TRY(read_octet());
  940. }
  941. if (length == 4u)
  942. return read_data.float_value;
  943. return read_data.double_value;
  944. }
  945. ErrorOr<void> Streamer::read_unknown_element()
  946. {
  947. auto element_length = TRY(read_variable_size_integer());
  948. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipping unknown element of size {}.", element_length);
  949. TRY(read_raw_octets(element_length));
  950. return {};
  951. }
  952. ErrorOr<void> Streamer::seek_to_position(size_t position)
  953. {
  954. if (position >= m_data.size())
  955. return Error::from_string_literal("Attempted to seek past the end of the stream");
  956. m_position = position;
  957. return {};
  958. }
  959. }