Reader.cpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. * Copyright (c) 2022, Gregory Bertilson <Zaggy1024@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Function.h>
  8. #include <AK/Optional.h>
  9. #include <AK/Time.h>
  10. #include <AK/Utf8View.h>
  11. #include <LibCore/MappedFile.h>
  12. #include "Reader.h"
  13. namespace Video::Matroska {
  14. #define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
  15. // Elements IDs and types are listed at this URL:
  16. // https://www.matroska.org/technical/elements.html
  17. constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
  18. constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
  19. constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
  20. constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
  21. constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74;
  22. constexpr u32 SEEK_ELEMENT_ID = 0x4DBB;
  23. constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB;
  24. constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC;
  25. constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
  26. constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
  27. constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
  28. constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
  29. constexpr u32 MUXING_APP_ID = 0x4D80;
  30. constexpr u32 WRITING_APP_ID = 0x5741;
  31. constexpr u32 DURATION_ID = 0x4489;
  32. // Tracks
  33. constexpr u32 TRACK_ENTRY_ID = 0xAE;
  34. constexpr u32 TRACK_NUMBER_ID = 0xD7;
  35. constexpr u32 TRACK_UID_ID = 0x73C5;
  36. constexpr u32 TRACK_TYPE_ID = 0x83;
  37. constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
  38. constexpr u32 TRACK_CODEC_ID = 0x86;
  39. constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
  40. constexpr u32 TRACK_OFFSET_ID = 0x537F;
  41. constexpr u32 TRACK_VIDEO_ID = 0xE0;
  42. constexpr u32 TRACK_AUDIO_ID = 0xE1;
  43. // Video
  44. constexpr u32 PIXEL_WIDTH_ID = 0xB0;
  45. constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
  46. constexpr u32 COLOR_ENTRY_ID = 0x55B0;
  47. constexpr u32 PRIMARIES_ID = 0x55BB;
  48. constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA;
  49. constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1;
  50. constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2;
  51. // Audio
  52. constexpr u32 CHANNELS_ID = 0x9F;
  53. constexpr u32 BIT_DEPTH_ID = 0x6264;
  54. // Clusters
  55. constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
  56. constexpr u32 TIMESTAMP_ID = 0xE7;
  57. DecoderErrorOr<Reader> Reader::from_file(StringView path)
  58. {
  59. auto mapped_file = DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path));
  60. auto reader = TRY(from_data(mapped_file->bytes()));
  61. reader.m_mapped_file = mapped_file;
  62. return reader;
  63. }
  64. DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
  65. {
  66. Reader reader(data);
  67. TRY(reader.parse_initial_data());
  68. return reader;
  69. }
  70. static DecoderErrorOr<void> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64, size_t)> element_consumer)
  71. {
  72. auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
  73. dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);
  74. streamer.push_octets_read();
  75. while (streamer.octets_read() < element_data_size) {
  76. dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
  77. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  78. auto element_position = streamer.position();
  79. dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);
  80. auto result = element_consumer(element_id, element_position);
  81. if (result.is_error())
  82. return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
  83. if (result.release_value() == IterationDecision::Break)
  84. break;
  85. dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
  86. }
  87. streamer.pop_octets_read();
  88. return {};
  89. }
  90. static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
  91. {
  92. EBMLHeader header;
  93. TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  94. switch (element_id) {
  95. case DOCTYPE_ELEMENT_ID:
  96. header.doc_type = TRY_READ(streamer.read_string());
  97. dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", header.doc_type);
  98. break;
  99. case DOCTYPE_VERSION_ELEMENT_ID:
  100. header.doc_type_version = TRY_READ(streamer.read_u64());
  101. dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", header.doc_type_version);
  102. break;
  103. default:
  104. TRY_READ(streamer.read_unknown_element());
  105. }
  106. return IterationDecision::Continue;
  107. }));
  108. return header;
  109. }
  110. DecoderErrorOr<void> Reader::parse_initial_data()
  111. {
  112. Streamer streamer { m_data };
  113. auto first_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  114. dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id);
  115. if (first_element_id != EBML_MASTER_ELEMENT_ID)
  116. return DecoderError::corrupted("First element was not an EBML header"sv);
  117. m_header = TRY(parse_ebml_header(streamer));
  118. dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
  119. auto root_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  120. if (root_element_id != SEGMENT_ELEMENT_ID)
  121. return DecoderError::corrupted("Second element was not a segment element"sv);
  122. m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
  123. m_segment_contents_position = streamer.position();
  124. dbgln_if(true, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, m_data.size() - m_segment_contents_position);
  125. m_segment_contents_size = min(m_segment_contents_size, m_data.size() - m_segment_contents_position);
  126. return {};
  127. }
  128. static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
  129. {
  130. return parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  131. if (seek_head_child_id == SEEK_ELEMENT_ID) {
  132. Optional<u64> seek_id;
  133. Optional<u64> seek_position;
  134. TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  135. switch (seek_entry_child_id) {
  136. case SEEK_ID_ELEMENT_ID:
  137. seek_id = TRY_READ(streamer.read_u64());
  138. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
  139. break;
  140. case SEEK_POSITION_ELEMENT_ID:
  141. seek_position = TRY_READ(streamer.read_u64());
  142. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
  143. break;
  144. default:
  145. TRY_READ(streamer.read_unknown_element());
  146. }
  147. return IterationDecision::Continue;
  148. }));
  149. if (!seek_id.has_value())
  150. return DecoderError::corrupted("Seek entry is missing the element ID"sv);
  151. if (!seek_position.has_value())
  152. return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
  153. if (seek_id.value() > NumericLimits<u32>::max())
  154. return DecoderError::corrupted("Seek entry's element ID is too large"sv);
  155. dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
  156. // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.
  157. if (table.contains(seek_id.value())) {
  158. dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
  159. return IterationDecision::Continue;
  160. }
  161. DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
  162. } else {
  163. dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
  164. }
  165. return IterationDecision::Continue;
  166. });
  167. }
  168. DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
  169. {
  170. dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);
  171. if (m_seek_entries.contains(element_id)) {
  172. dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
  173. return m_seek_entries.get(element_id).release_value();
  174. }
  175. Streamer streamer { m_data };
  176. if (m_last_top_level_element_position != 0)
  177. TRY_READ(streamer.seek_to_position(m_last_top_level_element_position));
  178. else
  179. TRY_READ(streamer.seek_to_position(m_segment_contents_position));
  180. Optional<size_t> position;
  181. while (streamer.position() < m_segment_contents_position + m_segment_contents_size) {
  182. auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  183. auto found_element_position = streamer.position();
  184. dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", found_element_id, found_element_position);
  185. if (found_element_id == SEEK_HEAD_ELEMENT_ID) {
  186. dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
  187. m_seek_entries.clear();
  188. TRY(parse_seek_head(streamer, found_element_position, m_seek_entries));
  189. m_last_top_level_element_position = 0;
  190. if (m_seek_entries.contains(element_id)) {
  191. dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
  192. position = m_seek_entries.get(element_id).release_value();
  193. break;
  194. }
  195. continue;
  196. }
  197. auto result = streamer.read_unknown_element();
  198. if (result.is_error())
  199. return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, result.release_error().string_literal());
  200. m_last_top_level_element_position = streamer.position();
  201. DECODER_TRY_ALLOC(m_seek_entries.try_set(found_element_id, found_element_position));
  202. if (found_element_id == element_id) {
  203. position = found_element_position;
  204. break;
  205. }
  206. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
  207. }
  208. return position;
  209. }
  210. static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
  211. {
  212. SegmentInformation segment_information;
  213. TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  214. switch (element_id) {
  215. case TIMESTAMP_SCALE_ID:
  216. segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64()));
  217. dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", segment_information.timestamp_scale());
  218. break;
  219. case MUXING_APP_ID:
  220. segment_information.set_muxing_app(TRY_READ(streamer.read_string()));
  221. dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", segment_information.muxing_app().as_string());
  222. break;
  223. case WRITING_APP_ID:
  224. segment_information.set_writing_app(TRY_READ(streamer.read_string()));
  225. dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", segment_information.writing_app().as_string());
  226. break;
  227. case DURATION_ID:
  228. segment_information.set_duration_unscaled(TRY_READ(streamer.read_float()));
  229. dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", segment_information.duration_unscaled().value());
  230. break;
  231. default:
  232. TRY_READ(streamer.read_unknown_element());
  233. }
  234. return IterationDecision::Continue;
  235. }));
  236. return segment_information;
  237. }
  238. DecoderErrorOr<SegmentInformation> Reader::segment_information()
  239. {
  240. if (m_segment_information.has_value())
  241. return m_segment_information.value();
  242. auto position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
  243. if (!position.has_value())
  244. return DecoderError::corrupted("No Segment Information element found"sv);
  245. Streamer streamer { m_data };
  246. TRY_READ(streamer.seek_to_position(position.release_value()));
  247. m_segment_information = TRY(parse_information(streamer));
  248. return m_segment_information.value();
  249. }
  250. DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
  251. {
  252. if (!m_tracks.is_empty())
  253. return {};
  254. auto position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
  255. if (!position.has_value())
  256. return DecoderError::corrupted("No Tracks element found"sv);
  257. Streamer streamer { m_data };
  258. TRY_READ(streamer.seek_to_position(position.release_value()));
  259. TRY(parse_tracks(streamer));
  260. return {};
  261. }
  262. static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
  263. {
  264. TrackEntry::ColorFormat color_format {};
  265. TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  266. switch (element_id) {
  267. case PRIMARIES_ID:
  268. color_format.color_primaries = static_cast<ColorPrimaries>(TRY_READ(streamer.read_u64()));
  269. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color_format.color_primaries));
  270. break;
  271. case TRANSFER_CHARACTERISTICS_ID:
  272. color_format.transfer_characteristics = static_cast<TransferCharacteristics>(TRY_READ(streamer.read_u64()));
  273. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color_format.transfer_characteristics));
  274. break;
  275. case MATRIX_COEFFICIENTS_ID:
  276. color_format.matrix_coefficients = static_cast<MatrixCoefficients>(TRY_READ(streamer.read_u64()));
  277. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color_format.matrix_coefficients));
  278. break;
  279. case BITS_PER_CHANNEL_ID:
  280. color_format.bits_per_channel = TRY_READ(streamer.read_u64());
  281. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color_format.bits_per_channel);
  282. break;
  283. default:
  284. TRY_READ(streamer.read_unknown_element());
  285. }
  286. return IterationDecision::Continue;
  287. }));
  288. return color_format;
  289. }
  290. static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
  291. {
  292. TrackEntry::VideoTrack video_track {};
  293. TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  294. switch (element_id) {
  295. case PIXEL_WIDTH_ID:
  296. video_track.pixel_width = TRY_READ(streamer.read_u64());
  297. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video_track.pixel_width);
  298. break;
  299. case PIXEL_HEIGHT_ID:
  300. video_track.pixel_height = TRY_READ(streamer.read_u64());
  301. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video_track.pixel_height);
  302. break;
  303. case COLOR_ENTRY_ID:
  304. video_track.color_format = TRY(parse_video_color_information(streamer));
  305. break;
  306. default:
  307. TRY_READ(streamer.read_unknown_element());
  308. }
  309. return IterationDecision::Continue;
  310. }));
  311. return video_track;
  312. }
  313. static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
  314. {
  315. TrackEntry::AudioTrack audio_track {};
  316. TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  317. switch (element_id) {
  318. case CHANNELS_ID:
  319. audio_track.channels = TRY_READ(streamer.read_u64());
  320. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio_track.channels);
  321. break;
  322. case BIT_DEPTH_ID:
  323. audio_track.bit_depth = TRY_READ(streamer.read_u64());
  324. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio_track.bit_depth);
  325. break;
  326. default:
  327. TRY_READ(streamer.read_unknown_element());
  328. }
  329. return IterationDecision::Continue;
  330. }));
  331. return audio_track;
  332. }
  333. static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
  334. {
  335. TrackEntry track_entry;
  336. TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  337. switch (element_id) {
  338. case TRACK_NUMBER_ID:
  339. track_entry.set_track_number(TRY_READ(streamer.read_u64()));
  340. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry.track_number());
  341. break;
  342. case TRACK_UID_ID:
  343. track_entry.set_track_uid(TRY_READ(streamer.read_u64()));
  344. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry.track_uid());
  345. break;
  346. case TRACK_TYPE_ID:
  347. track_entry.set_track_type(static_cast<TrackEntry::TrackType>(TRY_READ(streamer.read_u64())));
  348. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry.track_type()));
  349. break;
  350. case TRACK_LANGUAGE_ID:
  351. track_entry.set_language(TRY_READ(streamer.read_string()));
  352. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry.language());
  353. break;
  354. case TRACK_CODEC_ID:
  355. track_entry.set_codec_id(TRY_READ(streamer.read_string()));
  356. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
  357. break;
  358. case TRACK_TIMESTAMP_SCALE_ID:
  359. track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
  360. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
  361. break;
  362. case TRACK_OFFSET_ID:
  363. track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
  364. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
  365. break;
  366. case TRACK_VIDEO_ID:
  367. track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
  368. break;
  369. case TRACK_AUDIO_ID:
  370. track_entry.set_audio_track(TRY(parse_audio_track_information(streamer)));
  371. break;
  372. default:
  373. TRY_READ(streamer.read_unknown_element());
  374. }
  375. return IterationDecision::Continue;
  376. }));
  377. return track_entry;
  378. }
  379. DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
  380. {
  381. return parse_master_element(streamer, "Tracks"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  382. if (element_id == TRACK_ENTRY_ID) {
  383. auto track_entry = TRY(parse_track_entry(streamer));
  384. dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number());
  385. DECODER_TRY_ALLOC(m_tracks.try_set(track_entry.track_number(), track_entry));
  386. } else {
  387. TRY_READ(streamer.read_unknown_element());
  388. }
  389. return IterationDecision::Continue;
  390. });
  391. }
  392. DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
  393. {
  394. TRY(ensure_tracks_are_parsed());
  395. for (auto const& track_entry : m_tracks) {
  396. auto decision = TRY(callback(track_entry.value));
  397. if (decision == IterationDecision::Break)
  398. break;
  399. }
  400. return {};
  401. }
  402. DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
  403. {
  404. return for_each_track([&](TrackEntry const& track_entry) -> DecoderErrorOr<IterationDecision> {
  405. if (track_entry.track_type() != type)
  406. return IterationDecision::Continue;
  407. return callback(track_entry);
  408. });
  409. }
  410. DecoderErrorOr<TrackEntry> Reader::track_for_track_number(u64 track_number)
  411. {
  412. TRY(ensure_tracks_are_parsed());
  413. auto optional_track_entry = m_tracks.get(track_number);
  414. if (!optional_track_entry.has_value())
  415. return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
  416. return optional_track_entry.release_value();
  417. }
  418. DecoderErrorOr<size_t> Reader::track_count()
  419. {
  420. TRY(ensure_tracks_are_parsed());
  421. return m_tracks.size();
  422. }
  423. constexpr size_t get_element_id_size(u32 element_id)
  424. {
  425. return sizeof(element_id) - (count_leading_zeroes(element_id) / 8);
  426. }
  427. static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
  428. {
  429. Optional<u64> timestamp;
  430. size_t first_element_position = 0;
  431. TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id, size_t position) -> DecoderErrorOr<IterationDecision> {
  432. if (first_element_position == 0)
  433. first_element_position = position - get_element_id_size(element_id);
  434. switch (element_id) {
  435. case TIMESTAMP_ID:
  436. timestamp = TRY_READ(streamer.read_u64());
  437. return IterationDecision::Break;
  438. default:
  439. TRY_READ(streamer.read_unknown_element());
  440. }
  441. return IterationDecision::Continue;
  442. }));
  443. if (!timestamp.has_value())
  444. return DecoderError::corrupted("Cluster was missing a timestamp"sv);
  445. if (first_element_position == 0)
  446. return DecoderError::corrupted("Cluster had no children"sv);
  447. dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", first_element_position);
  448. TRY_READ(streamer.seek_to_position(first_element_position));
  449. Cluster cluster;
  450. cluster.set_timestamp(Time::from_nanoseconds(timestamp.release_value() * timestamp_scale));
  451. return cluster;
  452. }
  453. static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
  454. {
  455. Block block;
  456. auto content_size = TRY_READ(streamer.read_variable_size_integer());
  457. auto position_before_track_number = streamer.position();
  458. block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));
  459. // https://www.matroska.org/technical/notes.html
  460. // Block Timestamps:
  461. // The Block Element and SimpleBlock Element store their timestamps as signed integers,
  462. // relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
  463. // timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
  464. // `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
  465. //
  466. // When a CodecDelay Element is set, its value MUST be substracted from each Block timestamp
  467. // of that track. To get the timestamp in nanoseconds of the first frame in a Block or
  468. // SimpleBlock, the formula becomes:
  469. // `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
  470. Time timestamp_offset = Time::from_nanoseconds(static_cast<i64>(static_cast<double>(TRY_READ(streamer.read_i16()) * segment_timestamp_scale) * track.timestamp_scale()));
  471. timestamp_offset -= Time::from_nanoseconds(static_cast<i64>(track.codec_delay()));
  472. // This is only mentioned in the elements specification under TrackOffset.
  473. // https://www.matroska.org/technical/elements.html
  474. timestamp_offset += Time::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
  475. block.set_timestamp(cluster_timestamp + timestamp_offset);
  476. auto flags = TRY_READ(streamer.read_octet());
  477. block.set_only_keyframes((flags & (1u << 7u)) != 0);
  478. block.set_invisible((flags & (1u << 3u)) != 0);
  479. block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
  480. block.set_discardable((flags & 1u) != 0);
  481. auto total_frame_content_size = content_size - (streamer.position() - position_before_track_number);
  482. Vector<ReadonlyBytes> frames;
  483. if (block.lacing() == Block::Lacing::EBML) {
  484. auto octets_read_before_frame_sizes = streamer.octets_read();
  485. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  486. Vector<u64> frame_sizes;
  487. frame_sizes.ensure_capacity(frame_count);
  488. u64 frame_size_sum = 0;
  489. u64 previous_frame_size;
  490. auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
  491. frame_sizes.append(first_frame_size);
  492. frame_size_sum += first_frame_size;
  493. previous_frame_size = first_frame_size;
  494. for (int i = 0; i < frame_count - 2; i++) {
  495. auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
  496. u64 frame_size;
  497. // FIXME: x - (-y) == x + y?
  498. if (frame_size_difference < 0)
  499. frame_size = previous_frame_size - (-frame_size_difference);
  500. else
  501. frame_size = previous_frame_size + frame_size_difference;
  502. frame_sizes.append(frame_size);
  503. frame_size_sum += frame_size;
  504. previous_frame_size = frame_size;
  505. }
  506. frame_sizes.append(total_frame_content_size - frame_size_sum - (streamer.octets_read() - octets_read_before_frame_sizes));
  507. for (int i = 0; i < frame_count; i++) {
  508. // FIXME: ReadonlyBytes instead of copying the frame data?
  509. auto current_frame_size = frame_sizes.at(i);
  510. frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
  511. }
  512. } else if (block.lacing() == Block::Lacing::FixedSize) {
  513. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  514. auto individual_frame_size = total_frame_content_size / frame_count;
  515. for (int i = 0; i < frame_count; i++)
  516. frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
  517. } else {
  518. frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
  519. }
  520. block.set_frames(move(frames));
  521. return block;
  522. }
  523. DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
  524. {
  525. auto optional_position = TRY(find_first_top_level_element_with_id("Cluster"sv, CLUSTER_ELEMENT_ID));
  526. if (!optional_position.has_value())
  527. return DecoderError::corrupted("No clusters are present in the segment"sv);
  528. ReadonlyBytes segment_view = m_data.slice(m_segment_contents_position, m_segment_contents_size);
  529. // We need to have the element ID included so that the iterator knows where it is.
  530. auto position = optional_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID) - m_segment_contents_position;
  531. dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);
  532. return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position);
  533. }
  534. static DecoderErrorOr<bool> find_keyframe_before_timestamp(SampleIterator& iterator, Time const& timestamp)
  535. {
  536. #if MATROSKA_DEBUG
  537. size_t inter_frames_count;
  538. #endif
  539. Optional<SampleIterator> last_keyframe;
  540. while (true) {
  541. SampleIterator rewind_iterator = iterator;
  542. auto block = TRY(iterator.next_block());
  543. if (block.only_keyframes()) {
  544. last_keyframe.emplace(rewind_iterator);
  545. #if MATROSKA_DEBUG
  546. inter_frames_count = 0;
  547. #endif
  548. }
  549. if (block.timestamp() > timestamp)
  550. break;
  551. #if MATROSKA_DEBUG
  552. inter_frames_count++;
  553. #endif
  554. }
  555. if (last_keyframe.has_value()) {
  556. #if MATROSKA_DEBUG
  557. dbgln("Seeked to a keyframe with {} inter frames to skip", inter_frames_count);
  558. #endif
  559. iterator = last_keyframe.release_value();
  560. return true;
  561. }
  562. return false;
  563. }
  564. DecoderErrorOr<void> Reader::seek_to_random_access_point(SampleIterator& iterator, Time timestamp)
  565. {
  566. // FIXME: Use Cues to look these up if the element is present.
  567. // FIXME: This could cache the keyframes it finds. Is it worth doing? Probably not, most files will have Cues :^)
  568. if (timestamp < iterator.last_timestamp() || iterator.last_timestamp().is_negative()) {
  569. // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
  570. iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));
  571. if (!TRY(find_keyframe_before_timestamp(iterator, timestamp)))
  572. return DecoderError::corrupted("No random access points found"sv);
  573. return {};
  574. }
  575. auto seeked_iterator = iterator;
  576. if (TRY(find_keyframe_before_timestamp(seeked_iterator, timestamp)))
  577. iterator = seeked_iterator;
  578. VERIFY(iterator.last_timestamp() <= timestamp);
  579. return {};
  580. }
  581. DecoderErrorOr<Block> SampleIterator::next_block()
  582. {
  583. if (m_position >= m_data.size())
  584. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "Still at end of stream :^)"sv);
  585. Streamer streamer { m_data };
  586. TRY_READ(streamer.seek_to_position(m_position));
  587. Optional<Block> block;
  588. while (streamer.has_octet()) {
  589. #if MATROSKA_TRACE_DEBUG
  590. auto element_position = streamer.position();
  591. #endif
  592. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  593. #if MATROSKA_TRACE_DEBUG
  594. dbgln("Iterator found element with ID {:#010x} at offset {} within the segment.", element_id, element_position);
  595. #endif
  596. if (element_id == CLUSTER_ELEMENT_ID) {
  597. dbgln_if(MATROSKA_DEBUG, " Iterator is parsing new cluster.");
  598. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  599. } else if (element_id == SIMPLE_BLOCK_ID) {
  600. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is parsing new block.");
  601. auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
  602. if (candidate_block.track_number() == m_track.track_number())
  603. block = move(candidate_block);
  604. } else {
  605. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is skipping unknown element with ID {:#010x}.", element_id);
  606. TRY_READ(streamer.read_unknown_element());
  607. }
  608. m_position = streamer.position();
  609. if (block.has_value()) {
  610. m_last_timestamp = block->timestamp();
  611. return block.release_value();
  612. }
  613. }
  614. m_current_cluster.clear();
  615. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv);
  616. }
  617. ErrorOr<String> Streamer::read_string()
  618. {
  619. auto string_length = TRY(read_variable_size_integer());
  620. if (remaining() < string_length)
  621. return Error::from_string_literal("String length extends past the end of the stream");
  622. auto string_value = String(data_as_chars(), string_length);
  623. TRY(read_raw_octets(string_length));
  624. return string_value;
  625. }
  626. ErrorOr<u8> Streamer::read_octet()
  627. {
  628. if (!has_octet()) {
  629. dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
  630. return Error::from_string_literal("Stream is out of data");
  631. }
  632. u8 byte = *data();
  633. m_octets_read.last()++;
  634. m_position++;
  635. return byte;
  636. }
  637. ErrorOr<i16> Streamer::read_i16()
  638. {
  639. return (TRY(read_octet()) << 8) | TRY(read_octet());
  640. }
  641. ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
  642. {
  643. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", data());
  644. auto length_descriptor = TRY(read_octet());
  645. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
  646. if (length_descriptor == 0)
  647. return Error::from_string_literal("read_variable_size_integer: Length descriptor has no terminating set bit");
  648. size_t length = 0;
  649. while (length < 8) {
  650. if (((length_descriptor >> (8 - length)) & 1) == 1)
  651. break;
  652. length++;
  653. }
  654. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
  655. if (length > 8)
  656. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  657. u64 result;
  658. if (mask_length)
  659. result = length_descriptor & ~(1u << (8 - length));
  660. else
  661. result = length_descriptor;
  662. dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
  663. for (size_t i = 1; i < length; i++) {
  664. u8 next_octet = TRY(read_octet());
  665. dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
  666. result = (result << 8u) | next_octet;
  667. dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
  668. }
  669. return result;
  670. }
  671. ErrorOr<i64> Streamer::read_variable_size_signed_integer()
  672. {
  673. auto length_descriptor = TRY(read_octet());
  674. if (length_descriptor == 0)
  675. return Error::from_string_literal("read_variable_sized_signed_integer: Length descriptor has no terminating set bit");
  676. i64 length = 0;
  677. while (length < 8) {
  678. if (((length_descriptor >> (8 - length)) & 1) == 1)
  679. break;
  680. length++;
  681. }
  682. if (length > 8)
  683. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  684. i64 result = length_descriptor & ~(1u << (8 - length));
  685. for (i64 i = 1; i < length; i++) {
  686. u8 next_octet = TRY(read_octet());
  687. result = (result << 8u) | next_octet;
  688. }
  689. result -= AK::exp2<i64>(length * 7 - 1) - 1;
  690. return result;
  691. }
  692. ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
  693. {
  694. if (remaining() < num_octets)
  695. return Error::from_string_literal("Tried to drop octets past the end of the stream");
  696. ReadonlyBytes result = { data(), num_octets };
  697. m_position += num_octets;
  698. m_octets_read.last() += num_octets;
  699. return result;
  700. }
  701. ErrorOr<u64> Streamer::read_u64()
  702. {
  703. auto integer_length = TRY(read_variable_size_integer());
  704. u64 result = 0;
  705. for (size_t i = 0; i < integer_length; i++) {
  706. result = (result << 8u) + TRY(read_octet());
  707. }
  708. return result;
  709. }
  710. ErrorOr<double> Streamer::read_float()
  711. {
  712. auto length = TRY(read_variable_size_integer());
  713. if (length != 4u && length != 8u)
  714. return Error::from_string_literal("Float size must be 4 or 8 bytes");
  715. union {
  716. u64 value;
  717. float float_value;
  718. double double_value;
  719. } read_data;
  720. read_data.value = 0;
  721. for (size_t i = 0; i < length; i++) {
  722. read_data.value = (read_data.value << 8u) + TRY(read_octet());
  723. }
  724. if (length == 4u)
  725. return read_data.float_value;
  726. return read_data.double_value;
  727. }
  728. ErrorOr<void> Streamer::read_unknown_element()
  729. {
  730. auto element_length = TRY(read_variable_size_integer());
  731. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipping unknown element of size {}.", element_length);
  732. TRY(read_raw_octets(element_length));
  733. return {};
  734. }
  735. ErrorOr<void> Streamer::seek_to_position(size_t position)
  736. {
  737. if (position >= m_data.size())
  738. return Error::from_string_literal("Attempted to seek past the end of the stream");
  739. m_position = position;
  740. return {};
  741. }
  742. }