Reader.cpp 44 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. * Copyright (c) 2022, Gregory Bertilson <Zaggy1024@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Function.h>
  8. #include <AK/Optional.h>
  9. #include <AK/Time.h>
  10. #include <AK/Utf8View.h>
  11. #include <LibCore/MappedFile.h>
  12. #include "Reader.h"
  13. namespace Video::Matroska {
  14. #define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
  15. // Elements IDs and types are listed at this URL:
  16. // https://www.matroska.org/technical/elements.html
  17. constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
  18. constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
  19. constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
  20. constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
  21. constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74;
  22. constexpr u32 SEEK_ELEMENT_ID = 0x4DBB;
  23. constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB;
  24. constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC;
  25. constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
  26. constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
  27. constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
  28. constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
  29. constexpr u32 MUXING_APP_ID = 0x4D80;
  30. constexpr u32 WRITING_APP_ID = 0x5741;
  31. constexpr u32 DURATION_ID = 0x4489;
  32. // Tracks
  33. constexpr u32 TRACK_ENTRY_ID = 0xAE;
  34. constexpr u32 TRACK_NUMBER_ID = 0xD7;
  35. constexpr u32 TRACK_UID_ID = 0x73C5;
  36. constexpr u32 TRACK_TYPE_ID = 0x83;
  37. constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
  38. constexpr u32 TRACK_CODEC_ID = 0x86;
  39. constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
  40. constexpr u32 TRACK_OFFSET_ID = 0x537F;
  41. constexpr u32 TRACK_VIDEO_ID = 0xE0;
  42. constexpr u32 TRACK_AUDIO_ID = 0xE1;
  43. // Video
  44. constexpr u32 PIXEL_WIDTH_ID = 0xB0;
  45. constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
  46. constexpr u32 COLOR_ENTRY_ID = 0x55B0;
  47. constexpr u32 PRIMARIES_ID = 0x55BB;
  48. constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA;
  49. constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1;
  50. constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2;
  51. // Audio
  52. constexpr u32 CHANNELS_ID = 0x9F;
  53. constexpr u32 BIT_DEPTH_ID = 0x6264;
  54. // Clusters
  55. constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
  56. constexpr u32 TIMESTAMP_ID = 0xE7;
  57. // Cues
  58. constexpr u32 CUES_ID = 0x1C53BB6B;
  59. constexpr u32 CUE_POINT_ID = 0xBB;
  60. constexpr u32 CUE_TIME_ID = 0xB3;
  61. constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7;
  62. constexpr u32 CUE_TRACK_ID = 0xF7;
  63. constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1;
  64. constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0;
  65. constexpr u32 CUE_CODEC_STATE_ID = 0xEA;
  66. constexpr u32 CUE_REFERENCE_ID = 0xDB;
  67. DecoderErrorOr<Reader> Reader::from_file(StringView path)
  68. {
  69. auto mapped_file = DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path));
  70. auto reader = TRY(from_data(mapped_file->bytes()));
  71. reader.m_mapped_file = mapped_file;
  72. return reader;
  73. }
  74. DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
  75. {
  76. Reader reader(data);
  77. TRY(reader.parse_initial_data());
  78. return reader;
  79. }
  80. static DecoderErrorOr<void> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64, size_t)> element_consumer)
  81. {
  82. auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
  83. dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);
  84. streamer.push_octets_read();
  85. while (streamer.octets_read() < element_data_size) {
  86. dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
  87. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  88. auto element_position = streamer.position();
  89. dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);
  90. auto result = element_consumer(element_id, element_position);
  91. if (result.is_error())
  92. return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
  93. if (result.release_value() == IterationDecision::Break)
  94. break;
  95. dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
  96. }
  97. streamer.pop_octets_read();
  98. return {};
  99. }
  100. static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
  101. {
  102. EBMLHeader header;
  103. TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  104. switch (element_id) {
  105. case DOCTYPE_ELEMENT_ID:
  106. header.doc_type = TRY_READ(streamer.read_string());
  107. dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", header.doc_type);
  108. break;
  109. case DOCTYPE_VERSION_ELEMENT_ID:
  110. header.doc_type_version = TRY_READ(streamer.read_u64());
  111. dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", header.doc_type_version);
  112. break;
  113. default:
  114. TRY_READ(streamer.read_unknown_element());
  115. }
  116. return IterationDecision::Continue;
  117. }));
  118. return header;
  119. }
  120. DecoderErrorOr<void> Reader::parse_initial_data()
  121. {
  122. Streamer streamer { m_data };
  123. auto first_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  124. dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id);
  125. if (first_element_id != EBML_MASTER_ELEMENT_ID)
  126. return DecoderError::corrupted("First element was not an EBML header"sv);
  127. m_header = TRY(parse_ebml_header(streamer));
  128. dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
  129. auto root_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  130. if (root_element_id != SEGMENT_ELEMENT_ID)
  131. return DecoderError::corrupted("Second element was not a segment element"sv);
  132. m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
  133. m_segment_contents_position = streamer.position();
  134. dbgln_if(true, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, m_data.size() - m_segment_contents_position);
  135. m_segment_contents_size = min(m_segment_contents_size, m_data.size() - m_segment_contents_position);
  136. return {};
  137. }
  138. static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
  139. {
  140. return parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  141. if (seek_head_child_id == SEEK_ELEMENT_ID) {
  142. Optional<u64> seek_id;
  143. Optional<u64> seek_position;
  144. TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  145. switch (seek_entry_child_id) {
  146. case SEEK_ID_ELEMENT_ID:
  147. seek_id = TRY_READ(streamer.read_u64());
  148. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
  149. break;
  150. case SEEK_POSITION_ELEMENT_ID:
  151. seek_position = TRY_READ(streamer.read_u64());
  152. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
  153. break;
  154. default:
  155. TRY_READ(streamer.read_unknown_element());
  156. }
  157. return IterationDecision::Continue;
  158. }));
  159. if (!seek_id.has_value())
  160. return DecoderError::corrupted("Seek entry is missing the element ID"sv);
  161. if (!seek_position.has_value())
  162. return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
  163. if (seek_id.value() > NumericLimits<u32>::max())
  164. return DecoderError::corrupted("Seek entry's element ID is too large"sv);
  165. dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
  166. // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.
  167. if (table.contains(seek_id.value())) {
  168. dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
  169. return IterationDecision::Continue;
  170. }
  171. DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
  172. } else {
  173. dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
  174. }
  175. return IterationDecision::Continue;
  176. });
  177. }
  178. DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
  179. {
  180. dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);
  181. if (m_seek_entries.contains(element_id)) {
  182. dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
  183. return m_seek_entries.get(element_id).release_value();
  184. }
  185. Streamer streamer { m_data };
  186. if (m_last_top_level_element_position != 0)
  187. TRY_READ(streamer.seek_to_position(m_last_top_level_element_position));
  188. else
  189. TRY_READ(streamer.seek_to_position(m_segment_contents_position));
  190. Optional<size_t> position;
  191. while (streamer.position() < m_segment_contents_position + m_segment_contents_size) {
  192. auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  193. auto found_element_position = streamer.position();
  194. dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", found_element_id, found_element_position);
  195. if (found_element_id == SEEK_HEAD_ELEMENT_ID) {
  196. dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
  197. m_seek_entries.clear();
  198. TRY(parse_seek_head(streamer, found_element_position, m_seek_entries));
  199. m_last_top_level_element_position = 0;
  200. if (m_seek_entries.contains(element_id)) {
  201. dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
  202. position = m_seek_entries.get(element_id).release_value();
  203. break;
  204. }
  205. continue;
  206. }
  207. auto result = streamer.read_unknown_element();
  208. if (result.is_error())
  209. return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, result.release_error().string_literal());
  210. m_last_top_level_element_position = streamer.position();
  211. DECODER_TRY_ALLOC(m_seek_entries.try_set(found_element_id, found_element_position));
  212. if (found_element_id == element_id) {
  213. position = found_element_position;
  214. break;
  215. }
  216. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
  217. }
  218. return position;
  219. }
  220. static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
  221. {
  222. SegmentInformation segment_information;
  223. TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  224. switch (element_id) {
  225. case TIMESTAMP_SCALE_ID:
  226. segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64()));
  227. dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", segment_information.timestamp_scale());
  228. break;
  229. case MUXING_APP_ID:
  230. segment_information.set_muxing_app(TRY_READ(streamer.read_string()));
  231. dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", segment_information.muxing_app().as_string());
  232. break;
  233. case WRITING_APP_ID:
  234. segment_information.set_writing_app(TRY_READ(streamer.read_string()));
  235. dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", segment_information.writing_app().as_string());
  236. break;
  237. case DURATION_ID:
  238. segment_information.set_duration_unscaled(TRY_READ(streamer.read_float()));
  239. dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", segment_information.duration_unscaled().value());
  240. break;
  241. default:
  242. TRY_READ(streamer.read_unknown_element());
  243. }
  244. return IterationDecision::Continue;
  245. }));
  246. return segment_information;
  247. }
  248. DecoderErrorOr<SegmentInformation> Reader::segment_information()
  249. {
  250. if (m_segment_information.has_value())
  251. return m_segment_information.value();
  252. auto position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
  253. if (!position.has_value())
  254. return DecoderError::corrupted("No Segment Information element found"sv);
  255. Streamer streamer { m_data };
  256. TRY_READ(streamer.seek_to_position(position.release_value()));
  257. m_segment_information = TRY(parse_information(streamer));
  258. return m_segment_information.value();
  259. }
  260. DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
  261. {
  262. if (!m_tracks.is_empty())
  263. return {};
  264. auto position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
  265. if (!position.has_value())
  266. return DecoderError::corrupted("No Tracks element found"sv);
  267. Streamer streamer { m_data };
  268. TRY_READ(streamer.seek_to_position(position.release_value()));
  269. TRY(parse_tracks(streamer));
  270. return {};
  271. }
  272. static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
  273. {
  274. TrackEntry::ColorFormat color_format {};
  275. TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  276. switch (element_id) {
  277. case PRIMARIES_ID:
  278. color_format.color_primaries = static_cast<ColorPrimaries>(TRY_READ(streamer.read_u64()));
  279. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color_format.color_primaries));
  280. break;
  281. case TRANSFER_CHARACTERISTICS_ID:
  282. color_format.transfer_characteristics = static_cast<TransferCharacteristics>(TRY_READ(streamer.read_u64()));
  283. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color_format.transfer_characteristics));
  284. break;
  285. case MATRIX_COEFFICIENTS_ID:
  286. color_format.matrix_coefficients = static_cast<MatrixCoefficients>(TRY_READ(streamer.read_u64()));
  287. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color_format.matrix_coefficients));
  288. break;
  289. case BITS_PER_CHANNEL_ID:
  290. color_format.bits_per_channel = TRY_READ(streamer.read_u64());
  291. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color_format.bits_per_channel);
  292. break;
  293. default:
  294. TRY_READ(streamer.read_unknown_element());
  295. }
  296. return IterationDecision::Continue;
  297. }));
  298. return color_format;
  299. }
  300. static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
  301. {
  302. TrackEntry::VideoTrack video_track {};
  303. TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  304. switch (element_id) {
  305. case PIXEL_WIDTH_ID:
  306. video_track.pixel_width = TRY_READ(streamer.read_u64());
  307. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video_track.pixel_width);
  308. break;
  309. case PIXEL_HEIGHT_ID:
  310. video_track.pixel_height = TRY_READ(streamer.read_u64());
  311. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video_track.pixel_height);
  312. break;
  313. case COLOR_ENTRY_ID:
  314. video_track.color_format = TRY(parse_video_color_information(streamer));
  315. break;
  316. default:
  317. TRY_READ(streamer.read_unknown_element());
  318. }
  319. return IterationDecision::Continue;
  320. }));
  321. return video_track;
  322. }
  323. static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
  324. {
  325. TrackEntry::AudioTrack audio_track {};
  326. TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  327. switch (element_id) {
  328. case CHANNELS_ID:
  329. audio_track.channels = TRY_READ(streamer.read_u64());
  330. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio_track.channels);
  331. break;
  332. case BIT_DEPTH_ID:
  333. audio_track.bit_depth = TRY_READ(streamer.read_u64());
  334. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio_track.bit_depth);
  335. break;
  336. default:
  337. TRY_READ(streamer.read_unknown_element());
  338. }
  339. return IterationDecision::Continue;
  340. }));
  341. return audio_track;
  342. }
  343. static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
  344. {
  345. TrackEntry track_entry;
  346. TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  347. switch (element_id) {
  348. case TRACK_NUMBER_ID:
  349. track_entry.set_track_number(TRY_READ(streamer.read_u64()));
  350. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry.track_number());
  351. break;
  352. case TRACK_UID_ID:
  353. track_entry.set_track_uid(TRY_READ(streamer.read_u64()));
  354. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry.track_uid());
  355. break;
  356. case TRACK_TYPE_ID:
  357. track_entry.set_track_type(static_cast<TrackEntry::TrackType>(TRY_READ(streamer.read_u64())));
  358. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry.track_type()));
  359. break;
  360. case TRACK_LANGUAGE_ID:
  361. track_entry.set_language(TRY_READ(streamer.read_string()));
  362. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry.language());
  363. break;
  364. case TRACK_CODEC_ID:
  365. track_entry.set_codec_id(TRY_READ(streamer.read_string()));
  366. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
  367. break;
  368. case TRACK_TIMESTAMP_SCALE_ID:
  369. track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
  370. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
  371. break;
  372. case TRACK_OFFSET_ID:
  373. track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
  374. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
  375. break;
  376. case TRACK_VIDEO_ID:
  377. track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
  378. break;
  379. case TRACK_AUDIO_ID:
  380. track_entry.set_audio_track(TRY(parse_audio_track_information(streamer)));
  381. break;
  382. default:
  383. TRY_READ(streamer.read_unknown_element());
  384. }
  385. return IterationDecision::Continue;
  386. }));
  387. return track_entry;
  388. }
  389. DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
  390. {
  391. return parse_master_element(streamer, "Tracks"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  392. if (element_id == TRACK_ENTRY_ID) {
  393. auto track_entry = TRY(parse_track_entry(streamer));
  394. dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number());
  395. DECODER_TRY_ALLOC(m_tracks.try_set(track_entry.track_number(), track_entry));
  396. } else {
  397. TRY_READ(streamer.read_unknown_element());
  398. }
  399. return IterationDecision::Continue;
  400. });
  401. }
  402. DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
  403. {
  404. TRY(ensure_tracks_are_parsed());
  405. for (auto const& track_entry : m_tracks) {
  406. auto decision = TRY(callback(track_entry.value));
  407. if (decision == IterationDecision::Break)
  408. break;
  409. }
  410. return {};
  411. }
  412. DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
  413. {
  414. return for_each_track([&](TrackEntry const& track_entry) -> DecoderErrorOr<IterationDecision> {
  415. if (track_entry.track_type() != type)
  416. return IterationDecision::Continue;
  417. return callback(track_entry);
  418. });
  419. }
  420. DecoderErrorOr<TrackEntry> Reader::track_for_track_number(u64 track_number)
  421. {
  422. TRY(ensure_tracks_are_parsed());
  423. auto optional_track_entry = m_tracks.get(track_number);
  424. if (!optional_track_entry.has_value())
  425. return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
  426. return optional_track_entry.release_value();
  427. }
  428. DecoderErrorOr<size_t> Reader::track_count()
  429. {
  430. TRY(ensure_tracks_are_parsed());
  431. return m_tracks.size();
  432. }
  433. constexpr size_t get_element_id_size(u32 element_id)
  434. {
  435. return sizeof(element_id) - (count_leading_zeroes(element_id) / 8);
  436. }
  437. static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
  438. {
  439. Optional<u64> timestamp;
  440. size_t first_element_position = 0;
  441. TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id, size_t position) -> DecoderErrorOr<IterationDecision> {
  442. if (first_element_position == 0)
  443. first_element_position = position - get_element_id_size(element_id);
  444. switch (element_id) {
  445. case TIMESTAMP_ID:
  446. timestamp = TRY_READ(streamer.read_u64());
  447. return IterationDecision::Break;
  448. default:
  449. TRY_READ(streamer.read_unknown_element());
  450. }
  451. return IterationDecision::Continue;
  452. }));
  453. if (!timestamp.has_value())
  454. return DecoderError::corrupted("Cluster was missing a timestamp"sv);
  455. if (first_element_position == 0)
  456. return DecoderError::corrupted("Cluster had no children"sv);
  457. dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", first_element_position);
  458. TRY_READ(streamer.seek_to_position(first_element_position));
  459. Cluster cluster;
  460. cluster.set_timestamp(Time::from_nanoseconds(timestamp.release_value() * timestamp_scale));
  461. return cluster;
  462. }
  463. static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
  464. {
  465. Block block;
  466. auto content_size = TRY_READ(streamer.read_variable_size_integer());
  467. auto position_before_track_number = streamer.position();
  468. block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));
  469. // https://www.matroska.org/technical/notes.html
  470. // Block Timestamps:
  471. // The Block Element and SimpleBlock Element store their timestamps as signed integers,
  472. // relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
  473. // timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
  474. // `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
  475. //
  476. // When a CodecDelay Element is set, its value MUST be substracted from each Block timestamp
  477. // of that track. To get the timestamp in nanoseconds of the first frame in a Block or
  478. // SimpleBlock, the formula becomes:
  479. // `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
  480. Time timestamp_offset = Time::from_nanoseconds(static_cast<i64>(static_cast<double>(TRY_READ(streamer.read_i16()) * segment_timestamp_scale) * track.timestamp_scale()));
  481. timestamp_offset -= Time::from_nanoseconds(static_cast<i64>(track.codec_delay()));
  482. // This is only mentioned in the elements specification under TrackOffset.
  483. // https://www.matroska.org/technical/elements.html
  484. timestamp_offset += Time::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
  485. block.set_timestamp(cluster_timestamp + timestamp_offset);
  486. auto flags = TRY_READ(streamer.read_octet());
  487. block.set_only_keyframes((flags & (1u << 7u)) != 0);
  488. block.set_invisible((flags & (1u << 3u)) != 0);
  489. block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
  490. block.set_discardable((flags & 1u) != 0);
  491. auto total_frame_content_size = content_size - (streamer.position() - position_before_track_number);
  492. Vector<ReadonlyBytes> frames;
  493. if (block.lacing() == Block::Lacing::EBML) {
  494. auto octets_read_before_frame_sizes = streamer.octets_read();
  495. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  496. Vector<u64> frame_sizes;
  497. frame_sizes.ensure_capacity(frame_count);
  498. u64 frame_size_sum = 0;
  499. u64 previous_frame_size;
  500. auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
  501. frame_sizes.append(first_frame_size);
  502. frame_size_sum += first_frame_size;
  503. previous_frame_size = first_frame_size;
  504. for (int i = 0; i < frame_count - 2; i++) {
  505. auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
  506. u64 frame_size;
  507. // FIXME: x - (-y) == x + y?
  508. if (frame_size_difference < 0)
  509. frame_size = previous_frame_size - (-frame_size_difference);
  510. else
  511. frame_size = previous_frame_size + frame_size_difference;
  512. frame_sizes.append(frame_size);
  513. frame_size_sum += frame_size;
  514. previous_frame_size = frame_size;
  515. }
  516. frame_sizes.append(total_frame_content_size - frame_size_sum - (streamer.octets_read() - octets_read_before_frame_sizes));
  517. for (int i = 0; i < frame_count; i++) {
  518. // FIXME: ReadonlyBytes instead of copying the frame data?
  519. auto current_frame_size = frame_sizes.at(i);
  520. frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
  521. }
  522. } else if (block.lacing() == Block::Lacing::FixedSize) {
  523. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  524. auto individual_frame_size = total_frame_content_size / frame_count;
  525. for (int i = 0; i < frame_count; i++)
  526. frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
  527. } else {
  528. frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
  529. }
  530. block.set_frames(move(frames));
  531. return block;
  532. }
  533. DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
  534. {
  535. auto optional_position = TRY(find_first_top_level_element_with_id("Cluster"sv, CLUSTER_ELEMENT_ID));
  536. if (!optional_position.has_value())
  537. return DecoderError::corrupted("No clusters are present in the segment"sv);
  538. ReadonlyBytes segment_view = m_data.slice(m_segment_contents_position, m_segment_contents_size);
  539. // We need to have the element ID included so that the iterator knows where it is.
  540. auto position = optional_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID) - m_segment_contents_position;
  541. dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);
  542. return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position);
  543. }
  544. static DecoderErrorOr<CueTrackPosition> parse_cue_track_position(Streamer& streamer)
  545. {
  546. CueTrackPosition track_position;
  547. bool had_cluster_position = false;
  548. TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  549. switch (element_id) {
  550. case CUE_TRACK_ID:
  551. track_position.set_track_number(TRY_READ(streamer.read_u64()));
  552. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions track number {}", track_position.track_number());
  553. break;
  554. case CUE_CLUSTER_POSITION_ID:
  555. track_position.set_cluster_position(TRY_READ(streamer.read_u64()));
  556. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions cluster position {}", track_position.cluster_position());
  557. had_cluster_position = true;
  558. break;
  559. case CUE_RELATIVE_POSITION_ID:
  560. track_position.set_block_offset(TRY_READ(streamer.read_u64()));
  561. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions relative position {}", track_position.block_offset());
  562. break;
  563. case CUE_CODEC_STATE_ID:
  564. // Mandatory in spec, but not present in files? 0 means use TrackEntry's codec state.
  565. // FIXME: Do something with this value.
  566. dbgln_if(MATROSKA_DEBUG, "Found CodecState, skipping");
  567. TRY_READ(streamer.read_unknown_element());
  568. break;
  569. case CUE_REFERENCE_ID:
  570. return DecoderError::not_implemented();
  571. default:
  572. TRY_READ(streamer.read_unknown_element());
  573. break;
  574. }
  575. return IterationDecision::Continue;
  576. }));
  577. if (track_position.track_number() == 0)
  578. return DecoderError::corrupted("Track number was not present or 0"sv);
  579. if (!had_cluster_position)
  580. return DecoderError::corrupted("Cluster was missing the cluster position"sv);
  581. return track_position;
  582. }
  583. static DecoderErrorOr<CuePoint> parse_cue_point(Streamer& streamer, u64 timestamp_scale)
  584. {
  585. CuePoint cue_point;
  586. TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  587. switch (element_id) {
  588. case CUE_TIME_ID: {
  589. // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element:
  590. // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks.
  591. // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html:
  592. // > For such elements, the timestamp value is stored directly in nanoseconds.
  593. // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees:
  594. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941
  595. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416
  596. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392
  597. // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^(
  598. auto timestamp = Time::from_nanoseconds(static_cast<i64>(TRY_READ(streamer.read_u64()) * timestamp_scale));
  599. cue_point.set_timestamp(timestamp);
  600. dbgln_if(MATROSKA_DEBUG, "Read CuePoint timestamp {}ms", cue_point.timestamp().to_milliseconds());
  601. break;
  602. }
  603. case CUE_TRACK_POSITIONS_ID: {
  604. auto track_position = TRY_READ(parse_cue_track_position(streamer));
  605. DECODER_TRY_ALLOC(cue_point.track_positions().try_set(track_position.track_number(), track_position));
  606. break;
  607. }
  608. default:
  609. TRY_READ(streamer.read_unknown_element());
  610. break;
  611. }
  612. return IterationDecision::Continue;
  613. }));
  614. if (cue_point.timestamp().is_negative())
  615. return DecoderError::corrupted("CuePoint was missing a timestamp"sv);
  616. if (cue_point.track_positions().is_empty())
  617. return DecoderError::corrupted("CuePoint was missing track positions"sv);
  618. return cue_point;
  619. }
  620. DecoderErrorOr<void> Reader::parse_cues(Streamer& streamer)
  621. {
  622. m_cues.clear();
  623. TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  624. switch (element_id) {
  625. case CUE_POINT_ID: {
  626. auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale()));
  627. // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now,
  628. // but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead.
  629. for (auto track_position_entry : cue_point.track_positions()) {
  630. if (!m_cues.contains(track_position_entry.key))
  631. DECODER_TRY_ALLOC(m_cues.try_set(track_position_entry.key, Vector<CuePoint>()));
  632. Vector<CuePoint>& cue_points_for_track = m_cues.get(track_position_entry.key).release_value();
  633. cue_points_for_track.append(cue_point);
  634. }
  635. break;
  636. }
  637. default:
  638. return DecoderError::format(DecoderErrorCategory::Corrupted, "Unknown Cues child ID {:#010x}", element_id);
  639. }
  640. return IterationDecision::Continue;
  641. }));
  642. return {};
  643. }
  644. DecoderErrorOr<void> Reader::ensure_cues_are_parsed()
  645. {
  646. if (m_cues_have_been_parsed)
  647. return {};
  648. auto position = TRY(find_first_top_level_element_with_id("Cues"sv, CUES_ID));
  649. if (!position.has_value())
  650. return DecoderError::corrupted("No Tracks element found"sv);
  651. Streamer streamer { m_data };
  652. TRY_READ(streamer.seek_to_position(position.release_value()));
  653. TRY(parse_cues(streamer));
  654. m_cues_have_been_parsed = true;
  655. return {};
  656. }
  657. DecoderErrorOr<void> Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Time const& timestamp)
  658. {
  659. auto const& cue_points = MUST(cue_points_for_track(iterator.m_track.track_number())).release_value();
  660. // Take a guess at where in the cues the timestamp will be and correct from there.
  661. auto duration = TRY(segment_information()).duration();
  662. size_t index = 0;
  663. if (duration.has_value())
  664. index = clamp(((timestamp.to_nanoseconds() * cue_points.size()) / TRY(segment_information()).duration()->to_nanoseconds()), 0, cue_points.size() - 1);
  665. CuePoint const* prev_cue_point = &cue_points[index];
  666. if (prev_cue_point->timestamp() == timestamp) {
  667. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  668. return {};
  669. }
  670. if (prev_cue_point->timestamp() > timestamp) {
  671. while (index > 0 && prev_cue_point->timestamp() > timestamp)
  672. prev_cue_point = &cue_points[--index];
  673. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  674. return {};
  675. }
  676. while (index < cue_points.size()) {
  677. auto const& cue_point = cue_points[index++];
  678. if (cue_point.timestamp() > timestamp)
  679. break;
  680. prev_cue_point = &cue_point;
  681. index++;
  682. }
  683. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  684. return {};
  685. }
  686. static DecoderErrorOr<bool> find_keyframe_before_timestamp(SampleIterator& iterator, Time const& timestamp)
  687. {
  688. #if MATROSKA_DEBUG
  689. size_t inter_frames_count;
  690. #endif
  691. Optional<SampleIterator> last_keyframe;
  692. while (true) {
  693. SampleIterator rewind_iterator = iterator;
  694. auto block = TRY(iterator.next_block());
  695. if (block.only_keyframes()) {
  696. last_keyframe.emplace(rewind_iterator);
  697. #if MATROSKA_DEBUG
  698. inter_frames_count = 0;
  699. #endif
  700. }
  701. if (block.timestamp() > timestamp)
  702. break;
  703. #if MATROSKA_DEBUG
  704. inter_frames_count++;
  705. #endif
  706. }
  707. if (last_keyframe.has_value()) {
  708. #if MATROSKA_DEBUG
  709. dbgln("Seeked to a keyframe with {} inter frames to skip", inter_frames_count);
  710. #endif
  711. iterator = last_keyframe.release_value();
  712. return true;
  713. }
  714. return false;
  715. }
  716. DecoderErrorOr<bool> Reader::has_cues_for_track(u64 track_number)
  717. {
  718. TRY(ensure_cues_are_parsed());
  719. return m_cues.contains(track_number);
  720. }
  721. DecoderErrorOr<void> Reader::seek_to_random_access_point(SampleIterator& iterator, Time timestamp)
  722. {
  723. if (iterator.m_last_timestamp == timestamp)
  724. return {};
  725. if (TRY(has_cues_for_track(iterator.m_track.track_number()))) {
  726. auto seeked_iterator = iterator;
  727. TRY(seek_to_cue_for_timestamp(seeked_iterator, timestamp));
  728. VERIFY(seeked_iterator.m_last_timestamp <= timestamp);
  729. // We only need to seek to a keyframe if it's not faster to continue from the current position.
  730. if (timestamp < iterator.m_last_timestamp || seeked_iterator.m_last_timestamp > iterator.m_last_timestamp)
  731. iterator = seeked_iterator;
  732. return {};
  733. }
  734. // FIXME: This could cache the keyframes it finds. Is it worth doing? Probably not, most files will have Cues :^)
  735. if (timestamp < iterator.last_timestamp() || iterator.last_timestamp().is_negative()) {
  736. // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
  737. iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));
  738. if (!TRY(find_keyframe_before_timestamp(iterator, timestamp)))
  739. return DecoderError::corrupted("No random access points found"sv);
  740. return {};
  741. }
  742. auto seeked_iterator = iterator;
  743. if (TRY(find_keyframe_before_timestamp(seeked_iterator, timestamp)))
  744. iterator = seeked_iterator;
  745. VERIFY(iterator.last_timestamp() <= timestamp);
  746. return {};
  747. }
  748. DecoderErrorOr<Optional<Vector<CuePoint> const&>> Reader::cue_points_for_track(u64 track_number)
  749. {
  750. TRY(ensure_cues_are_parsed());
  751. return m_cues.get(track_number);
  752. }
  753. DecoderErrorOr<Block> SampleIterator::next_block()
  754. {
  755. if (m_position >= m_data.size())
  756. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "Still at end of stream :^)"sv);
  757. Streamer streamer { m_data };
  758. TRY_READ(streamer.seek_to_position(m_position));
  759. Optional<Block> block;
  760. while (streamer.has_octet()) {
  761. #if MATROSKA_TRACE_DEBUG
  762. auto element_position = streamer.position();
  763. #endif
  764. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  765. #if MATROSKA_TRACE_DEBUG
  766. dbgln("Iterator found element with ID {:#010x} at offset {} within the segment.", element_id, element_position);
  767. #endif
  768. if (element_id == CLUSTER_ELEMENT_ID) {
  769. dbgln_if(MATROSKA_DEBUG, " Iterator is parsing new cluster.");
  770. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  771. } else if (element_id == SIMPLE_BLOCK_ID) {
  772. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is parsing new block.");
  773. auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
  774. if (candidate_block.track_number() == m_track.track_number())
  775. block = move(candidate_block);
  776. } else {
  777. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is skipping unknown element with ID {:#010x}.", element_id);
  778. TRY_READ(streamer.read_unknown_element());
  779. }
  780. m_position = streamer.position();
  781. if (block.has_value()) {
  782. m_last_timestamp = block->timestamp();
  783. return block.release_value();
  784. }
  785. }
  786. m_current_cluster.clear();
  787. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv);
  788. }
  789. DecoderErrorOr<void> SampleIterator::seek_to_cue_point(CuePoint const& cue_point)
  790. {
  791. // This is a private function. The position getter can return optional, but the caller should already know that this track has a position.
  792. auto const& cue_position = cue_point.position_for_track(m_track.track_number()).release_value();
  793. Streamer streamer { m_data };
  794. TRY_READ(streamer.seek_to_position(cue_position.cluster_position()));
  795. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  796. if (element_id != CLUSTER_ELEMENT_ID)
  797. return DecoderError::corrupted("Cue point's cluster position didn't point to a cluster"sv);
  798. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  799. dbgln_if(MATROSKA_DEBUG, "SampleIterator set to cue point at timestamp {}ms", m_current_cluster->timestamp().to_milliseconds());
  800. m_position = streamer.position() + cue_position.block_offset();
  801. m_last_timestamp = cue_point.timestamp();
  802. return {};
  803. }
  804. ErrorOr<String> Streamer::read_string()
  805. {
  806. auto string_length = TRY(read_variable_size_integer());
  807. if (remaining() < string_length)
  808. return Error::from_string_literal("String length extends past the end of the stream");
  809. auto string_value = String(data_as_chars(), string_length);
  810. TRY(read_raw_octets(string_length));
  811. return string_value;
  812. }
  813. ErrorOr<u8> Streamer::read_octet()
  814. {
  815. if (!has_octet()) {
  816. dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
  817. return Error::from_string_literal("Stream is out of data");
  818. }
  819. u8 byte = *data();
  820. m_octets_read.last()++;
  821. m_position++;
  822. return byte;
  823. }
  824. ErrorOr<i16> Streamer::read_i16()
  825. {
  826. return (TRY(read_octet()) << 8) | TRY(read_octet());
  827. }
  828. ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
  829. {
  830. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", data());
  831. auto length_descriptor = TRY(read_octet());
  832. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
  833. if (length_descriptor == 0)
  834. return Error::from_string_literal("read_variable_size_integer: Length descriptor has no terminating set bit");
  835. size_t length = 0;
  836. while (length < 8) {
  837. if (((length_descriptor >> (8 - length)) & 1) == 1)
  838. break;
  839. length++;
  840. }
  841. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
  842. if (length > 8)
  843. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  844. u64 result;
  845. if (mask_length)
  846. result = length_descriptor & ~(1u << (8 - length));
  847. else
  848. result = length_descriptor;
  849. dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
  850. for (size_t i = 1; i < length; i++) {
  851. u8 next_octet = TRY(read_octet());
  852. dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
  853. result = (result << 8u) | next_octet;
  854. dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
  855. }
  856. return result;
  857. }
  858. ErrorOr<i64> Streamer::read_variable_size_signed_integer()
  859. {
  860. auto length_descriptor = TRY(read_octet());
  861. if (length_descriptor == 0)
  862. return Error::from_string_literal("read_variable_sized_signed_integer: Length descriptor has no terminating set bit");
  863. i64 length = 0;
  864. while (length < 8) {
  865. if (((length_descriptor >> (8 - length)) & 1) == 1)
  866. break;
  867. length++;
  868. }
  869. if (length > 8)
  870. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  871. i64 result = length_descriptor & ~(1u << (8 - length));
  872. for (i64 i = 1; i < length; i++) {
  873. u8 next_octet = TRY(read_octet());
  874. result = (result << 8u) | next_octet;
  875. }
  876. result -= AK::exp2<i64>(length * 7 - 1) - 1;
  877. return result;
  878. }
  879. ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
  880. {
  881. if (remaining() < num_octets)
  882. return Error::from_string_literal("Tried to drop octets past the end of the stream");
  883. ReadonlyBytes result = { data(), num_octets };
  884. m_position += num_octets;
  885. m_octets_read.last() += num_octets;
  886. return result;
  887. }
  888. ErrorOr<u64> Streamer::read_u64()
  889. {
  890. auto integer_length = TRY(read_variable_size_integer());
  891. u64 result = 0;
  892. for (size_t i = 0; i < integer_length; i++) {
  893. result = (result << 8u) + TRY(read_octet());
  894. }
  895. return result;
  896. }
  897. ErrorOr<double> Streamer::read_float()
  898. {
  899. auto length = TRY(read_variable_size_integer());
  900. if (length != 4u && length != 8u)
  901. return Error::from_string_literal("Float size must be 4 or 8 bytes");
  902. union {
  903. u64 value;
  904. float float_value;
  905. double double_value;
  906. } read_data;
  907. read_data.value = 0;
  908. for (size_t i = 0; i < length; i++) {
  909. read_data.value = (read_data.value << 8u) + TRY(read_octet());
  910. }
  911. if (length == 4u)
  912. return read_data.float_value;
  913. return read_data.double_value;
  914. }
  915. ErrorOr<void> Streamer::read_unknown_element()
  916. {
  917. auto element_length = TRY(read_variable_size_integer());
  918. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipping unknown element of size {}.", element_length);
  919. TRY(read_raw_octets(element_length));
  920. return {};
  921. }
  922. ErrorOr<void> Streamer::seek_to_position(size_t position)
  923. {
  924. if (position >= m_data.size())
  925. return Error::from_string_literal("Attempted to seek past the end of the stream");
  926. m_position = position;
  927. return {};
  928. }
  929. }