Reader.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. * Copyright (c) 2022, Gregory Bertilson <Zaggy1024@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/Function.h>
  9. #include <AK/Optional.h>
  10. #include <AK/Time.h>
  11. #include <AK/Utf8View.h>
  12. #include <LibCore/MappedFile.h>
  13. #include "Reader.h"
  14. namespace Video::Matroska {
  15. #define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
  16. // Elements IDs and types are listed at this URL:
  17. // https://www.matroska.org/technical/elements.html
  18. constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
  19. constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
  20. constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
  21. constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
  22. constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74;
  23. constexpr u32 SEEK_ELEMENT_ID = 0x4DBB;
  24. constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB;
  25. constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC;
  26. constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
  27. constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
  28. constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
  29. constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
  30. constexpr u32 MUXING_APP_ID = 0x4D80;
  31. constexpr u32 WRITING_APP_ID = 0x5741;
  32. constexpr u32 DURATION_ID = 0x4489;
  33. // Tracks
  34. constexpr u32 TRACK_ENTRY_ID = 0xAE;
  35. constexpr u32 TRACK_NUMBER_ID = 0xD7;
  36. constexpr u32 TRACK_UID_ID = 0x73C5;
  37. constexpr u32 TRACK_TYPE_ID = 0x83;
  38. constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
  39. constexpr u32 TRACK_CODEC_ID = 0x86;
  40. constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
  41. constexpr u32 TRACK_OFFSET_ID = 0x537F;
  42. constexpr u32 TRACK_VIDEO_ID = 0xE0;
  43. constexpr u32 TRACK_AUDIO_ID = 0xE1;
  44. // Video
  45. constexpr u32 PIXEL_WIDTH_ID = 0xB0;
  46. constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
  47. constexpr u32 COLOR_ENTRY_ID = 0x55B0;
  48. constexpr u32 PRIMARIES_ID = 0x55BB;
  49. constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA;
  50. constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1;
  51. constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2;
  52. // Audio
  53. constexpr u32 CHANNELS_ID = 0x9F;
  54. constexpr u32 BIT_DEPTH_ID = 0x6264;
  55. // Clusters
  56. constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
  57. constexpr u32 TIMESTAMP_ID = 0xE7;
  58. // Cues
  59. constexpr u32 CUES_ID = 0x1C53BB6B;
  60. constexpr u32 CUE_POINT_ID = 0xBB;
  61. constexpr u32 CUE_TIME_ID = 0xB3;
  62. constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7;
  63. constexpr u32 CUE_TRACK_ID = 0xF7;
  64. constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1;
  65. constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0;
  66. constexpr u32 CUE_CODEC_STATE_ID = 0xEA;
  67. constexpr u32 CUE_REFERENCE_ID = 0xDB;
  68. DecoderErrorOr<Reader> Reader::from_file(StringView path)
  69. {
  70. auto mapped_file = DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path));
  71. auto reader = TRY(from_data(mapped_file->bytes()));
  72. reader.m_mapped_file = mapped_file;
  73. return reader;
  74. }
  75. DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
  76. {
  77. Reader reader(data);
  78. TRY(reader.parse_initial_data());
  79. return reader;
  80. }
  81. static DecoderErrorOr<void> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64, size_t)> element_consumer)
  82. {
  83. auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
  84. dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);
  85. streamer.push_octets_read();
  86. while (streamer.octets_read() < element_data_size) {
  87. dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
  88. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  89. auto element_position = streamer.position();
  90. dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);
  91. auto result = element_consumer(element_id, element_position);
  92. if (result.is_error())
  93. return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
  94. if (result.release_value() == IterationDecision::Break)
  95. break;
  96. dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
  97. }
  98. streamer.pop_octets_read();
  99. return {};
  100. }
  101. static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
  102. {
  103. EBMLHeader header;
  104. TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  105. switch (element_id) {
  106. case DOCTYPE_ELEMENT_ID:
  107. header.doc_type = TRY_READ(streamer.read_string());
  108. dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", header.doc_type);
  109. break;
  110. case DOCTYPE_VERSION_ELEMENT_ID:
  111. header.doc_type_version = TRY_READ(streamer.read_u64());
  112. dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", header.doc_type_version);
  113. break;
  114. default:
  115. TRY_READ(streamer.read_unknown_element());
  116. }
  117. return IterationDecision::Continue;
  118. }));
  119. return header;
  120. }
  121. DecoderErrorOr<void> Reader::parse_initial_data()
  122. {
  123. Streamer streamer { m_data };
  124. auto first_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  125. dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id);
  126. if (first_element_id != EBML_MASTER_ELEMENT_ID)
  127. return DecoderError::corrupted("First element was not an EBML header"sv);
  128. m_header = TRY(parse_ebml_header(streamer));
  129. dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
  130. auto root_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  131. if (root_element_id != SEGMENT_ELEMENT_ID)
  132. return DecoderError::corrupted("Second element was not a segment element"sv);
  133. m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
  134. m_segment_contents_position = streamer.position();
  135. dbgln_if(true, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, m_data.size() - m_segment_contents_position);
  136. m_segment_contents_size = min(m_segment_contents_size, m_data.size() - m_segment_contents_position);
  137. return {};
  138. }
  139. static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
  140. {
  141. return parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  142. if (seek_head_child_id == SEEK_ELEMENT_ID) {
  143. Optional<u64> seek_id;
  144. Optional<u64> seek_position;
  145. TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id, size_t) -> DecoderErrorOr<IterationDecision> {
  146. switch (seek_entry_child_id) {
  147. case SEEK_ID_ELEMENT_ID:
  148. seek_id = TRY_READ(streamer.read_u64());
  149. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
  150. break;
  151. case SEEK_POSITION_ELEMENT_ID:
  152. seek_position = TRY_READ(streamer.read_u64());
  153. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
  154. break;
  155. default:
  156. TRY_READ(streamer.read_unknown_element());
  157. }
  158. return IterationDecision::Continue;
  159. }));
  160. if (!seek_id.has_value())
  161. return DecoderError::corrupted("Seek entry is missing the element ID"sv);
  162. if (!seek_position.has_value())
  163. return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
  164. if (seek_id.value() > NumericLimits<u32>::max())
  165. return DecoderError::corrupted("Seek entry's element ID is too large"sv);
  166. dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
  167. // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.
  168. if (table.contains(seek_id.value())) {
  169. dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
  170. return IterationDecision::Continue;
  171. }
  172. DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
  173. } else {
  174. dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
  175. }
  176. return IterationDecision::Continue;
  177. });
  178. }
  179. DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
  180. {
  181. dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);
  182. if (m_seek_entries.contains(element_id)) {
  183. dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
  184. return m_seek_entries.get(element_id).release_value();
  185. }
  186. Streamer streamer { m_data };
  187. if (m_last_top_level_element_position != 0)
  188. TRY_READ(streamer.seek_to_position(m_last_top_level_element_position));
  189. else
  190. TRY_READ(streamer.seek_to_position(m_segment_contents_position));
  191. Optional<size_t> position;
  192. while (streamer.position() < m_segment_contents_position + m_segment_contents_size) {
  193. auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
  194. auto found_element_position = streamer.position();
  195. dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", found_element_id, found_element_position);
  196. if (found_element_id == SEEK_HEAD_ELEMENT_ID) {
  197. dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
  198. m_seek_entries.clear();
  199. TRY(parse_seek_head(streamer, found_element_position, m_seek_entries));
  200. m_last_top_level_element_position = 0;
  201. if (m_seek_entries.contains(element_id)) {
  202. dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
  203. position = m_seek_entries.get(element_id).release_value();
  204. break;
  205. }
  206. continue;
  207. }
  208. auto result = streamer.read_unknown_element();
  209. if (result.is_error())
  210. return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, result.release_error().string_literal());
  211. m_last_top_level_element_position = streamer.position();
  212. DECODER_TRY_ALLOC(m_seek_entries.try_set(found_element_id, found_element_position));
  213. if (found_element_id == element_id) {
  214. position = found_element_position;
  215. break;
  216. }
  217. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
  218. }
  219. return position;
  220. }
  221. static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
  222. {
  223. SegmentInformation segment_information;
  224. TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  225. switch (element_id) {
  226. case TIMESTAMP_SCALE_ID:
  227. segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64()));
  228. dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", segment_information.timestamp_scale());
  229. break;
  230. case MUXING_APP_ID:
  231. segment_information.set_muxing_app(TRY_READ(streamer.read_string()));
  232. dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", segment_information.muxing_app().as_string());
  233. break;
  234. case WRITING_APP_ID:
  235. segment_information.set_writing_app(TRY_READ(streamer.read_string()));
  236. dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", segment_information.writing_app().as_string());
  237. break;
  238. case DURATION_ID:
  239. segment_information.set_duration_unscaled(TRY_READ(streamer.read_float()));
  240. dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", segment_information.duration_unscaled().value());
  241. break;
  242. default:
  243. TRY_READ(streamer.read_unknown_element());
  244. }
  245. return IterationDecision::Continue;
  246. }));
  247. return segment_information;
  248. }
  249. DecoderErrorOr<SegmentInformation> Reader::segment_information()
  250. {
  251. if (m_segment_information.has_value())
  252. return m_segment_information.value();
  253. auto position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
  254. if (!position.has_value())
  255. return DecoderError::corrupted("No Segment Information element found"sv);
  256. Streamer streamer { m_data };
  257. TRY_READ(streamer.seek_to_position(position.release_value()));
  258. m_segment_information = TRY(parse_information(streamer));
  259. return m_segment_information.value();
  260. }
  261. DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
  262. {
  263. if (!m_tracks.is_empty())
  264. return {};
  265. auto position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
  266. if (!position.has_value())
  267. return DecoderError::corrupted("No Tracks element found"sv);
  268. Streamer streamer { m_data };
  269. TRY_READ(streamer.seek_to_position(position.release_value()));
  270. TRY(parse_tracks(streamer));
  271. return {};
  272. }
  273. static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
  274. {
  275. TrackEntry::ColorFormat color_format {};
  276. TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  277. switch (element_id) {
  278. case PRIMARIES_ID:
  279. color_format.color_primaries = static_cast<ColorPrimaries>(TRY_READ(streamer.read_u64()));
  280. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color_format.color_primaries));
  281. break;
  282. case TRANSFER_CHARACTERISTICS_ID:
  283. color_format.transfer_characteristics = static_cast<TransferCharacteristics>(TRY_READ(streamer.read_u64()));
  284. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color_format.transfer_characteristics));
  285. break;
  286. case MATRIX_COEFFICIENTS_ID:
  287. color_format.matrix_coefficients = static_cast<MatrixCoefficients>(TRY_READ(streamer.read_u64()));
  288. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color_format.matrix_coefficients));
  289. break;
  290. case BITS_PER_CHANNEL_ID:
  291. color_format.bits_per_channel = TRY_READ(streamer.read_u64());
  292. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color_format.bits_per_channel);
  293. break;
  294. default:
  295. TRY_READ(streamer.read_unknown_element());
  296. }
  297. return IterationDecision::Continue;
  298. }));
  299. return color_format;
  300. }
  301. static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
  302. {
  303. TrackEntry::VideoTrack video_track {};
  304. TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  305. switch (element_id) {
  306. case PIXEL_WIDTH_ID:
  307. video_track.pixel_width = TRY_READ(streamer.read_u64());
  308. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video_track.pixel_width);
  309. break;
  310. case PIXEL_HEIGHT_ID:
  311. video_track.pixel_height = TRY_READ(streamer.read_u64());
  312. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video_track.pixel_height);
  313. break;
  314. case COLOR_ENTRY_ID:
  315. video_track.color_format = TRY(parse_video_color_information(streamer));
  316. break;
  317. default:
  318. TRY_READ(streamer.read_unknown_element());
  319. }
  320. return IterationDecision::Continue;
  321. }));
  322. return video_track;
  323. }
  324. static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
  325. {
  326. TrackEntry::AudioTrack audio_track {};
  327. TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  328. switch (element_id) {
  329. case CHANNELS_ID:
  330. audio_track.channels = TRY_READ(streamer.read_u64());
  331. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio_track.channels);
  332. break;
  333. case BIT_DEPTH_ID:
  334. audio_track.bit_depth = TRY_READ(streamer.read_u64());
  335. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio_track.bit_depth);
  336. break;
  337. default:
  338. TRY_READ(streamer.read_unknown_element());
  339. }
  340. return IterationDecision::Continue;
  341. }));
  342. return audio_track;
  343. }
  344. static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
  345. {
  346. TrackEntry track_entry;
  347. TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  348. switch (element_id) {
  349. case TRACK_NUMBER_ID:
  350. track_entry.set_track_number(TRY_READ(streamer.read_u64()));
  351. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry.track_number());
  352. break;
  353. case TRACK_UID_ID:
  354. track_entry.set_track_uid(TRY_READ(streamer.read_u64()));
  355. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry.track_uid());
  356. break;
  357. case TRACK_TYPE_ID:
  358. track_entry.set_track_type(static_cast<TrackEntry::TrackType>(TRY_READ(streamer.read_u64())));
  359. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry.track_type()));
  360. break;
  361. case TRACK_LANGUAGE_ID:
  362. track_entry.set_language(TRY_READ(streamer.read_string()));
  363. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry.language());
  364. break;
  365. case TRACK_CODEC_ID:
  366. track_entry.set_codec_id(TRY_READ(streamer.read_string()));
  367. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
  368. break;
  369. case TRACK_TIMESTAMP_SCALE_ID:
  370. track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
  371. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
  372. break;
  373. case TRACK_OFFSET_ID:
  374. track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
  375. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
  376. break;
  377. case TRACK_VIDEO_ID:
  378. track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
  379. break;
  380. case TRACK_AUDIO_ID:
  381. track_entry.set_audio_track(TRY(parse_audio_track_information(streamer)));
  382. break;
  383. default:
  384. TRY_READ(streamer.read_unknown_element());
  385. }
  386. return IterationDecision::Continue;
  387. }));
  388. return track_entry;
  389. }
  390. DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
  391. {
  392. return parse_master_element(streamer, "Tracks"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  393. if (element_id == TRACK_ENTRY_ID) {
  394. auto track_entry = TRY(parse_track_entry(streamer));
  395. dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number());
  396. DECODER_TRY_ALLOC(m_tracks.try_set(track_entry.track_number(), track_entry));
  397. } else {
  398. TRY_READ(streamer.read_unknown_element());
  399. }
  400. return IterationDecision::Continue;
  401. });
  402. }
  403. DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
  404. {
  405. TRY(ensure_tracks_are_parsed());
  406. for (auto const& track_entry : m_tracks) {
  407. auto decision = TRY(callback(track_entry.value));
  408. if (decision == IterationDecision::Break)
  409. break;
  410. }
  411. return {};
  412. }
  413. DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
  414. {
  415. return for_each_track([&](TrackEntry const& track_entry) -> DecoderErrorOr<IterationDecision> {
  416. if (track_entry.track_type() != type)
  417. return IterationDecision::Continue;
  418. return callback(track_entry);
  419. });
  420. }
  421. DecoderErrorOr<TrackEntry> Reader::track_for_track_number(u64 track_number)
  422. {
  423. TRY(ensure_tracks_are_parsed());
  424. auto optional_track_entry = m_tracks.get(track_number);
  425. if (!optional_track_entry.has_value())
  426. return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
  427. return optional_track_entry.release_value();
  428. }
  429. DecoderErrorOr<size_t> Reader::track_count()
  430. {
  431. TRY(ensure_tracks_are_parsed());
  432. return m_tracks.size();
  433. }
  434. constexpr size_t get_element_id_size(u32 element_id)
  435. {
  436. return sizeof(element_id) - (count_leading_zeroes(element_id) / 8);
  437. }
  438. static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
  439. {
  440. Optional<u64> timestamp;
  441. size_t first_element_position = 0;
  442. TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id, size_t position) -> DecoderErrorOr<IterationDecision> {
  443. if (first_element_position == 0)
  444. first_element_position = position - get_element_id_size(element_id);
  445. switch (element_id) {
  446. case TIMESTAMP_ID:
  447. timestamp = TRY_READ(streamer.read_u64());
  448. return IterationDecision::Break;
  449. default:
  450. TRY_READ(streamer.read_unknown_element());
  451. }
  452. return IterationDecision::Continue;
  453. }));
  454. if (!timestamp.has_value())
  455. return DecoderError::corrupted("Cluster was missing a timestamp"sv);
  456. if (first_element_position == 0)
  457. return DecoderError::corrupted("Cluster had no children"sv);
  458. dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", first_element_position);
  459. TRY_READ(streamer.seek_to_position(first_element_position));
  460. Cluster cluster;
  461. cluster.set_timestamp(Time::from_nanoseconds(timestamp.release_value() * timestamp_scale));
  462. return cluster;
  463. }
  464. static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
  465. {
  466. Block block;
  467. auto content_size = TRY_READ(streamer.read_variable_size_integer());
  468. auto position_before_track_number = streamer.position();
  469. block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));
  470. // https://www.matroska.org/technical/notes.html
  471. // Block Timestamps:
  472. // The Block Element and SimpleBlock Element store their timestamps as signed integers,
  473. // relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
  474. // timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
  475. // `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
  476. //
  477. // When a CodecDelay Element is set, its value MUST be subtracted from each Block timestamp
  478. // of that track. To get the timestamp in nanoseconds of the first frame in a Block or
  479. // SimpleBlock, the formula becomes:
  480. // `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
  481. Time timestamp_offset = Time::from_nanoseconds(static_cast<i64>(static_cast<double>(TRY_READ(streamer.read_i16()) * segment_timestamp_scale) * track.timestamp_scale()));
  482. timestamp_offset -= Time::from_nanoseconds(static_cast<i64>(track.codec_delay()));
  483. // This is only mentioned in the elements specification under TrackOffset.
  484. // https://www.matroska.org/technical/elements.html
  485. timestamp_offset += Time::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
  486. block.set_timestamp(cluster_timestamp + timestamp_offset);
  487. auto flags = TRY_READ(streamer.read_octet());
  488. block.set_only_keyframes((flags & (1u << 7u)) != 0);
  489. block.set_invisible((flags & (1u << 3u)) != 0);
  490. block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
  491. block.set_discardable((flags & 1u) != 0);
  492. auto total_frame_content_size = content_size - (streamer.position() - position_before_track_number);
  493. Vector<ReadonlyBytes> frames;
  494. if (block.lacing() == Block::Lacing::EBML) {
  495. auto octets_read_before_frame_sizes = streamer.octets_read();
  496. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  497. Vector<u64> frame_sizes;
  498. frame_sizes.ensure_capacity(frame_count);
  499. u64 frame_size_sum = 0;
  500. u64 previous_frame_size;
  501. auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
  502. frame_sizes.append(first_frame_size);
  503. frame_size_sum += first_frame_size;
  504. previous_frame_size = first_frame_size;
  505. for (int i = 0; i < frame_count - 2; i++) {
  506. auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
  507. u64 frame_size;
  508. // FIXME: x - (-y) == x + y?
  509. if (frame_size_difference < 0)
  510. frame_size = previous_frame_size - (-frame_size_difference);
  511. else
  512. frame_size = previous_frame_size + frame_size_difference;
  513. frame_sizes.append(frame_size);
  514. frame_size_sum += frame_size;
  515. previous_frame_size = frame_size;
  516. }
  517. frame_sizes.append(total_frame_content_size - frame_size_sum - (streamer.octets_read() - octets_read_before_frame_sizes));
  518. for (int i = 0; i < frame_count; i++) {
  519. // FIXME: ReadonlyBytes instead of copying the frame data?
  520. auto current_frame_size = frame_sizes.at(i);
  521. frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
  522. }
  523. } else if (block.lacing() == Block::Lacing::FixedSize) {
  524. auto frame_count = TRY_READ(streamer.read_octet()) + 1;
  525. auto individual_frame_size = total_frame_content_size / frame_count;
  526. for (int i = 0; i < frame_count; i++)
  527. frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
  528. } else {
  529. frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
  530. }
  531. block.set_frames(move(frames));
  532. return block;
  533. }
  534. DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
  535. {
  536. auto optional_position = TRY(find_first_top_level_element_with_id("Cluster"sv, CLUSTER_ELEMENT_ID));
  537. if (!optional_position.has_value())
  538. return DecoderError::corrupted("No clusters are present in the segment"sv);
  539. ReadonlyBytes segment_view = m_data.slice(m_segment_contents_position, m_segment_contents_size);
  540. // We need to have the element ID included so that the iterator knows where it is.
  541. auto position = optional_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID) - m_segment_contents_position;
  542. dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);
  543. return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position);
  544. }
  545. static DecoderErrorOr<CueTrackPosition> parse_cue_track_position(Streamer& streamer)
  546. {
  547. CueTrackPosition track_position;
  548. bool had_cluster_position = false;
  549. TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  550. switch (element_id) {
  551. case CUE_TRACK_ID:
  552. track_position.set_track_number(TRY_READ(streamer.read_u64()));
  553. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions track number {}", track_position.track_number());
  554. break;
  555. case CUE_CLUSTER_POSITION_ID:
  556. track_position.set_cluster_position(TRY_READ(streamer.read_u64()));
  557. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions cluster position {}", track_position.cluster_position());
  558. had_cluster_position = true;
  559. break;
  560. case CUE_RELATIVE_POSITION_ID:
  561. track_position.set_block_offset(TRY_READ(streamer.read_u64()));
  562. dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions relative position {}", track_position.block_offset());
  563. break;
  564. case CUE_CODEC_STATE_ID:
  565. // Mandatory in spec, but not present in files? 0 means use TrackEntry's codec state.
  566. // FIXME: Do something with this value.
  567. dbgln_if(MATROSKA_DEBUG, "Found CodecState, skipping");
  568. TRY_READ(streamer.read_unknown_element());
  569. break;
  570. case CUE_REFERENCE_ID:
  571. return DecoderError::not_implemented();
  572. default:
  573. TRY_READ(streamer.read_unknown_element());
  574. break;
  575. }
  576. return IterationDecision::Continue;
  577. }));
  578. if (track_position.track_number() == 0)
  579. return DecoderError::corrupted("Track number was not present or 0"sv);
  580. if (!had_cluster_position)
  581. return DecoderError::corrupted("Cluster was missing the cluster position"sv);
  582. return track_position;
  583. }
  584. static DecoderErrorOr<CuePoint> parse_cue_point(Streamer& streamer, u64 timestamp_scale)
  585. {
  586. CuePoint cue_point;
  587. TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  588. switch (element_id) {
  589. case CUE_TIME_ID: {
  590. // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element:
  591. // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks.
  592. // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html:
  593. // > For such elements, the timestamp value is stored directly in nanoseconds.
  594. // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees:
  595. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941
  596. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416
  597. // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392
  598. // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^(
  599. auto timestamp = Time::from_nanoseconds(static_cast<i64>(TRY_READ(streamer.read_u64()) * timestamp_scale));
  600. cue_point.set_timestamp(timestamp);
  601. dbgln_if(MATROSKA_DEBUG, "Read CuePoint timestamp {}ms", cue_point.timestamp().to_milliseconds());
  602. break;
  603. }
  604. case CUE_TRACK_POSITIONS_ID: {
  605. auto track_position = TRY_READ(parse_cue_track_position(streamer));
  606. DECODER_TRY_ALLOC(cue_point.track_positions().try_set(track_position.track_number(), track_position));
  607. break;
  608. }
  609. default:
  610. TRY_READ(streamer.read_unknown_element());
  611. break;
  612. }
  613. return IterationDecision::Continue;
  614. }));
  615. if (cue_point.timestamp().is_negative())
  616. return DecoderError::corrupted("CuePoint was missing a timestamp"sv);
  617. if (cue_point.track_positions().is_empty())
  618. return DecoderError::corrupted("CuePoint was missing track positions"sv);
  619. return cue_point;
  620. }
  621. DecoderErrorOr<void> Reader::parse_cues(Streamer& streamer)
  622. {
  623. m_cues.clear();
  624. TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id, size_t) -> DecoderErrorOr<IterationDecision> {
  625. switch (element_id) {
  626. case CUE_POINT_ID: {
  627. auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale()));
  628. // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now,
  629. // but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead.
  630. for (auto track_position_entry : cue_point.track_positions()) {
  631. if (!m_cues.contains(track_position_entry.key))
  632. DECODER_TRY_ALLOC(m_cues.try_set(track_position_entry.key, Vector<CuePoint>()));
  633. Vector<CuePoint>& cue_points_for_track = m_cues.get(track_position_entry.key).release_value();
  634. cue_points_for_track.append(cue_point);
  635. }
  636. break;
  637. }
  638. default:
  639. return DecoderError::format(DecoderErrorCategory::Corrupted, "Unknown Cues child ID {:#010x}", element_id);
  640. }
  641. return IterationDecision::Continue;
  642. }));
  643. return {};
  644. }
  645. DecoderErrorOr<void> Reader::ensure_cues_are_parsed()
  646. {
  647. if (m_cues_have_been_parsed)
  648. return {};
  649. auto position = TRY(find_first_top_level_element_with_id("Cues"sv, CUES_ID));
  650. if (!position.has_value())
  651. return DecoderError::corrupted("No Tracks element found"sv);
  652. Streamer streamer { m_data };
  653. TRY_READ(streamer.seek_to_position(position.release_value()));
  654. TRY(parse_cues(streamer));
  655. m_cues_have_been_parsed = true;
  656. return {};
  657. }
  658. DecoderErrorOr<void> Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Time const& timestamp)
  659. {
  660. auto const& cue_points = MUST(cue_points_for_track(iterator.m_track.track_number())).release_value();
  661. // Take a guess at where in the cues the timestamp will be and correct from there.
  662. auto duration = TRY(segment_information()).duration();
  663. size_t index = 0;
  664. if (duration.has_value())
  665. index = clamp(((timestamp.to_nanoseconds() * cue_points.size()) / TRY(segment_information()).duration()->to_nanoseconds()), 0, cue_points.size() - 1);
  666. CuePoint const* prev_cue_point = &cue_points[index];
  667. dbgln_if(MATROSKA_DEBUG, "Finding Matroska cue points for timestamp {}ms starting from cue at {}ms", timestamp.to_milliseconds(), prev_cue_point->timestamp().to_milliseconds());
  668. if (prev_cue_point->timestamp() == timestamp) {
  669. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  670. return {};
  671. }
  672. if (prev_cue_point->timestamp() > timestamp) {
  673. while (index > 0 && prev_cue_point->timestamp() > timestamp) {
  674. prev_cue_point = &cue_points[--index];
  675. dbgln_if(MATROSKA_DEBUG, "Checking previous cue point {}ms", prev_cue_point->timestamp().to_milliseconds());
  676. }
  677. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  678. return {};
  679. }
  680. while (++index < cue_points.size()) {
  681. auto const& cue_point = cue_points[index];
  682. dbgln_if(MATROSKA_DEBUG, "Checking future cue point {}ms", cue_point.timestamp().to_milliseconds());
  683. if (cue_point.timestamp() > timestamp)
  684. break;
  685. prev_cue_point = &cue_point;
  686. }
  687. TRY(iterator.seek_to_cue_point(*prev_cue_point));
  688. return {};
  689. }
  690. static DecoderErrorOr<void> search_clusters_for_keyframe_before_timestamp(SampleIterator& iterator, Time const& timestamp)
  691. {
  692. #if MATROSKA_DEBUG
  693. size_t inter_frames_count;
  694. #endif
  695. Optional<SampleIterator> last_keyframe;
  696. while (true) {
  697. SampleIterator rewind_iterator = iterator;
  698. auto block = TRY(iterator.next_block());
  699. if (block.only_keyframes()) {
  700. last_keyframe.emplace(rewind_iterator);
  701. #if MATROSKA_DEBUG
  702. inter_frames_count = 0;
  703. #endif
  704. }
  705. if (block.timestamp() > timestamp)
  706. break;
  707. #if MATROSKA_DEBUG
  708. inter_frames_count++;
  709. #endif
  710. }
  711. if (last_keyframe.has_value()) {
  712. #if MATROSKA_DEBUG
  713. dbgln("Seeked to a keyframe with {} inter frames to skip", inter_frames_count);
  714. #endif
  715. iterator = last_keyframe.release_value();
  716. }
  717. return {};
  718. }
  719. DecoderErrorOr<bool> Reader::has_cues_for_track(u64 track_number)
  720. {
  721. TRY(ensure_cues_are_parsed());
  722. return m_cues.contains(track_number);
  723. }
  724. DecoderErrorOr<SampleIterator> Reader::seek_to_random_access_point(SampleIterator iterator, Time timestamp)
  725. {
  726. if (TRY(has_cues_for_track(iterator.m_track.track_number()))) {
  727. TRY(seek_to_cue_for_timestamp(iterator, timestamp));
  728. VERIFY(iterator.last_timestamp().has_value());
  729. return iterator;
  730. }
  731. if (!iterator.last_timestamp().has_value() || timestamp < iterator.last_timestamp().value()) {
  732. // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
  733. iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));
  734. TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
  735. return iterator;
  736. }
  737. TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
  738. return iterator;
  739. }
  740. DecoderErrorOr<Optional<Vector<CuePoint> const&>> Reader::cue_points_for_track(u64 track_number)
  741. {
  742. TRY(ensure_cues_are_parsed());
  743. return m_cues.get(track_number);
  744. }
  745. DecoderErrorOr<Block> SampleIterator::next_block()
  746. {
  747. if (m_position >= m_data.size())
  748. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "Still at end of stream :^)"sv);
  749. Streamer streamer { m_data };
  750. TRY_READ(streamer.seek_to_position(m_position));
  751. Optional<Block> block;
  752. while (streamer.has_octet()) {
  753. #if MATROSKA_TRACE_DEBUG
  754. auto element_position = streamer.position();
  755. #endif
  756. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  757. #if MATROSKA_TRACE_DEBUG
  758. dbgln("Iterator found element with ID {:#010x} at offset {} within the segment.", element_id, element_position);
  759. #endif
  760. if (element_id == CLUSTER_ELEMENT_ID) {
  761. dbgln_if(MATROSKA_DEBUG, " Iterator is parsing new cluster.");
  762. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  763. } else if (element_id == SIMPLE_BLOCK_ID) {
  764. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is parsing new block.");
  765. auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
  766. if (candidate_block.track_number() == m_track.track_number())
  767. block = move(candidate_block);
  768. } else {
  769. dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is skipping unknown element with ID {:#010x}.", element_id);
  770. TRY_READ(streamer.read_unknown_element());
  771. }
  772. m_position = streamer.position();
  773. if (block.has_value()) {
  774. m_last_timestamp = block->timestamp();
  775. return block.release_value();
  776. }
  777. }
  778. m_current_cluster.clear();
  779. return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv);
  780. }
  781. DecoderErrorOr<void> SampleIterator::seek_to_cue_point(CuePoint const& cue_point)
  782. {
  783. // This is a private function. The position getter can return optional, but the caller should already know that this track has a position.
  784. auto const& cue_position = cue_point.position_for_track(m_track.track_number()).release_value();
  785. Streamer streamer { m_data };
  786. TRY_READ(streamer.seek_to_position(cue_position.cluster_position()));
  787. auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
  788. if (element_id != CLUSTER_ELEMENT_ID)
  789. return DecoderError::corrupted("Cue point's cluster position didn't point to a cluster"sv);
  790. m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
  791. dbgln_if(MATROSKA_DEBUG, "SampleIterator set to cue point at timestamp {}ms", m_current_cluster->timestamp().to_milliseconds());
  792. m_position = streamer.position() + cue_position.block_offset();
  793. m_last_timestamp = cue_point.timestamp();
  794. return {};
  795. }
  796. ErrorOr<DeprecatedString> Streamer::read_string()
  797. {
  798. auto string_length = TRY(read_variable_size_integer());
  799. if (remaining() < string_length)
  800. return Error::from_string_literal("String length extends past the end of the stream");
  801. auto string_value = DeprecatedString(data_as_chars(), string_length);
  802. TRY(read_raw_octets(string_length));
  803. return string_value;
  804. }
  805. ErrorOr<u8> Streamer::read_octet()
  806. {
  807. if (!has_octet()) {
  808. dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
  809. return Error::from_string_literal("Stream is out of data");
  810. }
  811. u8 byte = *data();
  812. m_octets_read.last()++;
  813. m_position++;
  814. return byte;
  815. }
  816. ErrorOr<i16> Streamer::read_i16()
  817. {
  818. return (TRY(read_octet()) << 8) | TRY(read_octet());
  819. }
  820. ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
  821. {
  822. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", data());
  823. auto length_descriptor = TRY(read_octet());
  824. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
  825. if (length_descriptor == 0)
  826. return Error::from_string_literal("read_variable_size_integer: Length descriptor has no terminating set bit");
  827. size_t length = 0;
  828. while (length < 8) {
  829. if (((length_descriptor >> (8 - length)) & 1) == 1)
  830. break;
  831. length++;
  832. }
  833. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
  834. if (length > 8)
  835. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  836. u64 result;
  837. if (mask_length)
  838. result = length_descriptor & ~(1u << (8 - length));
  839. else
  840. result = length_descriptor;
  841. dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
  842. for (size_t i = 1; i < length; i++) {
  843. u8 next_octet = TRY(read_octet());
  844. dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
  845. result = (result << 8u) | next_octet;
  846. dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
  847. }
  848. return result;
  849. }
  850. ErrorOr<i64> Streamer::read_variable_size_signed_integer()
  851. {
  852. auto length_descriptor = TRY(read_octet());
  853. if (length_descriptor == 0)
  854. return Error::from_string_literal("read_variable_sized_signed_integer: Length descriptor has no terminating set bit");
  855. i64 length = 0;
  856. while (length < 8) {
  857. if (((length_descriptor >> (8 - length)) & 1) == 1)
  858. break;
  859. length++;
  860. }
  861. if (length > 8)
  862. return Error::from_string_literal("read_variable_size_integer: Length is too large");
  863. i64 result = length_descriptor & ~(1u << (8 - length));
  864. for (i64 i = 1; i < length; i++) {
  865. u8 next_octet = TRY(read_octet());
  866. result = (result << 8u) | next_octet;
  867. }
  868. result -= AK::exp2<i64>(length * 7 - 1) - 1;
  869. return result;
  870. }
  871. ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
  872. {
  873. if (remaining() < num_octets)
  874. return Error::from_string_literal("Tried to drop octets past the end of the stream");
  875. ReadonlyBytes result = { data(), num_octets };
  876. m_position += num_octets;
  877. m_octets_read.last() += num_octets;
  878. return result;
  879. }
  880. ErrorOr<u64> Streamer::read_u64()
  881. {
  882. auto integer_length = TRY(read_variable_size_integer());
  883. u64 result = 0;
  884. for (size_t i = 0; i < integer_length; i++) {
  885. result = (result << 8u) + TRY(read_octet());
  886. }
  887. return result;
  888. }
  889. ErrorOr<double> Streamer::read_float()
  890. {
  891. auto length = TRY(read_variable_size_integer());
  892. if (length != 4u && length != 8u)
  893. return Error::from_string_literal("Float size must be 4 or 8 bytes");
  894. union {
  895. u64 value;
  896. float float_value;
  897. double double_value;
  898. } read_data;
  899. read_data.value = 0;
  900. for (size_t i = 0; i < length; i++) {
  901. read_data.value = (read_data.value << 8u) + TRY(read_octet());
  902. }
  903. if (length == 4u)
  904. return read_data.float_value;
  905. return read_data.double_value;
  906. }
  907. ErrorOr<void> Streamer::read_unknown_element()
  908. {
  909. auto element_length = TRY(read_variable_size_integer());
  910. dbgln_if(MATROSKA_TRACE_DEBUG, "Skipping unknown element of size {}.", element_length);
  911. TRY(read_raw_octets(element_length));
  912. return {};
  913. }
  914. ErrorOr<void> Streamer::seek_to_position(size_t position)
  915. {
  916. if (position >= m_data.size())
  917. return Error::from_string_literal("Attempted to seek past the end of the stream");
  918. m_position = position;
  919. return {};
  920. }
  921. }