MatroskaReader.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "MatroskaReader.h"
  7. #include <AK/Function.h>
  8. #include <AK/MappedFile.h>
  9. #include <AK/Optional.h>
  10. #include <AK/Utf8View.h>
  11. namespace Video {
  12. #define CHECK_HAS_VALUE(x) \
  13. if (!(x).has_value()) \
  14. return false
  15. constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
  16. constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
  17. constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
  18. constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
  19. constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
  20. constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
  21. constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
  22. constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
  23. constexpr u32 MUXING_APP_ID = 0x4D80;
  24. constexpr u32 WRITING_APP_ID = 0x5741;
  25. constexpr u32 TRACK_ENTRY_ID = 0xAE;
  26. constexpr u32 TRACK_NUMBER_ID = 0xD7;
  27. constexpr u32 TRACK_UID_ID = 0x73C5;
  28. constexpr u32 TRACK_TYPE_ID = 0x83;
  29. constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
  30. constexpr u32 TRACK_CODEC_ID = 0x86;
  31. constexpr u32 TRACK_VIDEO_ID = 0xE0;
  32. constexpr u32 TRACK_AUDIO_ID = 0xE1;
  33. constexpr u32 PIXEL_WIDTH_ID = 0xB0;
  34. constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
  35. constexpr u32 CHANNELS_ID = 0x9F;
  36. constexpr u32 BIT_DEPTH_ID = 0x6264;
  37. constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
  38. constexpr u32 TIMESTAMP_ID = 0xE7;
  39. OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_file(StringView const& path)
  40. {
  41. auto mapped_file_result = MappedFile::map(path);
  42. if (mapped_file_result.is_error())
  43. return {};
  44. auto mapped_file = mapped_file_result.release_value();
  45. return parse_matroska_from_data((u8*)mapped_file->data(), mapped_file->size());
  46. }
  47. OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_data(u8 const* data, size_t size)
  48. {
  49. MatroskaReader reader(data, size);
  50. return reader.parse();
  51. }
  52. OwnPtr<MatroskaDocument> MatroskaReader::parse()
  53. {
  54. auto first_element_id = m_streamer.read_variable_size_integer(false);
  55. dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id.value());
  56. if (!first_element_id.has_value() || first_element_id.value() != EBML_MASTER_ELEMENT_ID)
  57. return {};
  58. auto header = parse_ebml_header();
  59. if (!header.has_value())
  60. return {};
  61. dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");
  62. auto root_element_id = m_streamer.read_variable_size_integer(false);
  63. if (!root_element_id.has_value() || root_element_id.value() != SEGMENT_ELEMENT_ID)
  64. return {};
  65. auto matroska_document = make<MatroskaDocument>(header.value());
  66. auto segment_parse_success = parse_segment_elements(*matroska_document);
  67. if (!segment_parse_success)
  68. return {};
  69. return matroska_document;
  70. }
  71. bool MatroskaReader::parse_master_element([[maybe_unused]] StringView const& element_name, Function<bool(u64)> element_consumer)
  72. {
  73. auto element_data_size = m_streamer.read_variable_size_integer();
  74. CHECK_HAS_VALUE(element_data_size);
  75. dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size.value());
  76. m_streamer.push_octets_read();
  77. while (m_streamer.octets_read() < element_data_size.value()) {
  78. dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
  79. auto optional_element_id = m_streamer.read_variable_size_integer(false);
  80. CHECK_HAS_VALUE(optional_element_id);
  81. auto element_id = optional_element_id.value();
  82. dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}\n", element_name, element_id);
  83. if (!element_consumer(element_id)) {
  84. dbgln_if(MATROSKA_DEBUG, "{:s} consumer failed on ID {:#010x}\n", element_name.to_string().characters(), element_id);
  85. return false;
  86. }
  87. dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", m_streamer.octets_read(), element_name);
  88. }
  89. m_streamer.pop_octets_read();
  90. return true;
  91. }
  92. Optional<EBMLHeader> MatroskaReader::parse_ebml_header()
  93. {
  94. EBMLHeader header;
  95. auto success = parse_master_element("Header", [&](u64 element_id) {
  96. if (element_id == DOCTYPE_ELEMENT_ID) {
  97. auto doc_type = read_string_element();
  98. CHECK_HAS_VALUE(doc_type);
  99. header.doc_type = doc_type.value();
  100. dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", doc_type.value());
  101. } else if (element_id == DOCTYPE_VERSION_ELEMENT_ID) {
  102. auto doc_type_version = read_u64_element();
  103. CHECK_HAS_VALUE(doc_type_version);
  104. header.doc_type_version = doc_type_version.value();
  105. dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", doc_type_version.value());
  106. } else {
  107. return read_unknown_element();
  108. }
  109. return true;
  110. });
  111. if (!success)
  112. return {};
  113. return header;
  114. }
  115. bool MatroskaReader::parse_segment_elements(MatroskaDocument& matroska_document)
  116. {
  117. dbgln_if(MATROSKA_DEBUG, "Parsing segment elements");
  118. auto success = parse_master_element("Segment", [&](u64 element_id) {
  119. if (element_id == SEGMENT_INFORMATION_ELEMENT_ID) {
  120. auto segment_information = parse_information();
  121. if (!segment_information)
  122. return false;
  123. matroska_document.set_segment_information(move(segment_information));
  124. } else if (element_id == TRACK_ELEMENT_ID) {
  125. return parse_tracks(matroska_document);
  126. } else if (element_id == CLUSTER_ELEMENT_ID) {
  127. auto cluster = parse_cluster();
  128. if (!cluster)
  129. return false;
  130. matroska_document.clusters().append(cluster.release_nonnull());
  131. } else {
  132. return read_unknown_element();
  133. }
  134. return true;
  135. });
  136. dbgln("Success {}", success);
  137. return success;
  138. }
  139. OwnPtr<SegmentInformation> MatroskaReader::parse_information()
  140. {
  141. auto segment_information = make<SegmentInformation>();
  142. auto success = parse_master_element("Segment Information", [&](u64 element_id) {
  143. if (element_id == TIMESTAMP_SCALE_ID) {
  144. auto timestamp_scale = read_u64_element();
  145. CHECK_HAS_VALUE(timestamp_scale);
  146. segment_information->set_timestamp_scale(timestamp_scale.value());
  147. dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", timestamp_scale.value());
  148. } else if (element_id == MUXING_APP_ID) {
  149. auto muxing_app = read_string_element();
  150. CHECK_HAS_VALUE(muxing_app);
  151. segment_information->set_muxing_app(muxing_app.value());
  152. dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", muxing_app.value());
  153. } else if (element_id == WRITING_APP_ID) {
  154. auto writing_app = read_string_element();
  155. CHECK_HAS_VALUE(writing_app);
  156. segment_information->set_writing_app(writing_app.value());
  157. dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", writing_app.value());
  158. } else {
  159. return read_unknown_element();
  160. }
  161. return true;
  162. });
  163. if (!success)
  164. return {};
  165. return segment_information;
  166. }
  167. bool MatroskaReader::parse_tracks(MatroskaDocument& matroska_document)
  168. {
  169. auto success = parse_master_element("Tracks", [&](u64 element_id) {
  170. if (element_id == TRACK_ENTRY_ID) {
  171. dbgln_if(MATROSKA_DEBUG, "Parsing track");
  172. auto track_entry = parse_track_entry();
  173. if (!track_entry)
  174. return false;
  175. auto track_number = track_entry->track_number();
  176. matroska_document.add_track(track_number, track_entry.release_nonnull());
  177. dbgln_if(MATROSKA_DEBUG, "Track {} added to document", track_number);
  178. } else {
  179. return read_unknown_element();
  180. }
  181. return true;
  182. });
  183. return success;
  184. }
  185. OwnPtr<TrackEntry> MatroskaReader::parse_track_entry()
  186. {
  187. auto track_entry = make<TrackEntry>();
  188. auto success = parse_master_element("Track", [&](u64 element_id) {
  189. if (element_id == TRACK_NUMBER_ID) {
  190. auto track_number = read_u64_element();
  191. CHECK_HAS_VALUE(track_number);
  192. track_entry->set_track_number(track_number.value());
  193. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_number.value());
  194. } else if (element_id == TRACK_UID_ID) {
  195. auto track_uid = read_u64_element();
  196. CHECK_HAS_VALUE(track_uid);
  197. track_entry->set_track_uid(track_uid.value());
  198. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_uid.value());
  199. } else if (element_id == TRACK_TYPE_ID) {
  200. auto track_type = read_u64_element();
  201. CHECK_HAS_VALUE(track_type);
  202. track_entry->set_track_type(static_cast<TrackEntry::TrackType>(track_type.value()));
  203. dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", track_type.value());
  204. } else if (element_id == TRACK_LANGUAGE_ID) {
  205. auto language = read_string_element();
  206. CHECK_HAS_VALUE(language);
  207. track_entry->set_language(language.value());
  208. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", language.value());
  209. } else if (element_id == TRACK_CODEC_ID) {
  210. auto codec_id = read_string_element();
  211. CHECK_HAS_VALUE(codec_id);
  212. track_entry->set_codec_id(codec_id.value());
  213. dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", codec_id.value());
  214. } else if (element_id == TRACK_VIDEO_ID) {
  215. auto video_track = parse_video_track_information();
  216. CHECK_HAS_VALUE(video_track);
  217. track_entry->set_video_track(video_track.value());
  218. } else if (element_id == TRACK_AUDIO_ID) {
  219. auto audio_track = parse_audio_track_information();
  220. CHECK_HAS_VALUE(audio_track);
  221. track_entry->set_audio_track(audio_track.value());
  222. } else {
  223. return read_unknown_element();
  224. }
  225. return true;
  226. });
  227. if (!success)
  228. return {};
  229. return track_entry;
  230. }
  231. Optional<TrackEntry::VideoTrack> MatroskaReader::parse_video_track_information()
  232. {
  233. TrackEntry::VideoTrack video_track {};
  234. auto success = parse_master_element("VideoTrack", [&](u64 element_id) {
  235. if (element_id == PIXEL_WIDTH_ID) {
  236. auto pixel_width = read_u64_element();
  237. CHECK_HAS_VALUE(pixel_width);
  238. video_track.pixel_width = pixel_width.value();
  239. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", pixel_width.value());
  240. } else if (element_id == PIXEL_HEIGHT_ID) {
  241. auto pixel_height = read_u64_element();
  242. CHECK_HAS_VALUE(pixel_height);
  243. video_track.pixel_height = pixel_height.value();
  244. dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", pixel_height.value());
  245. } else {
  246. return read_unknown_element();
  247. }
  248. return true;
  249. });
  250. if (!success)
  251. return {};
  252. return video_track;
  253. }
  254. Optional<TrackEntry::AudioTrack> MatroskaReader::parse_audio_track_information()
  255. {
  256. TrackEntry::AudioTrack audio_track {};
  257. auto success = parse_master_element("AudioTrack", [&](u64 element_id) {
  258. if (element_id == CHANNELS_ID) {
  259. auto channels = read_u64_element();
  260. CHECK_HAS_VALUE(channels);
  261. audio_track.channels = channels.value();
  262. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", channels.value());
  263. } else if (element_id == BIT_DEPTH_ID) {
  264. auto bit_depth = read_u64_element();
  265. CHECK_HAS_VALUE(bit_depth);
  266. audio_track.bit_depth = bit_depth.value();
  267. dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", bit_depth.value());
  268. } else {
  269. return read_unknown_element();
  270. }
  271. return true;
  272. });
  273. if (!success)
  274. return {};
  275. return audio_track;
  276. }
  277. OwnPtr<Cluster> MatroskaReader::parse_cluster()
  278. {
  279. auto cluster = make<Cluster>();
  280. auto success = parse_master_element("Cluster", [&](u64 element_id) {
  281. if (element_id == SIMPLE_BLOCK_ID) {
  282. auto simple_block = parse_simple_block();
  283. if (!simple_block)
  284. return false;
  285. cluster->blocks().append(simple_block.release_nonnull());
  286. } else if (element_id == TIMESTAMP_ID) {
  287. auto timestamp = read_u64_element();
  288. if (!timestamp.has_value())
  289. return false;
  290. cluster->set_timestamp(timestamp.value());
  291. } else {
  292. auto success = read_unknown_element();
  293. if (!success)
  294. return false;
  295. }
  296. return true;
  297. });
  298. if (!success)
  299. return {};
  300. return cluster;
  301. }
  302. OwnPtr<Block> MatroskaReader::parse_simple_block()
  303. {
  304. auto block = make<Block>();
  305. auto content_size = m_streamer.read_variable_size_integer();
  306. if (!content_size.has_value())
  307. return {};
  308. auto octets_read_before_track_number = m_streamer.octets_read();
  309. auto track_number = m_streamer.read_variable_size_integer();
  310. if (!track_number.has_value())
  311. return {};
  312. block->set_track_number(track_number.value());
  313. if (m_streamer.remaining() < 3)
  314. return {};
  315. block->set_timestamp(m_streamer.read_i16());
  316. auto flags = m_streamer.read_octet();
  317. block->set_only_keyframes(flags & (1u << 7u));
  318. block->set_invisible(flags & (1u << 3u));
  319. block->set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
  320. block->set_discardable(flags & 1u);
  321. auto total_frame_content_size = content_size.value() - (m_streamer.octets_read() - octets_read_before_track_number);
  322. if (block->lacing() == Block::Lacing::EBML) {
  323. auto octets_read_before_frame_sizes = m_streamer.octets_read();
  324. auto frame_count = m_streamer.read_octet() + 1;
  325. Vector<u64> frame_sizes;
  326. frame_sizes.ensure_capacity(frame_count);
  327. u64 frame_size_sum = 0;
  328. u64 previous_frame_size;
  329. auto first_frame_size = m_streamer.read_variable_size_integer();
  330. if (!first_frame_size.has_value())
  331. return {};
  332. frame_sizes.append(first_frame_size.value());
  333. frame_size_sum += first_frame_size.value();
  334. previous_frame_size = first_frame_size.value();
  335. for (int i = 0; i < frame_count - 2; i++) {
  336. auto frame_size_difference = m_streamer.read_variable_sized_signed_integer();
  337. if (!frame_size_difference.has_value())
  338. return {};
  339. u64 frame_size;
  340. if (frame_size_difference.value() < 0)
  341. frame_size = previous_frame_size - (-frame_size_difference.value());
  342. else
  343. frame_size = previous_frame_size + frame_size_difference.value();
  344. frame_sizes.append(frame_size);
  345. frame_size_sum += frame_size;
  346. previous_frame_size = frame_size;
  347. }
  348. frame_sizes.append(total_frame_content_size - frame_size_sum - (m_streamer.octets_read() - octets_read_before_frame_sizes));
  349. for (int i = 0; i < frame_count; i++) {
  350. auto current_frame_size = frame_sizes.at(i);
  351. auto frame_result = ByteBuffer::copy(m_streamer.data(), current_frame_size);
  352. if (!frame_result.has_value())
  353. return {};
  354. block->add_frame(frame_result.release_value());
  355. m_streamer.drop_octets(current_frame_size);
  356. }
  357. } else if (block->lacing() == Block::Lacing::FixedSize) {
  358. auto frame_count = m_streamer.read_octet() + 1;
  359. auto individual_frame_size = total_frame_content_size / frame_count;
  360. for (int i = 0; i < frame_count; i++) {
  361. auto frame_result = ByteBuffer::copy(m_streamer.data(), individual_frame_size);
  362. if (!frame_result.has_value())
  363. return {};
  364. block->add_frame(frame_result.release_value());
  365. m_streamer.drop_octets(individual_frame_size);
  366. }
  367. } else {
  368. auto frame_result = ByteBuffer::copy(m_streamer.data(), total_frame_content_size);
  369. if (!frame_result.has_value())
  370. return {};
  371. block->add_frame(frame_result.release_value());
  372. m_streamer.drop_octets(total_frame_content_size);
  373. }
  374. return block;
  375. }
  376. Optional<String> MatroskaReader::read_string_element()
  377. {
  378. auto string_length = m_streamer.read_variable_size_integer();
  379. if (!string_length.has_value() || m_streamer.remaining() < string_length.value())
  380. return {};
  381. auto string_value = String(m_streamer.data_as_chars(), string_length.value());
  382. m_streamer.drop_octets(string_length.value());
  383. return string_value;
  384. }
  385. Optional<u64> MatroskaReader::read_u64_element()
  386. {
  387. auto integer_length = m_streamer.read_variable_size_integer();
  388. if (!integer_length.has_value() || m_streamer.remaining() < integer_length.value())
  389. return {};
  390. u64 result = 0;
  391. for (size_t i = 0; i < integer_length.value(); i++) {
  392. if (!m_streamer.has_octet())
  393. return {};
  394. result = (result << 8u) + m_streamer.read_octet();
  395. }
  396. return result;
  397. }
  398. bool MatroskaReader::read_unknown_element()
  399. {
  400. auto element_length = m_streamer.read_variable_size_integer();
  401. if (!element_length.has_value() || m_streamer.remaining() < element_length.value())
  402. return false;
  403. m_streamer.drop_octets(element_length.value());
  404. return true;
  405. }
  406. }