MatroskaReader.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include "MatroskaDocument.h"
  8. #include <AK/Debug.h>
  9. #include <AK/NonnullOwnPtrVector.h>
  10. #include <AK/Optional.h>
  11. #include <AK/OwnPtr.h>
  12. #include <math.h>
  13. namespace Video {
  14. class MatroskaReader {
  15. public:
  16. MatroskaReader(u8 const* data, size_t size)
  17. : m_streamer(data, size)
  18. {
  19. }
  // Static entry points; only the declarations are visible in this header.
  // NOTE(review): going by the names and parameters, the first reads the
  // document from a file path and the second from an in-memory buffer
  // (pointer + length) - confirm against the definitions.
  20. static OwnPtr<MatroskaDocument> parse_matroska_from_file(StringView const& path);
  21. static OwnPtr<MatroskaDocument> parse_matroska_from_data(u8 const*, size_t);
  // Instance-level parse. NOTE(review): presumably consumes the buffer given to
  // the constructor and yields a null OwnPtr on failure - TODO confirm.
  22. OwnPtr<MatroskaDocument> parse();
  23. private:
  24. class Streamer {
  25. public:
  26. Streamer(u8 const* data, size_t size)
  27. : m_data_ptr(data)
  28. , m_size_remaining(size)
  29. {
  30. }
  31. u8 const* data() { return m_data_ptr; }
  32. char const* data_as_chars() { return reinterpret_cast<char const*>(m_data_ptr); }
  33. u8 read_octet()
  34. {
  35. VERIFY(m_size_remaining >= 1);
  36. m_size_remaining--;
  37. m_octets_read.last()++;
  38. return *(m_data_ptr++);
  39. }
  40. i16 read_i16()
  41. {
  42. return (read_octet() << 8) | read_octet();
  43. }
  44. size_t octets_read() { return m_octets_read.last(); }
  45. void push_octets_read() { m_octets_read.append(0); }
  46. void pop_octets_read()
  47. {
  48. auto popped = m_octets_read.take_last();
  49. if (!m_octets_read.is_empty())
  50. m_octets_read.last() += popped;
  51. }
  52. Optional<u64> read_variable_size_integer(bool mask_length = true)
  53. {
  54. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", m_data_ptr);
  55. auto length_descriptor = read_octet();
  56. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
  57. if (length_descriptor == 0)
  58. return {};
  59. size_t length = 0;
  60. while (length < 8) {
  61. if (length_descriptor & (1u << (8 - length)))
  62. break;
  63. length++;
  64. }
  65. dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
  66. if (length > 8)
  67. return {};
  68. u64 result;
  69. if (mask_length)
  70. result = length_descriptor & ~(1u << (8 - length));
  71. else
  72. result = length_descriptor;
  73. dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
  74. for (size_t i = 1; i < length; i++) {
  75. if (!has_octet()) {
  76. dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
  77. return {};
  78. }
  79. u8 next_octet = read_octet();
  80. dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
  81. result = (result << 8u) | next_octet;
  82. dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
  83. }
  84. return result;
  85. }
  86. Optional<i64> read_variable_sized_signed_integer()
  87. {
  88. auto length_descriptor = read_octet();
  89. if (length_descriptor == 0)
  90. return {};
  91. size_t length = 0;
  92. while (length < 8) {
  93. if (length_descriptor & (1u << (8 - length)))
  94. break;
  95. length++;
  96. }
  97. if (length > 8)
  98. return {};
  99. i64 result = length_descriptor & ~(1u << (8 - length));
  100. for (size_t i = 1; i < length; i++) {
  101. if (!has_octet()) {
  102. return {};
  103. }
  104. u8 next_octet = read_octet();
  105. result = (result << 8u) | next_octet;
  106. }
  107. result -= pow(2, length * 7 - 1) - 1;
  108. return result;
  109. }
  110. void drop_octets(size_t num_octets)
  111. {
  112. VERIFY(m_size_remaining >= num_octets);
  113. m_size_remaining -= num_octets;
  114. m_octets_read.last() += num_octets;
  115. m_data_ptr += num_octets;
  116. }
  117. bool at_end() const { return !m_size_remaining; }
  118. bool has_octet() const { return m_size_remaining >= 1; }
  119. size_t remaining() const { return m_size_remaining; }
  120. void set_remaining(size_t remaining) { m_size_remaining = remaining; }
  121. private:
  122. u8 const* m_data_ptr { nullptr };
  123. size_t m_size_remaining { 0 };
  124. Vector<size_t> m_octets_read { 0 };
  125. };
  // Private parsing helpers. Only the declarations are visible in this header.
  // NOTE(review): judging by the return types, failure is presumably reported
  // as false / an empty Optional / a null OwnPtr - confirm in the definitions.
  //
  // Takes an element name and a callback that receives an element ID and
  // returns bool. NOTE(review): presumably the callback is invoked once per
  // child element of the named master element - TODO confirm.
  126. bool parse_master_element(StringView const& element_name, Function<bool(u64 element_id)> element_consumer);
  127. Optional<EBMLHeader> parse_ebml_header();
  // The two MatroskaDocument& overloads presumably fill in the passed document - TODO confirm.
  128. bool parse_segment_elements(MatroskaDocument&);
  129. OwnPtr<SegmentInformation> parse_information();
  130. bool parse_tracks(MatroskaDocument&);
  131. OwnPtr<TrackEntry> parse_track_entry();
  132. Optional<TrackEntry::VideoTrack> parse_video_track_information();
  133. Optional<TrackEntry::AudioTrack> parse_audio_track_information();
  134. OwnPtr<Cluster> parse_cluster();
  135. OwnPtr<Block> parse_simple_block();
  // Readers for individual element payloads.
  136. Optional<String> read_string_element();
  137. Optional<u64> read_u64_element();
  // Going by the name, handles elements the parser has no case for - TODO confirm.
  138. bool read_unknown_element();
  // Read cursor over the buffer that was handed to the constructor.
  139. Streamer m_streamer;
  140. };
  141. }