Reader.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. /*
  2. * Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/ByteString.h>
  8. #include <AK/Debug.h>
  9. #include <AK/Function.h>
  10. #include <AK/ScopeGuard.h>
  11. #include <AK/Span.h>
  12. #include <AK/Vector.h>
  13. #include <LibPDF/Error.h>
  14. namespace PDF {
  15. class Reader {
  16. public:
  17. explicit Reader(ReadonlyBytes bytes)
  18. : m_bytes(bytes)
  19. {
  20. }
  21. ALWAYS_INLINE ReadonlyBytes bytes() const { return m_bytes; }
  22. ALWAYS_INLINE size_t offset() const { return m_offset; }
  23. bool done() const
  24. {
  25. if (m_forwards)
  26. return offset() >= bytes().size();
  27. return m_offset < 0;
  28. }
  29. size_t remaining() const
  30. {
  31. if (done())
  32. return 0;
  33. if (m_forwards)
  34. return bytes().size() - offset();
  35. return offset() + 1;
  36. }
  37. void move_by(ssize_t count)
  38. {
  39. if (m_forwards) {
  40. m_offset += count;
  41. } else {
  42. m_offset -= count;
  43. }
  44. }
  45. template<typename T = char>
  46. T read()
  47. {
  48. T value = reinterpret_cast<T const*>(m_bytes.offset(m_offset))[0];
  49. move_by(sizeof(T));
  50. return value;
  51. }
  52. template<typename T = char>
  53. PDFErrorOr<T> try_read()
  54. {
  55. if (sizeof(T) + m_offset > m_bytes.size()) {
  56. auto message = ByteString::formatted("Cannot read {} bytes at offset {} of ReadonlyBytes of size {}", sizeof(T), m_offset, m_bytes.size());
  57. return Error { Error::Type::Parse, message };
  58. }
  59. return read<T>();
  60. }
  61. char peek(size_t shift = 0) const
  62. {
  63. auto offset = m_offset + shift * (m_forwards ? 1 : -1);
  64. return static_cast<char>(m_bytes.at(offset));
  65. }
  66. template<typename... T>
  67. bool matches_any(T... elements) const
  68. {
  69. if (done())
  70. return false;
  71. auto ch = peek();
  72. return ((ch == elements) || ...);
  73. }
  74. bool matches(char ch) const
  75. {
  76. return !done() && peek() == ch;
  77. }
  78. bool matches(char const* chars) const
  79. {
  80. ByteString string(chars);
  81. if (remaining() < string.length())
  82. return false;
  83. if (!m_forwards)
  84. string = string.reverse();
  85. for (size_t i = 0; i < string.length(); i++) {
  86. if (peek(i) != string[i])
  87. return false;
  88. }
  89. return true;
  90. }
  91. template<typename T = char>
  92. void move_to(size_t offset)
  93. {
  94. VERIFY(offset <= m_bytes.size());
  95. m_offset = static_cast<ssize_t>(offset);
  96. }
  97. void move_until(char ch)
  98. {
  99. while (!done() && peek() != ch)
  100. move_by(1);
  101. }
  102. void move_until(AK::Function<bool(char)> predicate)
  103. {
  104. while (!done() && !predicate(peek()))
  105. move_by(1);
  106. }
  107. ALWAYS_INLINE void move_while(AK::Function<bool(char)> predicate)
  108. {
  109. move_until([&predicate](char t) { return !predicate(t); });
  110. }
  111. static bool is_eol(char);
  112. static bool is_whitespace(char);
  113. static bool is_non_eol_whitespace(char);
  114. bool matches_eol() const;
  115. bool matches_whitespace() const;
  116. bool matches_non_eol_whitespace() const;
  117. bool matches_number() const;
  118. bool matches_delimiter() const;
  119. bool matches_regular_character() const;
  120. bool consume_eol();
  121. bool consume_whitespace();
  122. bool consume_non_eol_whitespace();
  123. char consume();
  124. void consume(int amount);
  125. bool consume(char);
  126. ALWAYS_INLINE void set_reading_forwards() { m_forwards = true; }
  127. ALWAYS_INLINE void set_reading_backwards() { m_forwards = false; }
  128. ALWAYS_INLINE void save() { m_saved_offsets.append(m_offset); }
  129. ALWAYS_INLINE void load() { m_offset = m_saved_offsets.take_last(); }
  130. ALWAYS_INLINE void discard() { m_saved_offsets.take_last(); }
  131. #ifdef PDF_DEBUG
  132. void dump_state() const
  133. {
  134. dbgln("Reader State (offset={} size={})", offset(), bytes().size());
  135. size_t from = max(0, static_cast<int>(offset()) - 10);
  136. size_t to = min(bytes().size() - 1, offset() + 10);
  137. for (auto i = from; i <= to; i++) {
  138. char value = static_cast<char>(bytes().at(i));
  139. auto line = ByteString::formatted(" {}: '{}' (value={:3d}) ", i, value, static_cast<u8>(value));
  140. if (i == offset()) {
  141. dbgln("{} <<< current location, forwards={}", line, m_forwards);
  142. } else {
  143. dbgln("{}", line);
  144. }
  145. }
  146. dbgln();
  147. }
  148. #endif
  149. private:
  150. ReadonlyBytes m_bytes;
  151. ssize_t m_offset { 0 };
  152. Vector<ssize_t> m_saved_offsets;
  153. bool m_forwards { true };
  154. };
  155. }