TokenStream.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /*
  2. * Copyright (c) 2020-2021, the SerenityOS developers.
  3. * Copyright (c) 2021-2024, Sam Atkins <sam@ladybird.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Format.h>
  9. #include <AK/Vector.h>
  10. #include <LibWeb/CSS/Parser/ComponentValue.h>
  11. #include <LibWeb/CSS/Parser/Tokenizer.h>
  12. namespace Web::CSS::Parser {
  13. // https://drafts.csswg.org/css-syntax/#css-token-stream
  14. template<typename T>
  15. class TokenStream {
  16. public:
  17. class StateTransaction {
  18. public:
  19. explicit StateTransaction(TokenStream<T>& token_stream)
  20. : m_token_stream(token_stream)
  21. , m_saved_index(token_stream.m_index)
  22. {
  23. }
  24. ~StateTransaction()
  25. {
  26. if (!m_commit)
  27. m_token_stream.m_index = m_saved_index;
  28. }
  29. StateTransaction create_child() { return StateTransaction(*this); }
  30. void commit()
  31. {
  32. m_commit = true;
  33. if (m_parent)
  34. m_parent->commit();
  35. }
  36. private:
  37. explicit StateTransaction(StateTransaction& parent)
  38. : m_parent(&parent)
  39. , m_token_stream(parent.m_token_stream)
  40. , m_saved_index(parent.m_token_stream.m_index)
  41. {
  42. }
  43. StateTransaction* m_parent { nullptr };
  44. TokenStream<T>& m_token_stream;
  45. size_t m_saved_index { 0 };
  46. bool m_commit { false };
  47. };
  48. explicit TokenStream(Span<T const> tokens)
  49. : m_tokens(tokens)
  50. , m_eof(make_eof())
  51. {
  52. }
  53. explicit TokenStream(Vector<T> const& tokens)
  54. : m_tokens(tokens.span())
  55. , m_eof(make_eof())
  56. {
  57. }
  58. static TokenStream<T> of_single_token(T const& token)
  59. {
  60. return TokenStream(Span<T const> { &token, 1 });
  61. }
  62. TokenStream(TokenStream<T> const&) = delete;
  63. TokenStream(TokenStream<T>&&) = default;
  64. // https://drafts.csswg.org/css-syntax/#token-stream-next-token
  65. [[nodiscard]] T const& next_token() const
  66. {
  67. // The item of tokens at index.
  68. // If that index would be out-of-bounds past the end of the list, it’s instead an <eof-token>.
  69. if (m_index < m_tokens.size())
  70. return m_tokens[m_index];
  71. return m_eof;
  72. }
  73. // https://drafts.csswg.org/css-syntax/#token-stream-empty
  74. [[nodiscard]] bool is_empty() const
  75. {
  76. // A token stream is empty if the next token is an <eof-token>.
  77. return next_token().is(Token::Type::EndOfFile);
  78. }
  79. // https://drafts.csswg.org/css-syntax/#token-stream-consume-a-token
  80. [[nodiscard]] T const& consume_a_token()
  81. {
  82. // Let token be the next token. Increment index, then return token.
  83. auto& token = next_token();
  84. ++m_index;
  85. return token;
  86. }
  87. // https://drafts.csswg.org/css-syntax/#token-stream-discard-a-token
  88. void discard_a_token()
  89. {
  90. // If the token stream is not empty, increment index.
  91. if (!is_empty())
  92. ++m_index;
  93. }
  94. // https://drafts.csswg.org/css-syntax/#token-stream-mark
  95. void mark()
  96. {
  97. // Append index to marked indexes.
  98. m_marked_indexes.append(m_index);
  99. }
  100. // https://drafts.csswg.org/css-syntax/#token-stream-restore-a-mark
  101. void restore_a_mark()
  102. {
  103. // Pop from marked indexes, and set index to the popped value.
  104. m_index = m_marked_indexes.take_last();
  105. }
  106. // https://drafts.csswg.org/css-syntax/#token-stream-discard-a-mark
  107. void discard_a_mark()
  108. {
  109. // Pop from marked indexes, and do nothing with the popped value.
  110. m_marked_indexes.take_last();
  111. }
  112. // https://drafts.csswg.org/css-syntax/#token-stream-discard-whitespace
  113. void discard_whitespace()
  114. {
  115. // While the next token is a <whitespace-token>, discard a token.
  116. while (next_token().is(Token::Type::Whitespace))
  117. discard_a_token();
  118. }
  119. bool has_next_token()
  120. {
  121. return !is_empty();
  122. }
  123. // Deprecated, used in older versions of the spec.
  124. T const& current_token()
  125. {
  126. if (m_index < 1 || (m_index - 1) >= m_tokens.size())
  127. return m_eof;
  128. return m_tokens.at(m_index - 1);
  129. }
  130. // Deprecated
  131. T const& peek_token(size_t offset = 0)
  132. {
  133. if (remaining_token_count() <= offset)
  134. return m_eof;
  135. return m_tokens.at(m_index + offset);
  136. }
  137. // Deprecated, was used in older versions of the spec.
  138. void reconsume_current_input_token()
  139. {
  140. if (m_index > 0)
  141. --m_index;
  142. }
  143. StateTransaction begin_transaction() { return StateTransaction(*this); }
  144. size_t remaining_token_count() const
  145. {
  146. if (m_tokens.size() > m_index)
  147. return m_tokens.size() - m_index;
  148. return 0;
  149. }
  150. void dump_all_tokens()
  151. {
  152. dbgln("Dumping all tokens:");
  153. for (size_t i = 0; i < m_tokens.size(); ++i) {
  154. auto& token = m_tokens[i];
  155. if (i == m_index)
  156. dbgln("-> {}", token.to_debug_string());
  157. else
  158. dbgln(" {}", token.to_debug_string());
  159. }
  160. }
  161. void copy_state(Badge<Parser>, TokenStream<T> const& other)
  162. {
  163. m_index = other.m_index;
  164. }
  165. private:
  166. // https://drafts.csswg.org/css-syntax/#token-stream-tokens
  167. Span<T const> m_tokens;
  168. // https://drafts.csswg.org/css-syntax/#token-stream-index
  169. size_t m_index { 0 };
  170. // https://drafts.csswg.org/css-syntax/#token-stream-marked-indexes
  171. Vector<size_t> m_marked_indexes;
  172. T make_eof()
  173. {
  174. if constexpr (IsSame<T, Token>) {
  175. return Tokenizer::create_eof_token();
  176. }
  177. if constexpr (IsSame<T, ComponentValue>) {
  178. return ComponentValue(Tokenizer::create_eof_token());
  179. }
  180. }
  181. T m_eof;
  182. };
  183. }