瀏覽代碼

LibWeb: Allow HTML tokenizer to emit more than one token

Tokens are now put on a queue when emitted, and we always pop from that
queue when returning from next_token().
Andreas Kling 5 年之前
父節點
當前提交
ecd25ce6c7
共有 2 個文件被更改,包括 26 次插入和 10 次删除
  1. 23 10
      Libraries/LibWeb/Parser/HTMLTokenizer.cpp
  2. 3 0
      Libraries/LibWeb/Parser/HTMLTokenizer.h

+ 23 - 10
Libraries/LibWeb/Parser/HTMLTokenizer.cpp

@@ -58,7 +58,16 @@
         will_switch_to(State::new_state);           \
         m_state = State::new_state;                 \
         will_emit(m_current_token);                 \
-        return m_current_token;                     \
+        m_queued_tokens.enqueue(m_current_token);   \
+        return m_queued_tokens.dequeue();           \
+    } while (0)
+
+#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state) \
+    do {                                                      \
+        m_queued_tokens.enqueue(m_current_token);             \
+        will_reconsume_in(State::new_state);                  \
+        m_state = State::new_state;                           \
+        goto new_state;                                       \
     } while (0)
 
 #define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
@@ -90,21 +99,23 @@
         m_has_emitted_eof = true;                     \
         create_new_token(HTMLToken::Type::EndOfFile); \
         will_emit(m_current_token);                   \
-        return m_current_token;                       \
+        m_queued_tokens.enqueue(m_current_token);     \
+        return m_queued_tokens.dequeue();             \
     } while (0)
 
-#define EMIT_CURRENT_TOKEN          \
-    do {                            \
-        will_emit(m_current_token); \
-        return m_current_token;     \
+#define EMIT_CURRENT_TOKEN                        \
+    do {                                          \
+        will_emit(m_current_token);               \
+        m_queued_tokens.enqueue(m_current_token); \
+        return m_queued_tokens.dequeue();         \
     } while (0)
 
 #define EMIT_CHARACTER(codepoint)                                      \
     do {                                                               \
         create_new_token(HTMLToken::Type::Character);                  \
         m_current_token.m_comment_or_character.data.append(codepoint); \
-        will_emit(m_current_token);                                    \
-        return m_current_token;                                        \
+        m_queued_tokens.enqueue(m_current_token);                      \
+        return m_queued_tokens.dequeue();                              \
     } while (0)
 
 #define EMIT_CURRENT_CHARACTER \
@@ -141,6 +152,9 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
 
 Optional<HTMLToken> HTMLTokenizer::next_token()
 {
+    if (!m_queued_tokens.is_empty())
+        return m_queued_tokens.dequeue();
+
     for (;;) {
         auto current_input_character = next_codepoint();
         switch (m_state) {
@@ -1270,8 +1284,7 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
                 }
                 ANYTHING_ELSE
                 {
-                    EMIT_CHARACTER('<');
-                    RECONSUME_IN(ScriptData);
+                    EMIT_CHARACTER_AND_RECONSUME_IN('<', ScriptData);
                 }
             }
             END_STATE

+ 3 - 0
Libraries/LibWeb/Parser/HTMLTokenizer.h

@@ -26,6 +26,7 @@
 
 #pragma once
 
+#include <AK/Queue.h>
 #include <AK/StringView.h>
 #include <AK/Types.h>
 #include <LibWeb/Forward.h>
@@ -165,5 +166,7 @@ private:
     HTMLToken m_last_emitted_start_tag;
 
     bool m_has_emitted_eof { false };
+
+    Queue<HTMLToken> m_queued_tokens;
 };
 }