Переглянути джерело

LibWeb: Use getter and setter for Character type HTMLTokens

While storing the code point in a UTF-8 encoded String is horrendously
inefficient, this problem will be addressed at a later stage.
Max Wipfli 4 роки тому
батько
коміт
1aeafcc58b

+ 9 - 4
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h

@@ -50,10 +50,7 @@ public:
     {
     {
         HTMLToken token;
         HTMLToken token;
         token.m_type = Type::Character;
         token.m_type = Type::Character;
-        StringBuilder builder;
-        // FIXME: This narrows code_point to char, should this be append_code_point() instead?
-        builder.append(code_point);
-        token.m_comment_or_character.data = builder.to_string();
+        token.set_code_point(code_point);
         return token;
         return token;
     }
     }
 
 
@@ -97,6 +94,14 @@ public:
         }
         }
     }
     }
 
 
+    void set_code_point(u32 code_point)
+    {
+        VERIFY(is_character());
+        StringBuilder builder;
+        builder.append_code_point(code_point);
+        m_comment_or_character.data = builder.to_string();
+    }
+
     String const& comment() const
     String const& comment() const
     {
     {
         VERIFY(is_comment());
         VERIFY(is_comment());

+ 17 - 19
Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp

@@ -75,18 +75,17 @@ namespace Web::HTML {
         goto new_state;                                                 \
         goto new_state;                                                 \
     } while (0)
     } while (0)
 
 
-#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE                               \
-    do {                                                                                 \
-        for (auto code_point : m_temporary_buffer) {                                     \
-            if (consumed_as_part_of_an_attribute()) {                                    \
-                m_current_builder.append_code_point(code_point);                         \
-            } else {                                                                     \
-                create_new_token(HTMLToken::Type::Character);                            \
-                m_current_builder.append_code_point(code_point);                         \
-                m_current_token.m_comment_or_character.data = consume_current_builder(); \
-                m_queued_tokens.enqueue(move(m_current_token));                          \
-            }                                                                            \
-        }                                                                                \
+#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE       \
+    do {                                                         \
+        for (auto code_point : m_temporary_buffer) {             \
+            if (consumed_as_part_of_an_attribute()) {            \
+                m_current_builder.append_code_point(code_point); \
+            } else {                                             \
+                create_new_token(HTMLToken::Type::Character);    \
+                m_current_token.set_code_point(code_point);      \
+                m_queued_tokens.enqueue(move(m_current_token));  \
+            }                                                    \
+        }                                                        \
     } while (0)
     } while (0)
 
 
 #define DONT_CONSUME_NEXT_INPUT_CHARACTER \
 #define DONT_CONSUME_NEXT_INPUT_CHARACTER \
@@ -142,13 +141,12 @@ namespace Web::HTML {
         return m_queued_tokens.dequeue();               \
         return m_queued_tokens.dequeue();               \
     } while (0)
     } while (0)
 
 
-#define EMIT_CHARACTER(code_point)                                               \
-    do {                                                                         \
-        create_new_token(HTMLToken::Type::Character);                            \
-        m_current_builder.append_code_point(code_point);                         \
-        m_current_token.m_comment_or_character.data = consume_current_builder(); \
-        m_queued_tokens.enqueue(move(m_current_token));                          \
-        return m_queued_tokens.dequeue();                                        \
+#define EMIT_CHARACTER(code_point)                      \
+    do {                                                \
+        create_new_token(HTMLToken::Type::Character);   \
+        m_current_token.set_code_point(code_point);     \
+        m_queued_tokens.enqueue(move(m_current_token)); \
+        return m_queued_tokens.dequeue();               \
     } while (0)
     } while (0)
 
 
 #define EMIT_CURRENT_CHARACTER \
 #define EMIT_CURRENT_CHARACTER \