فهرست منبع

LibWeb: Make HTML parser flush all pending tokens in "in table text"

There were multiple bugs in the parsing algorithm for handling text
occurring inside a `table` element:

- When there was pending non-whitespace text inside a table, we only
  flushed one token instead of all pending tokens.

- Also, the one token we did flush was not even one of the pending
  tokens, but the token that caused the flush to happen.

- Once we started flushing the right tokens, it turned out we had not
  yet implemented character insertion points expressed as "before X".

- Finally, we were not exiting the "in table text" mode after flushing
  pending tokens, effectively getting us stuck in that mode until EOF.
Andreas Kling 2 سال پیش
والد
کامیت
5cdb394400

+ 1 - 0
Tests/LibWeb/Text/expected/html-parser-text-in-table-hoisting.txt

@@ -0,0 +1 @@
+PASS  

+ 10 - 0
Tests/LibWeb/Text/input/html-parser-text-in-table-hoisting.html

@@ -0,0 +1,10 @@
+<script src="include.js"></script>
+<body><table><tr>PASS</tr></table></body>
+<script>
+    test(() => {
+        // Remove the table. "PASS" should still be visible,
+        // as the HTML parser inserts it *before* the table
+        // under these circumstances.
+        document.querySelector("table").remove()
+    });
+</script>

+ 14 - 12
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

@@ -1001,7 +1001,11 @@ DOM::Text* HTMLParser::find_character_insertion_node()
 {
     auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
     if (adjusted_insertion_location.insert_before_sibling) {
-        TODO();
+        if (adjusted_insertion_location.insert_before_sibling->previous_sibling() && adjusted_insertion_location.insert_before_sibling->previous_sibling()->is_text())
+            return static_cast<DOM::Text*>(adjusted_insertion_location.insert_before_sibling->previous_sibling());
+        auto new_text_node = realm().heap().allocate<DOM::Text>(realm(), document(), "").release_allocated_value_but_fixme_should_propagate_errors();
+        adjusted_insertion_location.parent->insert_before(*new_text_node, *adjusted_insertion_location.insert_before_sibling);
+        return new_text_node;
     }
     if (adjusted_insertion_location.parent->is_document())
         return nullptr;
@@ -2661,20 +2665,18 @@ void HTMLParser::handle_in_table_text(HTMLToken& token)
     // are character tokens that are not ASCII whitespace, then this is a parse error:
     // reprocess the character tokens in the pending table character tokens list using
     // the rules given in the "anything else" entry in the "in table" insertion mode.
-    for (auto& pending_token : m_pending_table_character_tokens) {
-        VERIFY(pending_token.is_character());
-        if (!pending_token.is_parser_whitespace()) {
-            log_parse_error();
+    if (any_of(m_pending_table_character_tokens, [](auto const& token) { return !token.is_parser_whitespace(); })) {
+        log_parse_error();
+        for (auto& pending_token : m_pending_table_character_tokens) {
             m_foster_parenting = true;
-            process_using_the_rules_for(InsertionMode::InBody, token);
+            process_using_the_rules_for(InsertionMode::InBody, pending_token);
             m_foster_parenting = false;
-            return;
         }
-    }
-
-    // Otherwise, insert the characters given by the pending table character tokens list.
-    for (auto& pending_token : m_pending_table_character_tokens) {
-        insert_character(pending_token.code_point());
+    } else {
+        // Otherwise, insert the characters given by the pending table character tokens list.
+        for (auto& pending_token : m_pending_table_character_tokens) {
+            insert_character(pending_token.code_point());
+        }
     }
 
     // Switch the insertion mode to the original insertion mode and reprocess the token.