Selaa lähdekoodia

LibWeb: Stop parsing after `document.write` at the insertion point

If a call to `document.write` inserts an incomplete HTML tag, e.g.:

    document.write("<p");

we would previously continue parsing the document until we reached a
closing angle bracket. However, the spec states we should stop once we
reach the new insertion point.
Timothy Flynn 1 vuosi sitten
vanhempi
commit
af57bd5cca

+ 34 - 0
Tests/LibWeb/Layout/expected/document-write-incomplete-tag.txt

@@ -0,0 +1,34 @@
+Viewport <#document> at (0,0) content-size 800x600 children: not-inline
+  BlockContainer <html> at (0,0) content-size 800x600 [BFC] children: not-inline
+    BlockContainer <body> at (8,16) content-size 784x83 children: not-inline
+      BlockContainer <p> at (8,16) content-size 784x17 children: inline
+        frag 0 from TextNode start: 0, length: 4, rect: [8,16 30.078125x17] baseline: 13.296875
+            "Well"
+        TextNode <#text>
+      BlockContainer <(anonymous)> at (8,49) content-size 784x0 children: inline
+        TextNode <#text>
+      BlockContainer <p> at (8,49) content-size 784x17 children: inline
+        frag 0 from TextNode start: 0, length: 5, rect: [8,49 36.84375x17] baseline: 13.296875
+            "hello"
+        TextNode <#text>
+      BlockContainer <(anonymous)> at (8,82) content-size 784x0 children: inline
+        TextNode <#text>
+      BlockContainer <p> at (8,82) content-size 784x17 children: inline
+        frag 0 from TextNode start: 0, length: 8, rect: [8,82 59.21875x17] baseline: 13.296875
+            "friends!"
+        TextNode <#text>
+      BlockContainer <(anonymous)> at (8,115) content-size 784x0 children: inline
+        TextNode <#text>
+
+ViewportPaintable (Viewport<#document>) [0,0 800x600]
+  PaintableWithLines (BlockContainer<HTML>) [0,0 800x600]
+    PaintableWithLines (BlockContainer<BODY>) [8,16 784x83] overflow: [8,16 784x99]
+      PaintableWithLines (BlockContainer<P>) [8,16 784x17]
+        TextPaintable (TextNode<#text>)
+      PaintableWithLines (BlockContainer(anonymous)) [8,49 784x0]
+      PaintableWithLines (BlockContainer<P>) [8,49 784x17]
+        TextPaintable (TextNode<#text>)
+      PaintableWithLines (BlockContainer(anonymous)) [8,82 784x0]
+      PaintableWithLines (BlockContainer<P>) [8,82 784x17]
+        TextPaintable (TextNode<#text>)
+      PaintableWithLines (BlockContainer(anonymous)) [8,115 784x0]

+ 8 - 0
Tests/LibWeb/Layout/input/document-write-incomplete-tag.html

@@ -0,0 +1,8 @@
+<p>Well</p>
+
+<script type="text/javascript">
+    document.write("<p");
+    document.write(">hello</p>");
+</script>
+
+<p>friends!</p>

+ 5 - 2
Userland/Libraries/LibWeb/DOM/Document.cpp

@@ -530,9 +530,12 @@ WebIDL::ExceptionOr<void> Document::run_the_document_write_steps(StringView inpu
     // 5. Insert input into the input stream just before the insertion point.
     m_parser->tokenizer().insert_input_at_insertion_point(input);
 
-    // 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script end tag token is emitted by the tokenizer).
+    // 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time,
+    //    processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point
+    //    or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script
+    //    end tag token is emitted by the tokenizer).
     if (!pending_parsing_blocking_script())
-        m_parser->run();
+        m_parser->run(HTML::HTMLTokenizer::StopAtInsertionPoint::Yes);
 
     return {};
 }

+ 4 - 4
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

@@ -169,14 +169,14 @@ void HTMLParser::visit_edges(Cell::Visitor& visitor)
     m_list_of_active_formatting_elements.visit_edges(visitor);
 }
 
-void HTMLParser::run()
+void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
 {
     for (;;) {
         // FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
         if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
             return;
 
-        auto optional_token = m_tokenizer.next_token();
+        auto optional_token = m_tokenizer.next_token(stop_at_insertion_point);
         if (!optional_token.has_value())
             break;
         auto& token = optional_token.value();
@@ -216,11 +216,11 @@ void HTMLParser::run()
     flush_character_insertions();
 }
 
-void HTMLParser::run(const AK::URL& url)
+void HTMLParser::run(const AK::URL& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
 {
     m_document->set_url(url);
     m_document->set_source(MUST(String::from_byte_string(m_tokenizer.source())));
-    run();
+    run(stop_at_insertion_point);
     the_end(*m_document, this);
     m_document->detach_parser({});
 }

+ 2 - 2
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h

@@ -53,8 +53,8 @@ public:
     static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input);
     static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, ByteString const& encoding);
 
-    void run();
-    void run(const AK::URL&);
+    void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
+    void run(const AK::URL&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
 
     static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);
 

+ 4 - 1
Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp

@@ -248,7 +248,7 @@ HTMLToken::Position HTMLTokenizer::nth_last_position(size_t n)
     return m_source_positions.at(m_source_positions.size() - 1 - n);
 }
 
-Optional<HTMLToken> HTMLTokenizer::next_token()
+Optional<HTMLToken> HTMLTokenizer::next_token(StopAtInsertionPoint stop_at_insertion_point)
 {
     if (!m_source_positions.is_empty()) {
         auto last_position = m_source_positions.last();
@@ -263,6 +263,9 @@ _StartOfFunction:
         return {};
 
     for (;;) {
+        if (stop_at_insertion_point == StopAtInsertionPoint::Yes && is_insertion_point_reached())
+            return {};
+
         auto current_input_character = next_code_point();
         switch (m_state) {
             // 13.2.5.1 Data state, https://html.spec.whatwg.org/multipage/parsing.html#data-state

+ 5 - 1
Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h

@@ -111,7 +111,11 @@ public:
 #undef __ENUMERATE_TOKENIZER_STATE
     };
 
-    Optional<HTMLToken> next_token();
+    enum class StopAtInsertionPoint {
+        No,
+        Yes,
+    };
+    Optional<HTMLToken> next_token(StopAtInsertionPoint = StopAtInsertionPoint::No);
 
     void set_parser(Badge<HTMLParser>, HTMLParser& parser) { m_parser = &parser; }