Sfoglia il codice sorgente

LibWeb: Implement unsafe HTML parsing methods

Both Element's and ShadowRoot's setHTMLUnsafe, and Document's static
parseHTMLUnsafe methods are implemented.
Luke Warlow 1 anno fa
parent
commit
ce8d3d17c4

+ 36 - 0
Userland/Libraries/LibWeb/DOM/Document.cpp

@@ -5222,4 +5222,40 @@ void Document::set_allow_declarative_shadow_roots(bool allow)
     m_allow_declarative_shadow_roots = allow;
 }
 
+// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#parse-html-from-a-string
+void Document::parse_html_from_a_string(StringView html)
+{
+    // 1. Set document's type to "html".
+    set_document_type(DOM::Document::Type::HTML);
+
+    // 2. Create an HTML parser parser, associated with document.
+    // 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
+    // FIXME: We don't have the concept of encoding confidence yet, so the encoding is hard-coded to "UTF-8".
+    auto parser = HTML::HTMLParser::create(*this, html, "UTF-8"sv);
+
+    // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
+    // FIXME: "about:blank" is passed to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
+    parser->run("about:blank"sv);
+}
+
+// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsehtmlunsafe
+JS::NonnullGCPtr<Document> Document::parse_html_unsafe(JS::VM& vm, StringView html)
+{
+    auto& realm = *vm.current_realm();
+    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Document parseHTMLUnsafe", and "script".
+
+    // 2. Let document be a new Document, whose content type is "text/html".
+    JS::NonnullGCPtr<DOM::Document> document = Document::create(realm);
+    document->set_content_type("text/html"_string);
+
+    // 3. Set document's allow declarative shadow roots to true.
+    document->set_allow_declarative_shadow_roots(true);
+
+    // 4. Parse HTML from a string given document and compliantHTML. // FIXME: Use compliantHTML; the raw html argument is passed until step 1 is implemented.
+    document->parse_html_from_a_string(html);
+
+    // 5. Return document.
+    return document;
+}
+
 }

+ 3 - 0
Userland/Libraries/LibWeb/DOM/Document.h

@@ -676,6 +676,9 @@ public:
 
     Vector<JS::Handle<DOM::Range>> find_matching_text(String const&, CaseSensitivity);
 
+    void parse_html_from_a_string(StringView);
+    static JS::NonnullGCPtr<Document> parse_html_unsafe(JS::VM&, StringView);
+
 protected:
     virtual void initialize(JS::Realm&) override;
     virtual void visit_edges(Cell::Visitor&) override;

+ 3 - 0
Userland/Libraries/LibWeb/DOM/Document.idl

@@ -56,6 +56,9 @@ interface Document : Node {
     [CEReactions] undefined write(DOMString... text);
     [CEReactions] undefined writeln(DOMString... text);
 
+    // FIXME: static Document parseHTMLUnsafe((TrustedHTML or DOMString) html);
+    static Document parseHTMLUnsafe(DOMString html);
+
     attribute DOMString cookie;
 
     // https://html.spec.whatwg.org/#Document-partial

+ 17 - 1
Userland/Libraries/LibWeb/DOM/Element.cpp

@@ -1499,7 +1499,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<DOM::DocumentFragment>> Element::parse_frag
     }
 
     // 3. Let new children be the result of invoking algorithm given markup, with context set to context.
-    auto new_children = algorithm(*this, markup);
+    auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No);
 
     // 4. Let fragment be a new DocumentFragment whose node document is context's node document.
     auto fragment = realm().heap().allocate<DOM::DocumentFragment>(realm(), document());
@@ -2648,4 +2648,20 @@ WebIDL::ExceptionOr<String> Element::get_html(GetHTMLOptions const& options) con
         options.shadow_roots);
 }
 
+// https://html.spec.whatwg.org/#dom-element-sethtmlunsafe
+WebIDL::ExceptionOr<void> Element::set_html_unsafe(StringView html)
+{
+    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Element setHTMLUnsafe", and "script".
+
+    // 2. Let target be this's template contents if this is a template element; otherwise this.
+    DOM::Node* target = this;
+    if (is<HTML::HTMLTemplateElement>(*this))
+        target = verify_cast<HTML::HTMLTemplateElement>(*this).content().ptr();
+
+    // 3. Unsafely set HTML given target, this, and compliantHTML. FIXME: Use compliantHTML; the raw html argument is passed until step 1 is implemented.
+    TRY(target->unsafely_set_html(*this, html));
+
+    return {};
+}
+
 }

+ 2 - 0
Userland/Libraries/LibWeb/DOM/Element.h

@@ -189,6 +189,8 @@ public:
     WebIDL::ExceptionOr<String> inner_html() const;
     WebIDL::ExceptionOr<void> set_inner_html(StringView);
 
+    WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
+
     WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
 
     WebIDL::ExceptionOr<void> insert_adjacent_html(String const& position, String const&);

+ 2 - 1
Userland/Libraries/LibWeb/DOM/Element.idl

@@ -94,7 +94,8 @@ interface Element : Node {
     readonly attribute double currentCSSZoom;
 
     // https://html.spec.whatwg.org/#dom-parsing-and-serialization
-    [FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
+    // FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
+    [CEReactions] undefined setHTMLUnsafe(DOMString html);
     DOMString getHTML(optional GetHTMLOptions options = {});
 
     // FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;

+ 20 - 0
Userland/Libraries/LibWeb/DOM/Node.cpp

@@ -1385,6 +1385,26 @@ WebIDL::ExceptionOr<String> Node::serialize_fragment(DOMParsing::RequireWellForm
     return DOMParsing::serialize_node_to_xml_string(*this, require_well_formed);
 }
 
+// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#unsafely-set-html
+WebIDL::ExceptionOr<void> Node::unsafely_set_html(Element& context_element, StringView html)
+{
+    // 1. Let newChildren be the result of the HTML fragment parsing algorithm given contextElement, html, and true.
+    auto new_children = HTML::HTMLParser::parse_html_fragment(context_element, html, HTML::HTMLParser::AllowDeclarativeShadowRoots::Yes);
+
+    // 2. Let fragment be a new DocumentFragment whose node document is contextElement’s node document.
+    auto fragment = heap().allocate<DocumentFragment>(realm(), context_element.document());
+
+    // 3. For each node in newChildren, append node to fragment.
+    for (auto& child : new_children)
+        // NOTE: It is unclear whether appending can fail here; any error is propagated to the caller to be safe.
+        (void)TRY(fragment->append_child(*child));
+
+    // 4. Replace all with fragment within target (this node is the target; contextElement is only used for parsing).
+    replace_all(fragment);
+
+    return {};
+}
+
 // https://dom.spec.whatwg.org/#dom-node-issamenode
 bool Node::is_same_node(Node const* other_node) const
 {

+ 2 - 0
Userland/Libraries/LibWeb/DOM/Node.h

@@ -252,6 +252,8 @@ public:
 
     WebIDL::ExceptionOr<String> serialize_fragment(DOMParsing::RequireWellFormed, FragmentSerializationMode = FragmentSerializationMode::Inner) const;
 
+    WebIDL::ExceptionOr<void> unsafely_set_html(Element&, StringView);
+
     void replace_all(JS::GCPtr<Node>);
     void string_replace_all(String const&);
 

+ 11 - 0
Userland/Libraries/LibWeb/DOM/ShadowRoot.cpp

@@ -107,6 +107,17 @@ WebIDL::ExceptionOr<String> ShadowRoot::get_html(GetHTMLOptions const& options)
         options.shadow_roots);
 }
 
+// https://html.spec.whatwg.org/#dom-shadowroot-sethtmlunsafe
+WebIDL::ExceptionOr<void> ShadowRoot::set_html_unsafe(StringView html)
+{
+    // FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "ShadowRoot setHTMLUnsafe", and "script".
+
+    // 2. Unsafely set HTML given this, this's shadow host, and compliantHTML. FIXME: Use compliantHTML; the raw html argument is passed until step 1 is implemented.
+    TRY(unsafely_set_html(*this->host(), html));
+
+    return {};
+}
+
 CSS::StyleSheetList& ShadowRoot::style_sheets()
 {
     if (!m_style_sheets)

+ 2 - 0
Userland/Libraries/LibWeb/DOM/ShadowRoot.h

@@ -46,6 +46,8 @@ public:
     WebIDL::ExceptionOr<String> inner_html() const;
     WebIDL::ExceptionOr<void> set_inner_html(StringView);
 
+    WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
+
     WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
 
     CSS::StyleSheetList& style_sheets();

+ 2 - 1
Userland/Libraries/LibWeb/DOM/ShadowRoot.idl

@@ -15,7 +15,8 @@ interface ShadowRoot : DocumentFragment {
 
     // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 
-    [FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
+    // FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
+    [CEReactions] undefined setHTMLUnsafe(DOMString html);
     DOMString getHTML(optional GetHTMLOptions options = {});
 
     // FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;

+ 7 - 13
Userland/Libraries/LibWeb/HTML/DOMParser.cpp

@@ -39,25 +39,19 @@ void DOMParser::initialize(JS::Realm& realm)
 // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring
 JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string, Bindings::DOMParserSupportedType type)
 {
-    // 1. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
+    // FIXME: 1. Let compliantString to the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, string, "DOMParser parseFromString", and "script".
+
+    // 2. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
     JS::GCPtr<DOM::Document> document;
 
-    // 2. Switch on type:
+    // 3. Switch on type:
     if (type == Bindings::DOMParserSupportedType::Text_Html) {
         // -> "text/html"
-        // 1. Set document's type to "html".
         document = HTML::HTMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
         document->set_content_type(Bindings::idl_enum_to_string(type));
-        document->set_document_type(DOM::Document::Type::HTML);
-
-        // 2. Create an HTML parser parser, associated with document.
-        // 3. Place string into the input stream for parser. The encoding confidence is irrelevant.
-        // FIXME: We don't have the concept of encoding confidence yet.
-        auto parser = HTMLParser::create(*document, string, "UTF-8"sv);
 
-        // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
-        // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
-        parser->run("about:blank"sv);
+        // 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString.
+        document->parse_html_from_a_string(string);
     } else {
         // -> Otherwise
         document = DOM::XMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
@@ -67,7 +61,7 @@ JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string,
         // 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.
         XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
         XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
-        // 2. Parse string using parser.
+        // 2. Parse compliantString using parser. FIXME: Use compliantString.
         auto result = parser.parse_with_listener(builder);
         // 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
         if (result.is_error() || builder.has_error()) {

+ 17 - 13
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

@@ -4266,7 +4266,7 @@ DOM::Document& HTMLParser::document()
 }
 
 // https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
-Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
+Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots)
 {
     // 1. Create a new Document node, and mark it as being an HTML document.
     auto temp_document = DOM::Document::create(context_element.realm());
@@ -4279,12 +4279,16 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
     //    Otherwise, leave the Document in no-quirks mode.
     temp_document->set_quirks_mode(context_element.document().mode());
 
-    // 3. Create a new HTML parser, and associate it with the just created Document node.
+    // 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true.
+    if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
+        temp_document->set_allow_declarative_shadow_roots(true);
+
+    // 4. Create a new HTML parser, and associate it with the just created Document node.
     auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv);
     parser->m_context_element = JS::make_handle(context_element);
     parser->m_parsing_fragment = true;
 
-    // 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
+    // 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
     // - title
     // - textarea
     if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
@@ -4321,37 +4325,37 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
         // Leave the tokenizer in the data state.
     }
 
-    // 5. Let root be a new html element with no attributes.
+    // 6. Let root be a new html element with no attributes.
     auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
 
-    // 6. Append the element root to the Document node created above.
+    // 7. Append the element root to the Document node created above.
     MUST(temp_document->append_child(root));
 
-    // 7. Set up the parser's stack of open elements so that it contains just the single element root.
+    // 8. Set up the parser's stack of open elements so that it contains just the single element root.
     parser->m_stack_of_open_elements.push(root);
 
-    // 8. If the context element is a template element,
+    // 9. If the context element is a template element,
     if (context_element.local_name() == HTML::TagNames::template_) {
         // push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
         parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
     }
 
-    // FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
+    // FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
     //           Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
 
-    // 10. Reset the parser's insertion mode appropriately.
+    // 11. Reset the parser's insertion mode appropriately.
     parser->reset_the_insertion_mode_appropriately();
 
-    // 11. Set the parser's form element pointer to the nearest node to the context element that is a form element
+    // 12. Set the parser's form element pointer to the nearest node to the context element that is a form element
     //     (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
     //     (If there is no such form element, the form element pointer keeps its initial value, null.)
     parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
 
-    // 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
-    // 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
+    // 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
+    // 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
     parser->run(context_element.document().url());
 
-    // 14. Return the child nodes of root, in tree order.
+    // 15. Return the child nodes of root, in tree order.
     Vector<JS::Handle<DOM::Node>> children;
     while (JS::GCPtr<DOM::Node> child = root->first_child()) {
         MUST(root->remove_child(*child));

+ 5 - 2
Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h

@@ -59,8 +59,11 @@ public:
     static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);
 
     DOM::Document& document();
-
-    static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView);
+    enum class AllowDeclarativeShadowRoots {
+        No,
+        Yes,
+    };
+    static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No);
     enum class SerializableShadowRoots {
         No,
         Yes,