LibWeb: Implement unsafe HTML parsing methods
Both Element's and ShadowRoot's setHTMLUnsafe, and Document's static parseHTMLUnsafe methods are implemented.
This commit is contained in:
parent
58fc901578
commit
ce8d3d17c4
Notes:
sideshowbarker
2024-07-17 01:55:29 +09:00
Author: https://github.com/lukewarlow Commit: https://github.com/LadybirdBrowser/ladybird/commit/ce8d3d17c4 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/282 Reviewed-by: https://github.com/AtkinsSJ
14 changed files with 129 additions and 31 deletions
|
@ -5222,4 +5222,40 @@ void Document::set_allow_declarative_shadow_roots(bool allow)
|
|||
m_allow_declarative_shadow_roots = allow;
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#parse-html-from-a-string
// Parses `html` into this document: marks the document as an HTML document, creates an HTML parser
// over the string (as UTF-8) and runs it to completion with "about:blank" as the URL.
|
||||
void Document::parse_html_from_a_string(StringView html)
|
||||
{
|
||||
// 1. Set document's type to "html".
|
||||
set_document_type(DOM::Document::Type::HTML);
|
||||
|
||||
// 2. Create an HTML parser parser, associated with document.
|
||||
// 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
|
||||
// FIXME: We don't have the concept of encoding confidence yet.
|
||||
auto parser = HTML::HTMLParser::create(*this, html, "UTF-8"sv);
|
||||
|
||||
// 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
|
||||
// FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
|
||||
parser->run("about:blank"sv);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsehtmlunsafe
// Creates a new Document in the VM's current realm, sets its content type to "text/html", allows
// declarative shadow roots on it, parses `html` into it and returns it.
// The Trusted Types step is not implemented yet, so `html` is parsed as given.
|
||||
JS::NonnullGCPtr<Document> Document::parse_html_unsafe(JS::VM& vm, StringView html)
|
||||
{
|
||||
auto& realm = *vm.current_realm();
|
||||
// FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Document parseHTMLUnsafe", and "script".
|
||||
|
||||
// 2. Let document be a new Document, whose content type is "text/html".
|
||||
JS::NonnullGCPtr<DOM::Document> document = Document::create(realm);
|
||||
document->set_content_type("text/html"_string);
|
||||
|
||||
// 3. Set document's allow declarative shadow roots to true.
|
||||
document->set_allow_declarative_shadow_roots(true);
|
||||
|
||||
// 4. Parse HTML from a string given document and compliantHTML. // FIXME: Use compliantHTML.
|
||||
document->parse_html_from_a_string(html);
|
||||
|
||||
// 5. Return document.
|
||||
return document;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -676,6 +676,9 @@ public:
|
|||
|
||||
Vector<JS::Handle<DOM::Range>> find_matching_text(String const&, CaseSensitivity);
|
||||
|
||||
void parse_html_from_a_string(StringView);
|
||||
static JS::NonnullGCPtr<Document> parse_html_unsafe(JS::VM&, StringView);
|
||||
|
||||
protected:
|
||||
virtual void initialize(JS::Realm&) override;
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
|
|
@ -56,6 +56,9 @@ interface Document : Node {
|
|||
[CEReactions] undefined write(DOMString... text);
|
||||
[CEReactions] undefined writeln(DOMString... text);
|
||||
|
||||
// FIXME: static Document parseHTMLUnsafe((TrustedHTML or DOMString) html);
|
||||
static Document parseHTMLUnsafe(DOMString html);
|
||||
|
||||
attribute DOMString cookie;
|
||||
|
||||
// https://html.spec.whatwg.org/#Document-partial
|
||||
|
|
|
@ -1499,7 +1499,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<DOM::DocumentFragment>> Element::parse_frag
|
|||
}
|
||||
|
||||
// 3. Let new children be the result of invoking algorithm given markup, with context set to context.
|
||||
auto new_children = algorithm(*this, markup);
|
||||
auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No);
|
||||
|
||||
// 4. Let fragment be a new DocumentFragment whose node document is context's node document.
|
||||
auto fragment = realm().heap().allocate<DOM::DocumentFragment>(realm(), document());
|
||||
|
@ -2648,4 +2648,20 @@ WebIDL::ExceptionOr<String> Element::get_html(GetHTMLOptions const& options) con
|
|||
options.shadow_roots);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/#dom-element-sethtmlunsafe
// Picks the target node (the template contents when this is a <template> element, otherwise this
// element) and forwards to unsafely_set_html() on it, with this element as the parsing context.
// Any error from that call is handed back to the caller through TRY.
|
||||
WebIDL::ExceptionOr<void> Element::set_html_unsafe(StringView html)
|
||||
{
|
||||
// FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "Element setHTMLUnsafe", and "script".
|
||||
|
||||
// 2. Let target be this's template contents if this is a template element; otherwise this.
|
||||
DOM::Node* target = this;
|
||||
if (is<HTML::HTMLTemplateElement>(*this))
|
||||
target = verify_cast<HTML::HTMLTemplateElement>(*this).content().ptr();
|
||||
|
||||
// 3. Unsafely set HTML given target, this, and compliantHTML. FIXME: Use compliantHTML.
|
||||
TRY(target->unsafely_set_html(*this, html));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -189,6 +189,8 @@ public:
|
|||
WebIDL::ExceptionOr<String> inner_html() const;
|
||||
WebIDL::ExceptionOr<void> set_inner_html(StringView);
|
||||
|
||||
WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
|
||||
|
||||
WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
|
||||
|
||||
WebIDL::ExceptionOr<void> insert_adjacent_html(String const& position, String const&);
|
||||
|
|
|
@ -94,7 +94,8 @@ interface Element : Node {
|
|||
readonly attribute double currentCSSZoom;
|
||||
|
||||
// https://html.spec.whatwg.org/#dom-parsing-and-serialization
|
||||
[FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
|
||||
// FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
|
||||
[CEReactions] undefined setHTMLUnsafe(DOMString html);
|
||||
DOMString getHTML(optional GetHTMLOptions options = {});
|
||||
|
||||
// FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;
|
||||
|
|
|
@ -1385,6 +1385,26 @@ WebIDL::ExceptionOr<String> Node::serialize_fragment(DOMParsing::RequireWellForm
|
|||
return DOMParsing::serialize_node_to_xml_string(*this, require_well_formed);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#unsafely-set-html
// Parses `html` as a fragment in the context of `context_element` (with declarative shadow roots
// allowed), collects the resulting nodes into a new DocumentFragment and passes that fragment to
// replace_all() on this node. An error from appending a child is returned to the caller.
|
||||
WebIDL::ExceptionOr<void> Node::unsafely_set_html(Element& context_element, StringView html)
|
||||
{
|
||||
// 1. Let newChildren be the result of the HTML fragment parsing algorithm given contextElement, html, and true.
|
||||
auto new_children = HTML::HTMLParser::parse_html_fragment(context_element, html, HTML::HTMLParser::AllowDeclarativeShadowRoots::Yes);
|
||||
|
||||
// 2. Let fragment be a new DocumentFragment whose node document is contextElement’s node document.
|
||||
auto fragment = heap().allocate<DocumentFragment>(realm(), context_element.document());
|
||||
|
||||
// 3. For each node in newChildren, append node to fragment.
|
||||
for (auto& child : new_children)
|
||||
// I don't know if this can throw here, but let's be safe.
|
||||
(void)TRY(fragment->append_child(*child));
|
||||
|
||||
// 4. Replace all with fragment within contextElement.
|
||||
// NOTE(review): the call below runs replace_all() on this node (the target), not on
|
||||
// context_element; the two differ for <template> and shadow root callers. Confirm the
|
||||
// step wording against the current spec text.
|
||||
replace_all(fragment);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// https://dom.spec.whatwg.org/#dom-node-issamenode
|
||||
bool Node::is_same_node(Node const* other_node) const
|
||||
{
|
||||
|
|
|
@ -252,6 +252,8 @@ public:
|
|||
|
||||
WebIDL::ExceptionOr<String> serialize_fragment(DOMParsing::RequireWellFormed, FragmentSerializationMode = FragmentSerializationMode::Inner) const;
|
||||
|
||||
WebIDL::ExceptionOr<void> unsafely_set_html(Element&, StringView);
|
||||
|
||||
void replace_all(JS::GCPtr<Node>);
|
||||
void string_replace_all(String const&);
|
||||
|
||||
|
|
|
@ -107,6 +107,17 @@ WebIDL::ExceptionOr<String> ShadowRoot::get_html(GetHTMLOptions const& options)
|
|||
options.shadow_roots);
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/#dom-shadowroot-sethtmlunsafe
// Forwards to unsafely_set_html() on this shadow root, using the shadow host as the parsing
// context element. Any error from that call is handed back to the caller through TRY.
|
||||
WebIDL::ExceptionOr<void> ShadowRoot::set_html_unsafe(StringView html)
|
||||
{
|
||||
// FIXME: 1. Let compliantHTML be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, html, "ShadowRoot setHTMLUnsafe", and "script".
|
||||
|
||||
// 2. Unsafely set HTML given this, this's shadow host, and compliantHTML. FIXME: Use compliantHTML.
|
||||
// NOTE(review): host() is dereferenced without a null check; this assumes a shadow root always has a host. Confirm.
|
||||
TRY(unsafely_set_html(*this->host(), html));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
CSS::StyleSheetList& ShadowRoot::style_sheets()
|
||||
{
|
||||
if (!m_style_sheets)
|
||||
|
|
|
@ -46,6 +46,8 @@ public:
|
|||
WebIDL::ExceptionOr<String> inner_html() const;
|
||||
WebIDL::ExceptionOr<void> set_inner_html(StringView);
|
||||
|
||||
WebIDL::ExceptionOr<void> set_html_unsafe(StringView);
|
||||
|
||||
WebIDL::ExceptionOr<String> get_html(GetHTMLOptions const&) const;
|
||||
|
||||
CSS::StyleSheetList& style_sheets();
|
||||
|
|
|
@ -15,7 +15,8 @@ interface ShadowRoot : DocumentFragment {
|
|||
|
||||
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
|
||||
|
||||
[FIXME, CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
|
||||
// FIXME: [CEReactions] undefined setHTMLUnsafe((TrustedHTML or DOMString) html);
|
||||
[CEReactions] undefined setHTMLUnsafe(DOMString html);
|
||||
DOMString getHTML(optional GetHTMLOptions options = {});
|
||||
|
||||
// FIXME: [CEReactions] attribute (TrustedHTML or [LegacyNullToEmptyString] DOMString) innerHTML;
|
||||
|
|
|
@ -39,25 +39,19 @@ void DOMParser::initialize(JS::Realm& realm)
|
|||
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring
|
||||
JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string, Bindings::DOMParserSupportedType type)
|
||||
{
|
||||
// 1. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
|
||||
// FIXME: 1. Let compliantString be the result of invoking the Get Trusted Type compliant string algorithm with TrustedHTML, this's relevant global object, string, "DOMParser parseFromString", and "script".
|
||||
|
||||
// 2. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
|
||||
JS::GCPtr<DOM::Document> document;
|
||||
|
||||
// 2. Switch on type:
|
||||
// 3. Switch on type:
|
||||
if (type == Bindings::DOMParserSupportedType::Text_Html) {
|
||||
// -> "text/html"
|
||||
// 1. Set document's type to "html".
|
||||
document = HTML::HTMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
|
||||
document->set_content_type(Bindings::idl_enum_to_string(type));
|
||||
document->set_document_type(DOM::Document::Type::HTML);
|
||||
|
||||
// 2. Create an HTML parser parser, associated with document.
|
||||
// 3. Place string into the input stream for parser. The encoding confidence is irrelevant.
|
||||
// FIXME: We don't have the concept of encoding confidence yet.
|
||||
auto parser = HTMLParser::create(*document, string, "UTF-8"sv);
|
||||
|
||||
// 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
|
||||
// FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
|
||||
parser->run("about:blank"sv);
|
||||
// 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString.
|
||||
document->parse_html_from_a_string(string);
|
||||
} else {
|
||||
// -> Otherwise
|
||||
document = DOM::XMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
|
||||
|
@ -67,7 +61,7 @@ JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string,
|
|||
// 1. Create an XML parser parser, associated with document, and with XML scripting support disabled.
|
||||
XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
|
||||
XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
|
||||
// 2. Parse string using parser.
|
||||
// 2. Parse compliantString using parser. FIXME: Use compliantString.
|
||||
auto result = parser.parse_with_listener(builder);
|
||||
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
|
||||
if (result.is_error() || builder.has_error()) {
|
||||
|
|
|
@ -4266,7 +4266,7 @@ DOM::Document& HTMLParser::document()
|
|||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
|
||||
Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
|
||||
Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots)
|
||||
{
|
||||
// 1. Create a new Document node, and mark it as being an HTML document.
|
||||
auto temp_document = DOM::Document::create(context_element.realm());
|
||||
|
@ -4279,12 +4279,16 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
|
|||
// Otherwise, leave the Document in no-quirks mode.
|
||||
temp_document->set_quirks_mode(context_element.document().mode());
|
||||
|
||||
// 3. Create a new HTML parser, and associate it with the just created Document node.
|
||||
// 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true.
|
||||
if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
|
||||
temp_document->set_allow_declarative_shadow_roots(true);
|
||||
|
||||
// 4. Create a new HTML parser, and associate it with the just created Document node.
|
||||
auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv);
|
||||
parser->m_context_element = JS::make_handle(context_element);
|
||||
parser->m_parsing_fragment = true;
|
||||
|
||||
// 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
|
||||
// 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
|
||||
// - title
|
||||
// - textarea
|
||||
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
|
||||
|
@ -4321,37 +4325,37 @@ Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& cont
|
|||
// Leave the tokenizer in the data state.
|
||||
}
|
||||
|
||||
// 5. Let root be a new html element with no attributes.
|
||||
// 6. Let root be a new html element with no attributes.
|
||||
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
|
||||
|
||||
// 6. Append the element root to the Document node created above.
|
||||
// 7. Append the element root to the Document node created above.
|
||||
MUST(temp_document->append_child(root));
|
||||
|
||||
// 7. Set up the parser's stack of open elements so that it contains just the single element root.
|
||||
// 8. Set up the parser's stack of open elements so that it contains just the single element root.
|
||||
parser->m_stack_of_open_elements.push(root);
|
||||
|
||||
// 8. If the context element is a template element,
|
||||
// 9. If the context element is a template element,
|
||||
if (context_element.local_name() == HTML::TagNames::template_) {
|
||||
// push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
|
||||
parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
|
||||
}
|
||||
|
||||
// FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
|
||||
// FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
|
||||
// Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
|
||||
|
||||
// 10. Reset the parser's insertion mode appropriately.
|
||||
// 11. Reset the parser's insertion mode appropriately.
|
||||
parser->reset_the_insertion_mode_appropriately();
|
||||
|
||||
// 11. Set the parser's form element pointer to the nearest node to the context element that is a form element
|
||||
// 12. Set the parser's form element pointer to the nearest node to the context element that is a form element
|
||||
// (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
|
||||
// (If there is no such form element, the form element pointer keeps its initial value, null.)
|
||||
parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
|
||||
|
||||
// 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
|
||||
// 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
|
||||
// 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
|
||||
// 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
|
||||
parser->run(context_element.document().url());
|
||||
|
||||
// 14. Return the child nodes of root, in tree order.
|
||||
// 15. Return the child nodes of root, in tree order.
|
||||
Vector<JS::Handle<DOM::Node>> children;
|
||||
while (JS::GCPtr<DOM::Node> child = root->first_child()) {
|
||||
MUST(root->remove_child(*child));
|
||||
|
|
|
@ -59,8 +59,11 @@ public:
|
|||
static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);
|
||||
|
||||
DOM::Document& document();
|
||||
|
||||
static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView);
|
||||
// Flag accepted by parse_html_fragment(): selects whether declarative shadow roots are allowed
// while parsing the fragment. Callers that omit it get No.
enum class AllowDeclarativeShadowRoots {
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No);
|
||||
enum class SerializableShadowRoots {
|
||||
No,
|
||||
Yes,
|
||||
|
|
Loading…
Add table
Reference in a new issue