/* * Copyright (c) 2022, Ali Mohammad Pur * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include inline namespace { extern StringView s_xhtml_unified_dtd; } namespace Web { ErrorOr>> resolve_xml_resource(XML::SystemID const&, Optional const& public_id) { static Optional> s_parsed_xhtml_unified_dtd; if (!public_id.has_value()) return Error::from_string_literal("Refusing to load disallowed external entity"); auto public_literal = public_id->public_literal; if (!public_literal.is_one_of( "-//W3C//DTD XHTML 1.0 Transitional//EN", "-//W3C//DTD XHTML 1.1//EN", "-//W3C//DTD XHTML 1.0 Strict//EN", "-//W3C//DTD XHTML 1.0 Frameset//EN", "-//W3C//DTD XHTML Basic 1.0//EN", "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", "-//W3C//DTD MathML 2.0//EN", "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")) return Error::from_string_literal("Refusing to load disallowed external entity"); if (!s_parsed_xhtml_unified_dtd.has_value()) { auto parser = XML::Parser(s_xhtml_unified_dtd, XML::Parser::Options {}); auto result = parser.parse_external_subset(); if (result.is_error()) // We can't really recover from this, so just return the source and let libxml handle it. return ByteString { s_xhtml_unified_dtd }; s_parsed_xhtml_unified_dtd = result.release_value(); } return s_parsed_xhtml_unified_dtd.value(); } XMLDocumentBuilder::XMLDocumentBuilder(DOM::Document& document, XMLScriptingSupport scripting_support) : m_document(document) , m_current_node(m_document) , m_scripting_support(scripting_support) { m_namespace_stack.append({ m_namespace, 1 }); } void XMLDocumentBuilder::set_source(ByteString source) { m_document->set_source(MUST(String::from_byte_string(source))); } void XMLDocumentBuilder::element_start(const XML::Name& name, HashMap const& attributes) { if (m_has_error) return; auto found_explicit_namespace = false; if (auto it = attributes.find("xmlns"); it != attributes.end()) { found_explicit_namespace = true; m_namespace_stack.append({ m_namespace, 1 }); m_namespace = MUST(FlyString::from_deprecated_fly_string(it->value)); } else { m_namespace_stack.last().depth += 1; } if (name == HTML::TagNames::html.to_deprecated_fly_string() && m_namespace != Namespace::HTML) { // HTML / 2.1.3 XML compatibility: https://html.spec.whatwg.org/#xml // To ease migration from HTML to XML, user agents conforming to this specification will place elements in HTML // in the http://www.w3.org/1999/xhtml namespace, at least for the purposes of the DOM and CSS. // The term "HTML elements" refers to any element in that namespace, even in XML documents. if (found_explicit_namespace || m_namespace_stack.size() != 1 || m_namespace_stack.last().depth != 2) { m_has_error = true; return; } m_namespace = Namespace::HTML; } auto node = DOM::create_element(m_document, MUST(FlyString::from_deprecated_fly_string(name)), m_namespace).release_value_but_fixme_should_propagate_errors(); // When an XML parser with XML scripting support enabled creates a script element, // it must have its parser document set and its "force async" flag must be unset. // FIXME: If the parser was created as part of the XML fragment parsing algorithm, then the element must be marked as "already started" also. if (m_scripting_support == XMLScriptingSupport::Enabled && node->is_html_script_element()) { auto& script_element = static_cast(*node); script_element.set_parser_document(Badge {}, m_document); script_element.set_force_async(Badge {}, false); } if (m_current_node->is_html_template_element()) { // When an XML parser would append a node to a template element, it must instead append it to the template element's template contents (a DocumentFragment node). MUST(static_cast(*m_current_node).content()->append_child(node)); } else { MUST(m_current_node->append_child(node)); } for (auto const& attribute : attributes) { // https://www.w3.org/TR/2006/REC-xml-names11-20060816/#ns-decl if (attribute.key == "xmlns" || attribute.key.starts_with("xmlns:"sv)) { auto name = attribute.key; if (!name.is_one_of("xmlns:"sv, "xmlns:xmlns"sv)) { // The prefix xmlns is used only to declare namespace bindings and is by definition bound to the namespace name http://www.w3.org/2000/xmlns/. MUST(node->set_attribute_ns(Namespace::XMLNS, MUST(FlyString::from_deprecated_fly_string(name)), MUST(String::from_byte_string(attribute.value)))); } else { m_has_error = true; } } else if (attribute.key.contains(":"sv)) { if (!attribute.key.starts_with("xml:"sv)) { m_has_error = true; } } MUST(node->set_attribute(MUST(FlyString::from_deprecated_fly_string(attribute.key)), MUST(String::from_byte_string(attribute.value)))); } m_current_node = node.ptr(); } void XMLDocumentBuilder::element_end(const XML::Name& name) { if (m_has_error) return; if (--m_namespace_stack.last().depth == 0) { m_namespace = m_namespace_stack.take_last().ns; } VERIFY(m_current_node->node_name().equals_ignoring_ascii_case(name)); // When an XML parser with XML scripting support enabled creates a script element, [...] // When the element's end tag is subsequently parsed, if (m_scripting_support == XMLScriptingSupport::Enabled && m_current_node->is_html_script_element()) { // the user agent must perform a microtask checkpoint, HTML::perform_a_microtask_checkpoint(); // and then prepare the script element. auto& script_element = static_cast(*m_current_node); script_element.prepare_script(Badge {}); // If this causes there to be a pending parsing-blocking script, then the user agent must run the following steps: if (auto pending_parsing_blocking_script = m_document->pending_parsing_blocking_script()) { // 1. Block this instance of the XML parser, such that the event loop will not run tasks that invoke it. // NOTE: Noop. // 2. Spin the event loop until the parser's Document has no style sheet that is blocking scripts and the pending parsing-blocking script's "ready to be parser-executed" flag is set. if (m_document->has_a_style_sheet_that_is_blocking_scripts() || !pending_parsing_blocking_script->is_ready_to_be_parser_executed()) { HTML::main_thread_event_loop().spin_until([&] { return !m_document->has_a_style_sheet_that_is_blocking_scripts() && pending_parsing_blocking_script->is_ready_to_be_parser_executed(); }); } // 3. Unblock this instance of the XML parser, such that tasks that invoke it can again be run. // NOTE: Noop. // 4. Execute the script element given by the pending parsing-blocking script. pending_parsing_blocking_script->execute_script(); // 5. Set the pending parsing-blocking script to null. m_document->set_pending_parsing_blocking_script(nullptr); } } else if (m_scripting_support == XMLScriptingSupport::Enabled && m_current_node->is_svg_script_element()) { // https://www.w3.org/TR/SVGMobile12/struct.html#ProgressiveRendering // When an end element event occurs for a 'script' element, that element is processed according to the // Script processing section of the Scripting chapter. Further parsing of the document will be blocked // until processing of the 'script' is complete. auto& script_element = static_cast(*m_current_node); script_element.process_the_script_element(); }; m_current_node = m_current_node->parent_node(); } void XMLDocumentBuilder::text(StringView data) { if (m_has_error) return; auto last = m_current_node->last_child(); if (last && last->is_text()) { auto& text_node = static_cast(*last); text_builder.append(text_node.data()); text_builder.append(data); text_node.set_data(MUST(text_builder.to_string())); text_builder.clear(); } else { if (!data.is_empty()) { auto node = m_document->create_text_node(MUST(String::from_utf8(data))); MUST(m_current_node->append_child(node)); } } } void XMLDocumentBuilder::comment(StringView data) { if (m_has_error) return; MUST(m_document->append_child(m_document->create_comment(MUST(String::from_utf8(data))))); } void XMLDocumentBuilder::document_end() { // When an XML parser reaches the end of its input, it must stop parsing. // If the active speculative HTML parser is not null, then stop the speculative HTML parser and return. // NOTE: Noop. // Set the insertion point to undefined. m_current_node = nullptr; // Update the current document readiness to "interactive". m_document->update_readiness(HTML::DocumentReadyState::Interactive); // Pop all the nodes off the stack of open elements. // NOTE: Noop. // While the list of scripts that will execute when the document has finished parsing is not empty: while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) { // Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing has its "ready to be parser-executed" flag set // and the parser's Document has no style sheet that is blocking scripts. HTML::main_thread_event_loop().spin_until([&] { return m_document->scripts_to_execute_when_parsing_has_finished().first()->is_ready_to_be_parser_executed() && !m_document->has_a_style_sheet_that_is_blocking_scripts(); }); // Execute the first script in the list of scripts that will execute when the document has finished parsing. m_document->scripts_to_execute_when_parsing_has_finished().first()->execute_script(); // Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list). (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first(); } // Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps: queue_global_task(HTML::Task::Source::DOMManipulation, m_document, JS::create_heap_function(m_document->heap(), [document = m_document] { // Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object. document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document)); // Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true. auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded); content_loaded_event->set_bubbles(true); document->dispatch_event(content_loaded_event); // Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object. document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document)); // FIXME: Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object. // FIXME: Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL. })); // Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty. HTML::main_thread_event_loop().spin_until([&] { return m_document->scripts_to_execute_as_soon_as_possible().is_empty(); }); // Spin the event loop until there is nothing that delays the load event in the Document. HTML::main_thread_event_loop().spin_until([&] { return !m_document->anything_is_delaying_the_load_event(); }); // Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps: queue_global_task(HTML::Task::Source::DOMManipulation, m_document, JS::create_heap_function(m_document->heap(), [document = m_document] { // Update the current document readiness to "complete". document->update_readiness(HTML::DocumentReadyState::Complete); // If the Document object's browsing context is null, then abort these steps. if (!document->browsing_context()) return; // Let window be the Document's relevant global object. JS::NonnullGCPtr window = verify_cast(relevant_global_object(*document)); // Set the Document's load timing info's load event start time to the current high resolution time given window. document->load_timing_info().load_event_start_time = HighResolutionTime::current_high_resolution_time(window); // Fire an event named load at window, with legacy target override flag set. // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event() // We should reorganize this so that the flag appears explicitly here instead. window->dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load)); // FIXME: Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL. // FIXME: Set the Document object's navigation id to null. // Set the Document's load timing info's load event end time to the current high resolution time given window. document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::current_high_resolution_time(window); // Assert: Document's page showing is false. VERIFY(!document->page_showing()); // Set the Document's page showing flag to true. document->set_page_showing(true); // Fire a page transition event named pageshow at window with false. window->fire_a_page_transition_event(HTML::EventNames::pageshow, false); // Completely finish loading the Document. document->completely_finish_loading(); // FIXME: Queue the navigation timing entry for the Document. })); // FIXME: If the Document's print when loaded flag is set, then run the printing steps. // The Document is now ready for post-load tasks. m_document->set_ready_for_post_load_tasks(true); } } inline namespace { StringView s_xhtml_unified_dtd = R"xmlxmlxml( )xmlxmlxml"sv; }