/*
 * Copyright (c) 2020-2022, Andreas Kling
 * Copyright (c) 2021, Luke Wilde
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace Web::HTML {

// Logs a parse error together with the source location of the call site.
// The message is emitted through dbgln_if(), gated on HTML_PARSER_DEBUG.
static inline void log_parse_error(SourceLocation const& location = SourceLocation::current())
{
    dbgln_if(HTML_PARSER_DEBUG, "Parse error! {}", location);
}

// Public identifier prefixes that select quirks mode.
// which_quirks_mode() returns DOM::QuirksMode::Yes when the DOCTYPE's public identifier
// starts with any of these entries (compared case-insensitively).
static Vector s_quirks_public_ids = {
    "+//Silmaril//dtd html Pro v0r11 19970101//",
    "-//AS//DTD HTML 3.0 asWedit + extensions//",
    "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
    "-//IETF//DTD HTML 2.0 Level 1//",
    "-//IETF//DTD HTML 2.0 Level 2//",
    "-//IETF//DTD HTML 2.0 Strict Level 1//",
    "-//IETF//DTD HTML 2.0 Strict Level 2//",
    "-//IETF//DTD HTML 2.0 Strict//",
    "-//IETF//DTD HTML 2.0//",
    "-//IETF//DTD HTML 2.1E//",
    "-//IETF//DTD HTML 3.0//",
    "-//IETF//DTD HTML 3.2 Final//",
    "-//IETF//DTD HTML 3.2//",
    "-//IETF//DTD HTML 3//",
    "-//IETF//DTD HTML Level 0//",
    "-//IETF//DTD HTML Level 1//",
    "-//IETF//DTD HTML Level 2//",
    "-//IETF//DTD HTML Level 3//",
    "-//IETF//DTD HTML Strict Level 0//",
    "-//IETF//DTD HTML Strict Level 1//",
    "-//IETF//DTD HTML Strict Level 2//",
    "-//IETF//DTD HTML Strict Level 3//",
    "-//IETF//DTD HTML Strict//",
    "-//IETF//DTD HTML//",
    "-//Metrius//DTD Metrius Presentational//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
    "-//Netscape Comm. Corp.//DTD HTML//",
    "-//Netscape Comm. Corp.//DTD Strict HTML//",
    "-//O'Reilly and Associates//DTD HTML 2.0//",
    "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
    "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
    "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
    "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
    "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
    "-//Spyglass//DTD HTML 2.0 Extended//",
    "-//Sun Microsystems Corp.//DTD HotJava HTML//",
    "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
    "-//W3C//DTD HTML 3 1995-03-24//",
    "-//W3C//DTD HTML 3.2 Draft//",
    "-//W3C//DTD HTML 3.2 Final//",
    "-//W3C//DTD HTML 3.2//",
    "-//W3C//DTD HTML 3.2S Draft//",
    "-//W3C//DTD HTML 4.0 Frameset//",
    "-//W3C//DTD HTML 4.0 Transitional//",
    "-//W3C//DTD HTML Experimental 19960712//",
    "-//W3C//DTD HTML Experimental 970421//",
    "-//W3C//DTD W3 HTML//",
    "-//W3O//DTD W3 HTML 3.0//",
    "-//WebTechs//DTD Mozilla HTML 2.0//",
    "-//WebTechs//DTD Mozilla HTML//"
};

// https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
// NOTE: Currently a stub: the element argument is ignored and the result is always false.
static bool is_mathml_text_integration_point(DOM::Element const&)
{
    // FIXME: Implement.
    return false;
}

// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
// NOTE: Only the three SVG cases are recognized here; the MathML annotation-xml cases are still FIXMEs.
static bool is_html_integration_point(DOM::Element const& element)
{
    // A node is an HTML integration point if it is one of the following elements:
    // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
    // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
    // An SVG foreignObject element
    // An SVG desc element
    // An SVG title element
    if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title))
        return true;

    return false;
}

// Creates a parser that tokenizes `input` (interpreted using `encoding`) into `document`.
// The parser registers itself with both the tokenizer and the document, and stores the
// standardized form of `encoding` on the document. An encoding name that cannot be
// standardized trips the VERIFY below.
HTMLParser::HTMLParser(DOM::Document& document, StringView input, DeprecatedString const& encoding)
    : m_tokenizer(input, encoding)
    , m_scripting_enabled(document.is_scripting_enabled())
    , m_document(JS::make_handle(document))
{
    m_tokenizer.set_parser({}, *this);
    m_document->set_parser({}, *this);
    auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
    VERIFY(standardized_encoding.has_value());
    m_document->set_encoding(standardized_encoding.value());
}

// Creates a parser for `document` without passing any input to the tokenizer.
// Unlike the constructor above, this one does not touch the document's encoding.
HTMLParser::HTMLParser(DOM::Document& document)
    : m_scripting_enabled(document.is_scripting_enabled())
    , m_document(JS::make_handle(document))
{
    m_document->set_parser({}, *this);
    m_tokenizer.set_parser({}, *this);
}

HTMLParser::~HTMLParser()
{
}

// Reports every GC-managed object this parser refers to, including the contents of the
// stack of open elements and the list of active formatting elements.
void HTMLParser::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_document);
    visitor.visit(m_head_element);
    visitor.visit(m_form_element);
    visitor.visit(m_context_element);
    visitor.visit(m_character_insertion_node);

    m_stack_of_open_elements.visit_edges(visitor);
    m_list_of_active_formatting_elements.visit_edges(visitor);
}

// Main tree-construction loop: pulls tokens from the tokenizer and dispatches each one.
// The loop ends when the tokenizer yields no token or when m_stop_parsing is set; pending
// character insertions are flushed afterwards.
// NOTE: When the insertion point is reached (and no EOF has been inserted) this returns
//       immediately, without flushing character insertions.
void HTMLParser::run()
{
    for (;;) {
        // FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
        if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
            return;

        auto optional_token = m_tokenizer.next_token();
        if (!optional_token.has_value())
            break;
        auto& token = optional_token.value();

        dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_deprecated_string());

        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher
        // As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher:
        if (m_stack_of_open_elements.is_empty()
            || adjusted_current_node().namespace_() == Namespace::HTML
            || (is_html_integration_point(adjusted_current_node()) && (token.is_start_tag() || token.is_character()))
            || token.is_end_of_file()) {
            // -> If the stack of open elements is empty
            // -> If the adjusted current node is an element in the HTML namespace
            // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
            // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a character token
            // FIXME: -> If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
            // -> If the adjusted current node is an HTML integration point and the token is a start tag
            // -> If the adjusted current node is an HTML integration point and the token is a character token
            // -> If the token is an end-of-file token

            // Process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
            process_using_the_rules_for(m_insertion_mode, token);
        } else {
            // -> Otherwise

            // Process the token according to the rules given in the section for parsing tokens in foreign content.
            process_using_the_rules_for_foreign_content(token);
        }

        if (m_stop_parsing) {
            dbgln_if(HTML_PARSER_DEBUG, "Stop parsing{}! :^)", m_parsing_fragment ? " fragment" : "");
            break;
        }
    }

    flush_character_insertions();
}

// Parses a complete document: records `url` and the tokenizer's source on the document,
// runs the tree-construction loop, performs the "the end" steps, and finally detaches
// this parser from the document.
void HTMLParser::run(const AK::URL& url)
{
    m_document->set_url(url);
    m_document->set_source(m_tokenizer.source());
    run();
    the_end();
    m_document->detach_parser({});
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-end
void HTMLParser::the_end()
{
    // Once the user agent stops parsing the document, the user agent must run the following steps:

    // FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.

    // 2. Set the insertion point to undefined.
    m_tokenizer.undefine_insertion_point();

    // 3. Update the current document readiness to "interactive".
    m_document->update_readiness(HTML::DocumentReadyState::Interactive);

    // 4. Pop all the nodes off the stack of open elements.
    while (!m_stack_of_open_elements.is_empty())
        (void)m_stack_of_open_elements.pop();

    // 5. While the list of scripts that will execute when the document has finished parsing is not empty:
    while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) {
        // 1. Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing
        //    has its "ready to be parser-executed" flag set and the parser's Document has no style sheet that is blocking scripts.
        main_thread_event_loop().spin_until([&] {
            return m_document->scripts_to_execute_when_parsing_has_finished().first()->is_ready_to_be_parser_executed()
                && !m_document->has_a_style_sheet_that_is_blocking_scripts();
        });

        // 2. Execute the first script in the list of scripts that will execute when the document has finished parsing.
        m_document->scripts_to_execute_when_parsing_has_finished().first()->execute_script();

        // 3. Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
        (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first();
    }

    // 6. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] {
        // 1. Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::unsafe_shared_current_time();

        // 2. Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
        auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded).release_value_but_fixme_should_propagate_errors();
        content_loaded_event->set_bubbles(true);
        document->dispatch_event(content_loaded_event);

        // 3. Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::unsafe_shared_current_time();

        // FIXME: 4. Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.

        // FIXME: 5. Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
    });

    // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty.
    // NOTE(review): only scripts_to_execute_as_soon_as_possible() is consulted here — confirm whether the in-order list is tracked elsewhere.
    main_thread_event_loop().spin_until([&] {
        return m_document->scripts_to_execute_as_soon_as_possible().is_empty();
    });

    // 8. Spin the event loop until there is nothing that delays the load event in the Document.
    // FIXME: Track down all the things that are supposed to delay the load event.
    main_thread_event_loop().spin_until([&] {
        return m_document->number_of_things_delaying_the_load_event() == 0;
    });

    // 9. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] {
        // 1. Update the current document readiness to "complete".
        document->update_readiness(HTML::DocumentReadyState::Complete);

        // 2. If the Document object's browsing context is null, then abort these steps.
        if (!document->browsing_context())
            return;

        // 3. Let window be the Document's relevant global object.
        JS::NonnullGCPtr window = document->window();

        // 4. Set the Document's load timing info's load event start time to the current high resolution time given window.
        document->load_timing_info().load_event_start_time = HighResolutionTime::unsafe_shared_current_time();

        // 5. Fire an event named load at window, with legacy target override flag set.
        // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
        //        We should reorganize this so that the flag appears explicitly here instead.
        window->dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load).release_value_but_fixme_should_propagate_errors());

        // FIXME: 6. Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.

        // FIXME: 7. Set the Document object's navigation id to null.

        // 8. Set the Document's load timing info's load event end time to the current high resolution time given window.
        document->load_timing_info().load_event_end_time = HighResolutionTime::unsafe_shared_current_time();

        // 9. Assert: Document's page showing is false.
        VERIFY(!document->page_showing());

        // 10. Set the Document's page showing flag to true.
        document->set_page_showing(true);

        // 11. Fire a page transition event named pageshow at window with false.
        window->fire_a_page_transition_event(HTML::EventNames::pageshow, false);

        // 12. Completely finish loading the Document.
        document->completely_finish_loading();

        // FIXME: 13. Queue the navigation timing entry for the Document.
    });

    // FIXME: 10. If the Document's print when loaded flag is set, then run the printing steps.

    // 11. The Document is now ready for post-load tasks.
    m_document->set_ready_for_post_load_tasks(true);
}

// Dispatches `token` to the handler that implements the rules of insertion mode `mode`.
// `mode` is passed explicitly (rather than read from m_insertion_mode) so that handlers can
// reprocess a token using the rules of another mode. An unknown mode is a fatal error.
void HTMLParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token)
{
    switch (mode) {
    case InsertionMode::Initial:
        handle_initial(token);
        break;
    case InsertionMode::BeforeHTML:
        handle_before_html(token);
        break;
    case InsertionMode::BeforeHead:
        handle_before_head(token);
        break;
    case InsertionMode::InHead:
        handle_in_head(token);
        break;
    case InsertionMode::InHeadNoscript:
        handle_in_head_noscript(token);
        break;
    case InsertionMode::AfterHead:
        handle_after_head(token);
        break;
    case InsertionMode::InBody:
        handle_in_body(token);
        break;
    case InsertionMode::AfterBody:
        handle_after_body(token);
        break;
    case InsertionMode::AfterAfterBody:
        handle_after_after_body(token);
        break;
    case InsertionMode::Text:
        handle_text(token);
        break;
    case InsertionMode::InTable:
        handle_in_table(token);
        break;
    case InsertionMode::InTableBody:
        handle_in_table_body(token);
        break;
    case InsertionMode::InRow:
        handle_in_row(token);
        break;
    case InsertionMode::InCell:
        handle_in_cell(token);
        break;
    case InsertionMode::InTableText:
        handle_in_table_text(token);
        break;
    case InsertionMode::InSelectInTable:
        handle_in_select_in_table(token);
        break;
    case InsertionMode::InSelect:
        handle_in_select(token);
        break;
    case InsertionMode::InCaption:
        handle_in_caption(token);
        break;
    case InsertionMode::InColumnGroup:
        handle_in_column_group(token);
        break;
    case InsertionMode::InTemplate:
        handle_in_template(token);
        break;
    case InsertionMode::InFrameset:
        handle_in_frameset(token);
        break;
    case InsertionMode::AfterFrameset:
        handle_after_frameset(token);
        break;
    case InsertionMode::AfterAfterFrameset:
        handle_after_after_frameset(token);
        break;
    default:
        VERIFY_NOT_REACHED();
    }
}

// Derives the document's quirks mode from a DOCTYPE token.
// The checks run in this order, and the first match wins:
//   - QuirksMode::Yes:     force-quirks flag set, name other than "html", one of the exact
//                          public/system identifiers below, a public identifier starting with an
//                          entry of s_quirks_public_ids, or an HTML 4.01 Frameset/Transitional
//                          public identifier with a missing system identifier.
//   - QuirksMode::Limited: an XHTML 1.0 Frameset/Transitional public identifier, or an HTML 4.01
//                          Frameset/Transitional public identifier with a system identifier present.
//   - QuirksMode::No:      everything else.
DOM::QuirksMode HTMLParser::which_quirks_mode(HTMLToken const& doctype_token) const
{
    if (doctype_token.doctype_data().force_quirks)
        return DOM::QuirksMode::Yes;

    // NOTE: The tokenizer puts the name into lower case for us.
    if (doctype_token.doctype_data().name != "html")
        return DOM::QuirksMode::Yes;

    auto const& public_identifier = doctype_token.doctype_data().public_identifier;
    auto const& system_identifier = doctype_token.doctype_data().system_identifier;

    if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//"sv))
        return DOM::QuirksMode::Yes;

    if (public_identifier.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN"sv))
        return DOM::QuirksMode::Yes;

    if (public_identifier.equals_ignoring_case("HTML"sv))
        return DOM::QuirksMode::Yes;

    if (system_identifier.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"sv))
        return DOM::QuirksMode::Yes;

    for (auto& public_id : s_quirks_public_ids) {
        if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive))
            return DOM::QuirksMode::Yes;
    }

    if (doctype_token.doctype_data().missing_system_identifier) {
        if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
            return DOM::QuirksMode::Yes;

        if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
            return DOM::QuirksMode::Yes;
    }

    if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive))
        return DOM::QuirksMode::Limited;

    if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive))
        return DOM::QuirksMode::Limited;

    if (!doctype_token.doctype_data().missing_system_identifier) {
        if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
            return DOM::QuirksMode::Limited;

        if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
            return DOM::QuirksMode::Limited;
    }

    return DOM::QuirksMode::No;
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
void HTMLParser::handle_initial(HTMLToken& token)
{
    // Whitespace character tokens are ignored.
    if (token.is_character() && token.is_parser_whitespace()) {
        return;
    }

    // Comments are appended directly to the document.
    if (token.is_comment()) {
        auto comment = realm().heap().allocate(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
        MUST(document().append_child(*comment));
        return;
    }

    // A DOCTYPE produces a doctype node, determines the quirks mode and moves on to "before html".
    if (token.is_doctype()) {
        auto doctype = realm().heap().allocate(realm(), document()).release_allocated_value_but_fixme_should_propagate_errors();
        doctype->set_name(token.doctype_data().name);
        doctype->set_public_id(token.doctype_data().public_identifier);
        doctype->set_system_id(token.doctype_data().system_identifier);
        MUST(document().append_child(*doctype));
        document().set_quirks_mode(which_quirks_mode(token));
        m_insertion_mode = InsertionMode::BeforeHTML;
        return;
    }

    // Anything else: parse error, force quirks mode and reprocess the token in "before html".
    log_parse_error();
    document().set_quirks_mode(DOM::QuirksMode::Yes);
    m_insertion_mode = InsertionMode::BeforeHTML;
    process_using_the_rules_for(InsertionMode::BeforeHTML, token);
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
void HTMLParser::handle_before_html(HTMLToken& token)
{
    // -> A DOCTYPE token
    if (token.is_doctype()) {
        // Parse error. Ignore the token.
        log_parse_error();
        return;
    }

    // -> A comment token
    if (token.is_comment()) {
        // Insert a comment as the last child of the Document object.
        auto comment = realm().heap().allocate(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
        MUST(document().append_child(*comment));
        return;
    }

    // -> A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
    if (token.is_character() && token.is_parser_whitespace()) {
        // Ignore the token.
        return;
    }

    // -> A start tag whose tag name is "html"
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
        // Create an element for the token in the HTML namespace, with the Document as the intended parent. Append it to the Document object. Put this element in the stack of open elements.
        auto element = create_element_for(token, Namespace::HTML, document());
        MUST(document().append_child(*element));
        m_stack_of_open_elements.push(move(element));

        // Switch the insertion mode to "before head".
        m_insertion_mode = InsertionMode::BeforeHead;
        return;
    }

    // -> An end tag whose tag name is one of: "head", "body", "html", "br"
    if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
        // Act as described in the "anything else" entry below.
        goto AnythingElse;
    }

    // -> Any other end tag
    if (token.is_end_tag()) {
        // Parse error. Ignore the token.
        log_parse_error();
        return;
    }

    // -> Anything else
AnythingElse:
    // Create an html element whose node document is the Document object. Append it to the Document object. Put this element in the stack of open elements.
    auto element = create_element(document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
    MUST(document().append_child(element));
    m_stack_of_open_elements.push(element);

    // Switch the insertion mode to "before head", then reprocess the token.
    m_insertion_mode = InsertionMode::BeforeHead;
    process_using_the_rules_for(InsertionMode::BeforeHead, token);
    return;
}

// Returns the current node as reported by the stack of open elements.
DOM::Element& HTMLParser::current_node()
{
    return m_stack_of_open_elements.current_node();
}

// Returns the context element when parsing a fragment and exactly one element is on the
// stack of open elements; otherwise returns the current node.
DOM::Element& HTMLParser::adjusted_current_node()
{
    if (m_parsing_fragment && m_stack_of_open_elements.elements().size() == 1)
        return *m_context_element;

    return current_node();
}

// Returns the element located directly before the last entry of the stack of open elements.
// NOTE(review): there is no explicit check here that the stack holds at least two elements —
//               presumably callers guarantee it; confirm.
DOM::Element& HTMLParser::node_before_current_node()
{
    return *m_stack_of_open_elements.elements().at(m_stack_of_open_elements.elements().size() - 2);
}

// https://html.spec.whatwg.org/multipage/parsing.html#appropriate-place-for-inserting-a-node
// Returns a { parent, insert-before-sibling } pair; a null sibling means "after the last child".
HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node(JS::GCPtr override_target)
{
    // The target is the override target when one is given, and the current node otherwise.
    auto& target = override_target ? *override_target.ptr() : current_node();
    HTMLParser::AdjustedInsertionLocation adjusted_insertion_location;

    // 2. Determine the adjusted insertion location using the first matching steps from the following list:

    // `-> If foster parenting is enabled and target is a table, tbody, tfoot, thead, or tr element
    if (m_foster_parenting && target.local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
        // 1. Let last template be the last template element in the stack of open elements, if any.
        auto last_template = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::template_);

        // 2. Let last table be the last table element in the stack of open elements, if any.
        auto last_table = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::table);

        // 3. If there is a last template and either there is no last table,
        //    or there is one, but last template is lower (more recently added) than last table in the stack of open elements,
        if (last_template.element && (!last_table.element || last_template.index > last_table.index)) {
            // then: let adjusted insertion location be inside last template's template contents, after its last child (if any), and abort these steps.

            // NOTE: This returns the template content, so no need to check the parent is a template.
            return { verify_cast(*last_template.element).content().ptr(), nullptr };
        }

        // 4. If there is no last table, then let adjusted insertion location be inside the first element in the stack of open elements (the html element),
        //    after its last child (if any), and abort these steps. (fragment case)
        if (!last_table.element) {
            VERIFY(m_parsing_fragment);

            // Guaranteed not to be a template element (it will be the html element),
            // so no need to check the parent is a template.
            return { *m_stack_of_open_elements.elements().first(), nullptr };
        }

        // 5. If last table has a parent node, then let adjusted insertion location be inside last table's parent node, immediately before last table, and abort these steps.
        if (last_table.element->parent_node()) {
            adjusted_insertion_location = { last_table.element->parent_node(), last_table.element.ptr() };
        } else {
            // 6. Let previous element be the element immediately above last table in the stack of open elements.
            auto previous_element = m_stack_of_open_elements.element_immediately_above(*last_table.element);

            // 7. Let adjusted insertion location be inside previous element, after its last child (if any).
            adjusted_insertion_location = { previous_element.ptr(), nullptr };
        }
    } else {
        // `-> Otherwise
        //     Let adjusted insertion location be inside target, after its last child (if any).
        adjusted_insertion_location = { target, nullptr };
    }

    // When the type check on the chosen parent succeeds, insertion is redirected to the object
    // returned by its content() accessor, after that object's last child.
    if (is(*adjusted_insertion_location.parent))
        return { verify_cast(*adjusted_insertion_location.parent).content().ptr(), nullptr };

    return adjusted_insertion_location;
}

// Creates an element for `token` in `namespace_`, owned by the node document of `intended_parent`.
// The numbered steps below follow the specification's "create an element for a token" algorithm;
// the custom-element related steps are not implemented yet (see the FIXMEs).
JS::NonnullGCPtr HTMLParser::create_element_for(HTMLToken const& token, DeprecatedFlyString const& namespace_, DOM::Node const& intended_parent)
{
    // FIXME: 1. If the active speculative HTML parser is not null, then return the result of creating a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token.
    // FIXME: 2. Otherwise, optionally create a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token.

    // 3. Let document be intended parent's node document.
    JS::NonnullGCPtr document = intended_parent.document();

    // 4. Let local name be the tag name of the token.
    auto local_name = token.tag_name();

    // FIXME: 5. Let is be the value of the "is" attribute in the given token, if such an attribute exists, or null otherwise.
    // FIXME: 6. Let definition be the result of looking up a custom element definition given document, given namespace, local name, and is.
    // FIXME: 7. If definition is non-null and the parser was not created as part of the HTML fragment parsing algorithm, then let will execute script be true. Otherwise, let it be false.
    // FIXME: 8. If will execute script is true, then:
    // FIXME:    1. Increment document's throw-on-dynamic-markup-insertion counter.
    // FIXME:    2. If the JavaScript execution context stack is empty, then perform a microtask checkpoint.
    // FIXME:    3. Push a new element queue onto document's relevant agent's custom element reactions stack.

    // 9. Let element be the result of creating an element given document, localName, given namespace, null, and is.
    // FIXME: If will execute script is true, set the synchronous custom elements flag; otherwise, leave it unset.
    // FIXME: Pass in `null` and `is`.
    auto element = create_element(*document, local_name, namespace_).release_value_but_fixme_should_propagate_errors();

    // 10. Append each attribute in the given token to element.
    // FIXME: This isn't the exact `append` the spec is talking about.
    token.for_each_attribute([&](auto& attribute) {
        MUST(element->set_attribute(attribute.local_name, attribute.value));
        return IterationDecision::Continue;
    });

    // FIXME: 11. If will execute script is true, then:
    // FIXME:     1. Let queue be the result of popping from document's relevant agent's custom element reactions stack. (This will be the same element queue as was pushed above.)
    // FIXME:     2. Invoke custom element reactions in queue.
    // FIXME:     3. Decrement document's throw-on-dynamic-markup-insertion counter.

    // FIXME: 12. If element has an xmlns attribute in the XMLNS namespace whose value is not exactly the same as the element's namespace, that is a parse error.
    //            Similarly, if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not the XLink Namespace, that is a parse error.

    // FIXME: 13. If element is a resettable element, invoke its reset algorithm. (This initializes the element's value and checkedness based on the element's attributes.)

    // 14. If element is a form-associated element and not a form-associated custom element, the form element pointer is not null, there is no template element on the stack of open elements,
    //     element is either not listed or doesn't have a form attribute, and the intended parent is in the same tree as the element pointed to by the form element pointer,
    //     then associate element with the form element pointed to by the form element pointer and set element's parser inserted flag.
    // FIXME: Check if the element is not a form-associated custom element.
    if (is(*element)) {
        auto* form_associated_element = dynamic_cast(element.ptr());
        VERIFY(form_associated_element);

        auto& html_element = form_associated_element->form_associated_element_to_html_element();

        if (m_form_element.ptr()
            && !m_stack_of_open_elements.contains(HTML::TagNames::template_)
            && (!form_associated_element->is_listed() || !html_element.has_attribute(HTML::AttributeNames::form))
            && &intended_parent.root() == &m_form_element->root()) {
            form_associated_element->set_form(m_form_element.ptr());
            form_associated_element->set_parser_inserted({});
        }
    }

    // 15. Return element.
    return element;
}

// https://html.spec.whatwg.org/multipage/parsing.html#insert-a-foreign-element
// NOTE: The element is pushed onto the stack of open elements and returned even when it could
//       not be inserted into the tree.
JS::NonnullGCPtr HTMLParser::insert_foreign_element(HTMLToken const& token, DeprecatedFlyString const& namespace_)
{
    auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();

    // NOTE: adjusted_insertion_location.parent will be non-null, however, it uses RP to be able to default-initialize HTMLParser::AdjustedInsertionLocation.
    auto element = create_element_for(token, namespace_, *adjusted_insertion_location.parent);

    auto pre_insertion_validity = adjusted_insertion_location.parent->ensure_pre_insertion_validity(*element, adjusted_insertion_location.insert_before_sibling);

    // NOTE: If it's not possible to insert the element at the adjusted insertion location, the element is simply dropped.
    if (!pre_insertion_validity.is_exception()) {
        if (!m_parsing_fragment) {
            // FIXME: push a new element queue onto element's relevant agent's custom element reactions stack.
        }

        adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling);

        if (!m_parsing_fragment) {
            // FIXME: pop the element queue from element's relevant agent's custom element reactions stack, and invoke custom element reactions in that queue.
        }
    }

    m_stack_of_open_elements.push(element);
    return element;
}

// Inserts an element for `token` in the HTML namespace; see insert_foreign_element().
JS::NonnullGCPtr HTMLParser::insert_html_element(HTMLToken const& token)
{
    return insert_foreign_element(token, Namespace::HTML);
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
void HTMLParser::handle_before_head(HTMLToken& token)
{
    // Whitespace character tokens are ignored.
    if (token.is_character() && token.is_parser_whitespace()) {
        return;
    }

    if (token.is_comment()) {
        insert_comment(token);
        return;
    }

    // A DOCTYPE here is a parse error and is ignored.
    if (token.is_doctype()) {
        log_parse_error();
        return;
    }

    // <html> start tags are processed using the "in body" rules.
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
        process_using_the_rules_for(InsertionMode::InBody, token);
        return;
    }

    // <head> start tag: insert it, remember it as the head element and switch to "in head".
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::head) {
        auto element = insert_html_element(token);
        m_head_element = JS::make_handle(verify_cast(*element));
        m_insertion_mode = InsertionMode::InHead;
        return;
    }

    // </head>, </body>, </html> and </br> fall through to "anything else".
    if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
        goto AnythingElse;
    }

    // Any other end tag is a parse error and is ignored.
    if (token.is_end_tag()) {
        log_parse_error();
        return;
    }

AnythingElse:
    // Insert an implied <head> element, switch to "in head" and reprocess the token there.
    m_head_element = JS::make_handle(verify_cast(*insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::head))));
    m_insertion_mode = InsertionMode::InHead;
    process_using_the_rules_for(InsertionMode::InHead, token);
    return;
}

// Creates a comment node from `token` and inserts it at the appropriate place for inserting a node.
void HTMLParser::insert_comment(HTMLToken& token)
{
    auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
    adjusted_insertion_location.parent->insert_before(realm().heap().allocate(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(), adjusted_insertion_location.insert_before_sibling);
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
void HTMLParser::handle_in_head(HTMLToken& token)
{
    // Whitespace is inserted as a character.
    if (token.is_parser_whitespace()) {
        insert_character(token.code_point());
        return;
    }

    if (token.is_comment()) {
        insert_comment(token);
        return;
    }

    // A DOCTYPE here is a parse error and is ignored.
    if (token.is_doctype()) {
        log_parse_error();
        return;
    }

    // <html> start tags are processed using the "in body" rules.
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
        process_using_the_rules_for(InsertionMode::InBody, token);
        return;
    }

    // <base>, <basefont>, <bgsound>, <link>: insert the element and immediately pop it again.
    if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link)) {
        (void)insert_html_element(token);
        (void)m_stack_of_open_elements.pop();
        token.acknowledge_self_closing_flag_if_set();
        return;
    }

    // <meta>: handled the same way as the elements above.
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::meta) {
        (void)insert_html_element(token);
        (void)m_stack_of_open_elements.pop();
        token.acknowledge_self_closing_flag_if_set();
        return;
    }

    // <title>: insert it, switch the tokenizer to RCDATA and continue in "text" mode.
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::title) {
        (void)insert_html_element(token);
        m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
        m_original_insertion_mode = m_insertion_mode;
        m_insertion_mode = InsertionMode::Text;
        return;
    }

    // <noscript> (with scripting enabled), <noframes> and <style> are parsed as generic raw text elements.
    if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled) || token.tag_name() == HTML::TagNames::noframes || token.tag_name() == HTML::TagNames::style)) {
        parse_generic_raw_text_element(token);
        return;
    }

    // <noscript> with scripting disabled: insert it and switch to "in head noscript".
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noscript && !m_scripting_enabled) {
        (void)insert_html_element(token);
        m_insertion_mode = InsertionMode::InHeadNoscript;
        return;
    }

    // <script>: create and configure the script element, insert it, then switch to script data / "text" mode.
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) {
        auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
        auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent);
        auto& script_element = verify_cast(*element);
        script_element.set_parser_document(Badge {}, document());
        script_element.set_force_async(Badge {}, false);
        script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line

        if (m_parsing_fragment) {
            script_element.set_already_started(Badge {}, true);
        }

        if (m_invoked_via_document_write) {
            TODO();
        }

        adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling, false);
        m_stack_of_open_elements.push(element);
        m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
        m_original_insertion_mode = m_insertion_mode;
        m_insertion_mode = InsertionMode::Text;
        return;
    }

    // </head>: pop the current node and switch to "after head".
    if (token.is_end_tag() && token.tag_name() == HTML::TagNames::head) {
        (void)m_stack_of_open_elements.pop();
        m_insertion_mode = InsertionMode::AfterHead;
        return;
    }

    // </body>, </html> and </br> fall through to "anything else".
    if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
        goto AnythingElse;
    }

    // <template> start tag: insert it, add a formatting marker and enter "in template".
    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) {
        (void)insert_html_element(token);
        m_list_of_active_formatting_elements.add_marker();
        m_frameset_ok = false;
        m_insertion_mode = InsertionMode::InTemplate;
        m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
        return;
    }

    // </template>: a parse error (ignored) when no template is open; otherwise close it.
    if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
        if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
            log_parse_error();
            return;
        }

        generate_all_implied_end_tags_thoroughly();

        if (current_node().local_name() != HTML::TagNames::template_)
            log_parse_error();

        m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_);
        m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
        m_stack_of_template_insertion_modes.take_last();
        reset_the_insertion_mode_appropriately();
        return;
    }

    // A <head> start tag or any remaining end tag is a parse error and is ignored.
    if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) {
        log_parse_error();
        return;
    }

AnythingElse:
    // Pop the current node, switch to "after head" and reprocess the token there.
    (void)m_stack_of_open_elements.pop();
    m_insertion_mode = InsertionMode::AfterHead;
    process_using_the_rules_for(m_insertion_mode, token);
}

void HTMLParser::handle_in_head_noscript(HTMLToken& token)
{
    if (token.is_doctype()) {
        log_parse_error();
        return;
    }

    if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
        process_using_the_rules_for(InsertionMode::InBody, token);
        return;
    }

    if (token.is_end_tag() && token.tag_name() == HTML::TagNames::noscript)
{ (void)m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::InHead; return; } if (token.is_parser_whitespace() || token.is_comment() || (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::style))) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { goto AnythingElse; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::noscript)) { log_parse_error(); return; } AnythingElse: log_parse_error(); (void)m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::InHead; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::parse_generic_raw_text_element(HTMLToken& token) { (void)insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); m_original_insertion_mode = m_insertion_mode; m_insertion_mode = InsertionMode::Text; } DOM::Text* HTMLParser::find_character_insertion_node() { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); if (adjusted_insertion_location.insert_before_sibling) { TODO(); } if (adjusted_insertion_location.parent->is_document()) return nullptr; if (adjusted_insertion_location.parent->last_child() && adjusted_insertion_location.parent->last_child()->is_text()) return verify_cast(adjusted_insertion_location.parent->last_child()); auto new_text_node = realm().heap().allocate(realm(), document(), "").release_allocated_value_but_fixme_should_propagate_errors(); MUST(adjusted_insertion_location.parent->append_child(*new_text_node)); return new_text_node; } void HTMLParser::flush_character_insertions() { if (m_character_insertion_builder.is_empty()) return; m_character_insertion_node->set_data(m_character_insertion_builder.to_deprecated_string()); 
m_character_insertion_node->parent()->children_changed(); m_character_insertion_builder.clear(); } void HTMLParser::insert_character(u32 data) { auto node = find_character_insertion_node(); if (node == m_character_insertion_node.ptr()) { m_character_insertion_builder.append(Utf32View { &data, 1 }); return; } if (!m_character_insertion_node.ptr()) { m_character_insertion_node = JS::make_handle(node); m_character_insertion_builder.append(Utf32View { &data, 1 }); return; } flush_character_insertions(); m_character_insertion_node = JS::make_handle(node); m_character_insertion_builder.append(Utf32View { &data, 1 }); } void HTMLParser::handle_after_head(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { insert_character(token.code_point()); return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { (void)insert_html_element(token); m_frameset_ok = false; m_insertion_mode = InsertionMode::InBody; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { (void)insert_html_element(token); m_insertion_mode = InsertionMode::InFrameset; return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { log_parse_error(); m_stack_of_open_elements.push(*m_head_element); process_using_the_rules_for(InsertionMode::InHead, token); m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == m_head_element.ptr(); }); return; } if (token.is_end_tag() && token.tag_name() == 
HTML::TagNames::template_) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { goto AnythingElse; } if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) { log_parse_error(); return; } AnythingElse: (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::body)); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::generate_implied_end_tags(DeprecatedFlyString const& exception) { while (current_node().local_name() != exception && current_node().local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc)) (void)m_stack_of_open_elements.pop(); } void HTMLParser::generate_all_implied_end_tags_thoroughly() { while (current_node().local_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) (void)m_stack_of_open_elements.pop(); } void HTMLParser::close_a_p_element() { generate_implied_end_tags(HTML::TagNames::p); if (current_node().local_name() != HTML::TagNames::p) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::p); } void HTMLParser::handle_after_body(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_comment()) { auto& 
insertion_location = m_stack_of_open_elements.first(); MUST(insertion_location.append_child(realm().heap().allocate(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors())); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { if (m_parsing_fragment) { log_parse_error(); return; } m_insertion_mode = InsertionMode::AfterAfterBody; return; } if (token.is_end_of_file()) { stop_parsing(); return; } log_parse_error(); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(InsertionMode::InBody, token); } void HTMLParser::handle_after_after_body(HTMLToken& token) { if (token.is_comment()) { auto comment = realm().heap().allocate(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(); MUST(document().append_child(*comment)); return; } if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_end_of_file()) { stop_parsing(); return; } log_parse_error(); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(m_insertion_mode, token); } // https://html.spec.whatwg.org/multipage/parsing.html#reconstruct-the-active-formatting-elements void HTMLParser::reconstruct_the_active_formatting_elements() { // 1. If there are no entries in the list of active formatting elements, then there is nothing to reconstruct; stop this algorithm. if (m_list_of_active_formatting_elements.is_empty()) return; // 2. 
If the last (most recently added) entry in the list of active formatting elements is a marker, or if it is an element that is in the stack of open elements, // then there is nothing to reconstruct; stop this algorithm. if (m_list_of_active_formatting_elements.entries().last().is_marker()) return; if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element)) return; // 3. Let entry be the last (most recently added) element in the list of active formatting elements. size_t index = m_list_of_active_formatting_elements.entries().size() - 1; // NOTE: Entry will never be null, but must be a pointer instead of a reference to allow rebinding. auto* entry = &m_list_of_active_formatting_elements.entries().at(index); Rewind: // 4. Rewind: If there are no entries before entry in the list of active formatting elements, then jump to the step labeled create. if (index == 0) goto Create; // 5. Let entry be the entry one earlier than entry in the list of active formatting elements. --index; entry = &m_list_of_active_formatting_elements.entries().at(index); // 6. If entry is neither a marker nor an element that is also in the stack of open elements, go to the step labeled rewind. if (!entry->is_marker() && !m_stack_of_open_elements.contains(*entry->element)) goto Rewind; Advance: // 7. Advance: Let entry be the element one later than entry in the list of active formatting elements. ++index; entry = &m_list_of_active_formatting_elements.entries().at(index); Create: // 8. Create: Insert an HTML element for the token for which the element entry was created, to obtain new element. VERIFY(!entry->is_marker()); // FIXME: Hold on to the real token! auto new_element = insert_html_element(HTMLToken::make_start_tag(entry->element->local_name())); // 9. Replace the entry for entry in the list with an entry for new element. m_list_of_active_formatting_elements.entries().at(index).element = JS::make_handle(new_element); // 10. 
If the entry for new element in the list of active formatting elements is not the last entry in the list, return to the step labeled advance. if (index != m_list_of_active_formatting_elements.entries().size() - 1) goto Advance; } // https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm HTMLParser::AdoptionAgencyAlgorithmOutcome HTMLParser::run_the_adoption_agency_algorithm(HTMLToken& token) { // 1. Let subject be token's tag name. auto& subject = token.tag_name(); // 2. If the current node is an HTML element whose tag name is subject, // and the current node is not in the list of active formatting elements, // then pop the current node off the stack of open elements, and return. if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) { (void)m_stack_of_open_elements.pop(); return AdoptionAgencyAlgorithmOutcome::DoNothing; } // 3. Let outer loop counter be 0. size_t outer_loop_counter = 0; // 4. While true: while (true) { // 1. If outer loop counter is greater than or equal to 8, then return. if (outer_loop_counter >= 8) return AdoptionAgencyAlgorithmOutcome::DoNothing; // 2. Increment outer loop counter by 1. outer_loop_counter++; // 3. Let formatting element be the last element in the list of active formatting elements that: // - is between the end of the list and the last marker in the list, if any, or the start of the list otherwise, and // - has the tag name subject. auto* formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); // If there is no such element, then return and instead act as described in the "any other end tag" entry above. if (!formatting_element) return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps; // 4. 
If formatting element is not in the stack of open elements, if (!m_stack_of_open_elements.contains(*formatting_element)) { // then this is a parse error; log_parse_error(); // remove the element from the list, m_list_of_active_formatting_elements.remove(*formatting_element); // and return. return AdoptionAgencyAlgorithmOutcome::DoNothing; } // 5. If formatting element is in the stack of open elements, but the element is not in scope, if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { // then this is a parse error; log_parse_error(); // return. return AdoptionAgencyAlgorithmOutcome::DoNothing; } // 6. If formatting element is not the current node, if (formatting_element != ¤t_node()) { // this is a parse error. (But do not return.) log_parse_error(); } // 7. Let furthest block be the topmost node in the stack of open elements that is lower in the stack than formatting element, // and is an element in the special category. There might not be one. JS::GCPtr furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element); // 8. If there is no furthest block if (!furthest_block) { // then the UA must first pop all the nodes from the bottom of the stack of open elements, // from the current node up to and including formatting element, while (¤t_node() != formatting_element) (void)m_stack_of_open_elements.pop(); (void)m_stack_of_open_elements.pop(); // then remove formatting element from the list of active formatting elements, m_list_of_active_formatting_elements.remove(*formatting_element); // and finally return. return AdoptionAgencyAlgorithmOutcome::DoNothing; } // 9. Let common ancestor be the element immediately above formatting element in the stack of open elements. auto common_ancestor = m_stack_of_open_elements.element_immediately_above(*formatting_element); // 10. Let a bookmark note the position of formatting element in the list of active formatting elements // relative to the elements on either side of it in the list. 
auto bookmark = m_list_of_active_formatting_elements.find_index(*formatting_element).value(); // 11. Let node and last node be furthest block. auto node = furthest_block; auto last_node = furthest_block; // Keep track of this for later auto node_above_node = m_stack_of_open_elements.element_immediately_above(*node); // 12. Let inner loop counter be 0. size_t inner_loop_counter = 0; // 13. While true: while (true) { // 1. Increment inner loop counter by 1. inner_loop_counter++; // 2. Let node be the element immediately above node in the stack of open elements, // or if node is no longer in the stack of open elements (e.g. because it got removed by this algorithm), // the element that was immediately above node in the stack of open elements before node was removed. node = node_above_node; VERIFY(node); // Keep track of this for later node_above_node = m_stack_of_open_elements.element_immediately_above(*node); // 3. If node is formatting element, then break. if (node.ptr() == formatting_element) break; // 4. If inner loop counter is greater than 3 and node is in the list of active formatting elements, if (inner_loop_counter > 3 && m_list_of_active_formatting_elements.contains(*node)) { auto node_index = m_list_of_active_formatting_elements.find_index(*node); if (node_index.has_value() && node_index.value() < bookmark) bookmark--; // then remove node from the list of active formatting elements. m_list_of_active_formatting_elements.remove(*node); } // 5. If node is not in the list of active formatting elements if (!m_list_of_active_formatting_elements.contains(*node)) { // then remove node from the stack of open elements and continue. m_stack_of_open_elements.remove(*node); continue; } // 6. 
Create an element for the token for which the element node was created, // in the HTML namespace, with common ancestor as the intended parent; // FIXME: hold onto the real token auto element = create_element_for(HTMLToken::make_start_tag(node->local_name()), Namespace::HTML, *common_ancestor); // replace the entry for node in the list of active formatting elements with an entry for the new element, m_list_of_active_formatting_elements.replace(*node, *element); // replace the entry for node in the stack of open elements with an entry for the new element, m_stack_of_open_elements.replace(*node, element); // and let node be the new element. node = element; // 7. If last node is furthest block, if (last_node == furthest_block) { // then move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements. bookmark = m_list_of_active_formatting_elements.find_index(*node).value() + 1; } // 8. Append last node to node. MUST(node->append_child(*last_node)); // 9. Set last node to node. last_node = node; } // 14. Insert whatever last node ended up being in the previous step at the appropriate place for inserting a node, // but using common ancestor as the override target. auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(common_ancestor); adjusted_insertion_location.parent->insert_before(*last_node, adjusted_insertion_location.insert_before_sibling, false); // 15. Create an element for the token for which formatting element was created, // in the HTML namespace, with furthest block as the intended parent. // FIXME: hold onto the real token auto element = create_element_for(HTMLToken::make_start_tag(formatting_element->local_name()), Namespace::HTML, *furthest_block); // 16. Take all of the child nodes of furthest block and append them to the element created in the last step. 
for (auto& child : furthest_block->children_as_vector()) MUST(element->append_child(furthest_block->remove_child(*child).release_value())); // 17. Append that new element to furthest block. MUST(furthest_block->append_child(*element)); // 18. Remove formatting element from the list of active formatting elements, // and insert the new element into the list of active formatting elements at the position of the aforementioned bookmark. auto formatting_element_index = m_list_of_active_formatting_elements.find_index(*formatting_element); if (formatting_element_index.has_value() && formatting_element_index.value() < bookmark) bookmark--; m_list_of_active_formatting_elements.remove(*formatting_element); m_list_of_active_formatting_elements.insert_at(bookmark, *element); // 19. Remove formatting element from the stack of open elements, and insert the new element // into the stack of open elements immediately below the position of furthest block in that stack. m_stack_of_open_elements.remove(*formatting_element); m_stack_of_open_elements.insert_immediately_below(*element, *furthest_block); } } bool HTMLParser::is_special_tag(DeprecatedFlyString const& tag_name, DeprecatedFlyString const& namespace_) { if (namespace_ == Namespace::HTML) { return tag_name.is_one_of( HTML::TagNames::address, HTML::TagNames::applet, HTML::TagNames::area, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::blockquote, HTML::TagNames::body, HTML::TagNames::br, HTML::TagNames::button, HTML::TagNames::caption, HTML::TagNames::center, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::details, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::dt, HTML::TagNames::embed, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::form, HTML::TagNames::frame, HTML::TagNames::frameset, HTML::TagNames::h1, 
HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6, HTML::TagNames::head, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::hr, HTML::TagNames::html, HTML::TagNames::iframe, HTML::TagNames::img, HTML::TagNames::input, HTML::TagNames::keygen, HTML::TagNames::li, HTML::TagNames::link, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::marquee, HTML::TagNames::menu, HTML::TagNames::meta, HTML::TagNames::nav, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::noscript, HTML::TagNames::object, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::param, HTML::TagNames::plaintext, HTML::TagNames::pre, HTML::TagNames::script, HTML::TagNames::section, HTML::TagNames::select, HTML::TagNames::source, HTML::TagNames::style, HTML::TagNames::summary, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::template_, HTML::TagNames::textarea, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::title, HTML::TagNames::tr, HTML::TagNames::track, HTML::TagNames::ul, HTML::TagNames::wbr, HTML::TagNames::xmp); } else if (namespace_ == Namespace::SVG) { return tag_name.is_one_of( SVG::TagNames::desc, SVG::TagNames::foreignObject, SVG::TagNames::title); } else if (namespace_ == Namespace::MathML) { TODO(); } return false; } void HTMLParser::handle_in_body(HTMLToken& token) { if (token.is_character()) { if (token.code_point() == 0) { log_parse_error(); return; } if (token.is_parser_whitespace()) { reconstruct_the_active_formatting_elements(); insert_character(token.code_point()); return; } reconstruct_the_active_formatting_elements(); insert_character(token.code_point()); m_frameset_ok = false; return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { log_parse_error(); if 
(m_stack_of_open_elements.contains(HTML::TagNames::template_)) return; token.for_each_attribute([&](auto& attribute) { if (!current_node().has_attribute(attribute.local_name)) MUST(current_node().set_attribute(attribute.local_name, attribute.value)); return IterationDecision::Continue; }); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { log_parse_error(); if (m_stack_of_open_elements.elements().size() == 1 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body || m_stack_of_open_elements.contains(HTML::TagNames::template_)) { VERIFY(m_parsing_fragment); return; } m_frameset_ok = false; auto& body_element = m_stack_of_open_elements.elements().at(1); token.for_each_attribute([&](auto& attribute) { if (!body_element->has_attribute(attribute.local_name)) MUST(body_element->set_attribute(attribute.local_name, attribute.value)); return IterationDecision::Continue; }); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { log_parse_error(); if (m_stack_of_open_elements.elements().size() == 1 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body) { VERIFY(m_parsing_fragment); return; } if (!m_frameset_ok) return; TODO(); } if (token.is_end_of_file()) { if (!m_stack_of_template_insertion_modes.is_empty()) { process_using_the_rules_for(InsertionMode::InTemplate, token); return; } for (auto& node : m_stack_of_open_elements.elements()) { if 
(!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } stop_parsing(); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::body) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { log_parse_error(); return; } for (auto& node : m_stack_of_open_elements.elements()) { if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } m_insertion_mode = InsertionMode::AfterBody; return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { log_parse_error(); return; } for (auto& node : m_stack_of_open_elements.elements()) { if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } m_insertion_mode = InsertionMode::AfterBody; process_using_the_rules_for(m_insertion_mode, token); return; } if 
(token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); if (current_node().local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { log_parse_error(); (void)m_stack_of_open_elements.pop(); } (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::pre, HTML::TagNames::listing)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); m_frameset_ok = false; // If the next token is a U+000A LINE FEED (LF) character token, // then ignore that token and move on to the next one. // (Newlines at the start of pre blocks are ignored as an authoring convenience.) auto next_token = m_tokenizer.next_token(); if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { // Ignore it. 
} else { process_using_the_rules_for(m_insertion_mode, next_token.value()); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) { if (m_form_element.ptr() && !m_stack_of_open_elements.contains(HTML::TagNames::template_)) { log_parse_error(); return; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); auto element = insert_html_element(token); if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) m_form_element = JS::make_handle(verify_cast(*element)); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::li) { m_frameset_ok = false; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { JS::GCPtr node = m_stack_of_open_elements.elements()[i].ptr(); if (node->local_name() == HTML::TagNames::li) { generate_implied_end_tags(HTML::TagNames::li); if (current_node().local_name() != HTML::TagNames::li) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); break; } if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) break; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { m_frameset_ok = false; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { JS::GCPtr node = m_stack_of_open_elements.elements()[i].ptr(); if (node->local_name() == HTML::TagNames::dd) { generate_implied_end_tags(HTML::TagNames::dd); if (current_node().local_name() != HTML::TagNames::dd) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dd); break; } if (node->local_name() == HTML::TagNames::dt) { 
generate_implied_end_tags(HTML::TagNames::dt); if (current_node().local_name() != HTML::TagNames::dt) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dt); break; } if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) break; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::plaintext) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::button) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::button)) { log_parse_error(); generate_implied_end_tags(); m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::button); } reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); m_frameset_ok = false; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::button, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::pre, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } 
generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::form) { if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { auto node = m_form_element; m_form_element = {}; if (!node || !m_stack_of_open_elements.has_in_scope(*node)) { log_parse_error(); return; } generate_implied_end_tags(); if (¤t_node() != node.ptr()) { log_parse_error(); } m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == node.ptr(); }); } else { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::form)) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != HTML::TagNames::form) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::form); } return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::p) { if (!m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { log_parse_error(); (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::p)); } close_a_p_element(); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::li) { if (!m_stack_of_open_elements.has_in_list_item_scope(HTML::TagNames::li)) { log_parse_error(); return; } generate_implied_end_tags(HTML::TagNames::li); if (current_node().local_name() != HTML::TagNames::li) { log_parse_error(); dbgln("Expected
  • current node, but had <{}>", current_node().local_name()); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } generate_implied_end_tags(token.tag_name()); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::h1) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h2) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h3) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h4) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h5) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h6)) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } for (;;) { auto popped_element = m_stack_of_open_elements.pop(); if (popped_element->local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) break; } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::a) { if (auto* element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(HTML::TagNames::a)) { log_parse_error(); if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) goto AnyOtherEndTag; m_list_of_active_formatting_elements.remove(*element); m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return 
entry.ptr() == element; }); } reconstruct_the_active_formatting_elements(); auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { reconstruct_the_active_formatting_elements(); auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::nobr) { reconstruct_the_active_formatting_elements(); if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::nobr)) { log_parse_error(); run_the_adoption_agency_algorithm(token); reconstruct_the_active_formatting_elements(); } auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::a, HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::nobr, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) goto AnyOtherEndTag; return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); m_list_of_active_formatting_elements.add_marker(); m_frameset_ok = false; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { if 
(!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) { if (!document().in_quirks_mode()) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); } (void)insert_html_element(token); m_frameset_ok = false; m_insertion_mode = InsertionMode::InTable; return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { token.drop_attributes(); goto BRStartTag; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::area, HTML::TagNames::br, HTML::TagNames::embed, HTML::TagNames::img, HTML::TagNames::keygen, HTML::TagNames::wbr)) { BRStartTag: reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); m_frameset_ok = false; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) { reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); auto type_attribute = token.attribute(HTML::AttributeNames::type); if (type_attribute.is_null() || !type_attribute.equals_ignoring_case("hidden"sv)) { m_frameset_ok = false; } return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track)) { (void)insert_html_element(token); (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::hr) { if 
(m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); (void)insert_html_element(token); (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); m_frameset_ok = false; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) { // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.) log_parse_error(); token.set_tag_name("img"); process_using_the_rules_for(m_insertion_mode, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::textarea) { (void)insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); // If the next token is a U+000A LINE FEED (LF) character token, // then ignore that token and move on to the next one. // (Newlines at the start of pre blocks are ignored as an authoring convenience.) auto next_token = m_tokenizer.next_token(); m_original_insertion_mode = m_insertion_mode; m_frameset_ok = false; m_insertion_mode = InsertionMode::Text; if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { // Ignore it. 
} else { process_using_the_rules_for(m_insertion_mode, next_token.value()); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::xmp) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { close_a_p_element(); } reconstruct_the_active_formatting_elements(); m_frameset_ok = false; parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::iframe) { m_frameset_ok = false; parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noembed) || (token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled))) { parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) { reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); m_frameset_ok = false; switch (m_insertion_mode) { case InsertionMode::InTable: case InsertionMode::InCaption: case InsertionMode::InTableBody: case InsertionMode::InRow: case InsertionMode::InCell: m_insertion_mode = InsertionMode::InSelectInTable; break; default: m_insertion_mode = InsertionMode::InSelect; break; } return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::optgroup, HTML::TagNames::option)) { if (current_node().local_name() == HTML::TagNames::option) (void)m_stack_of_open_elements.pop(); reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) { if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) generate_implied_end_tags(); if (current_node().local_name() != HTML::TagNames::ruby) log_parse_error(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) { if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) 
generate_implied_end_tags(HTML::TagNames::rtc); if (current_node().local_name() != HTML::TagNames::rtc || current_node().local_name() != HTML::TagNames::ruby) log_parse_error(); (void)insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::math) { reconstruct_the_active_formatting_elements(); adjust_mathml_attributes(token); adjust_foreign_attributes(token); (void)insert_foreign_element(token, Namespace::MathML); if (token.is_self_closing()) { (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::svg) { reconstruct_the_active_formatting_elements(); adjust_svg_attributes(token); adjust_foreign_attributes(token); (void)insert_foreign_element(token, Namespace::SVG); if (token.is_self_closing()) { (void)m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); } return; } if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::frame, HTML::TagNames::head, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))) { log_parse_error(); return; } // Any other start tag if (token.is_start_tag()) { reconstruct_the_active_formatting_elements(); (void)insert_html_element(token); return; } if (token.is_end_tag()) { AnyOtherEndTag: JS::GCPtr node; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { node = m_stack_of_open_elements.elements()[i].ptr(); if (node->local_name() == token.tag_name()) { generate_implied_end_tags(token.tag_name()); if (node.ptr() != ¤t_node()) { log_parse_error(); } while (¤t_node() != node.ptr()) { (void)m_stack_of_open_elements.pop(); } (void)m_stack_of_open_elements.pop(); break; } if (is_special_tag(node->local_name(), node->namespace_())) { log_parse_error(); return; } } return; } } void 
HTMLParser::adjust_mathml_attributes(HTMLToken& token) { token.adjust_attribute_name("definitionurl", "definitionURL"); } void HTMLParser::adjust_svg_tag_names(HTMLToken& token) { token.adjust_tag_name("altglyph", "altGlyph"); token.adjust_tag_name("altglyphdef", "altGlyphDef"); token.adjust_tag_name("altglyphitem", "altGlyphItem"); token.adjust_tag_name("animatecolor", "animateColor"); token.adjust_tag_name("animatemotion", "animateMotion"); token.adjust_tag_name("animatetransform", "animateTransform"); token.adjust_tag_name("clippath", "clipPath"); token.adjust_tag_name("feblend", "feBlend"); token.adjust_tag_name("fecolormatrix", "feColorMatrix"); token.adjust_tag_name("fecomponenttransfer", "feComponentTransfer"); token.adjust_tag_name("fecomposite", "feComposite"); token.adjust_tag_name("feconvolvematrix", "feConvolveMatrix"); token.adjust_tag_name("fediffuselighting", "feDiffuseLighting"); token.adjust_tag_name("fedisplacementmap", "feDisplacementMap"); token.adjust_tag_name("fedistantlight", "feDistantLight"); token.adjust_tag_name("fedropshadow", "feDropShadow"); token.adjust_tag_name("feflood", "feFlood"); token.adjust_tag_name("fefunca", "feFuncA"); token.adjust_tag_name("fefuncb", "feFuncB"); token.adjust_tag_name("fefuncg", "feFuncG"); token.adjust_tag_name("fefuncr", "feFuncR"); token.adjust_tag_name("fegaussianblur", "feGaussianBlur"); token.adjust_tag_name("feimage", "feImage"); token.adjust_tag_name("femerge", "feMerge"); token.adjust_tag_name("femergenode", "feMergeNode"); token.adjust_tag_name("femorphology", "feMorphology"); token.adjust_tag_name("feoffset", "feOffset"); token.adjust_tag_name("fepointlight", "fePointLight"); token.adjust_tag_name("fespecularlighting", "feSpecularLighting"); token.adjust_tag_name("fespotlight", "feSpotlight"); token.adjust_tag_name("foreignobject", "foreignObject"); token.adjust_tag_name("glyphref", "glyphRef"); token.adjust_tag_name("lineargradient", "linearGradient"); token.adjust_tag_name("radialgradient", 
"radialGradient"); token.adjust_tag_name("textpath", "textPath"); } void HTMLParser::adjust_svg_attributes(HTMLToken& token) { token.adjust_attribute_name("attributename", "attributeName"); token.adjust_attribute_name("attributetype", "attributeType"); token.adjust_attribute_name("basefrequency", "baseFrequency"); token.adjust_attribute_name("baseprofile", "baseProfile"); token.adjust_attribute_name("calcmode", "calcMode"); token.adjust_attribute_name("clippathunits", "clipPathUnits"); token.adjust_attribute_name("diffuseconstant", "diffuseConstant"); token.adjust_attribute_name("edgemode", "edgeMode"); token.adjust_attribute_name("filterunits", "filterUnits"); token.adjust_attribute_name("glyphref", "glyphRef"); token.adjust_attribute_name("gradienttransform", "gradientTransform"); token.adjust_attribute_name("gradientunits", "gradientUnits"); token.adjust_attribute_name("kernelmatrix", "kernelMatrix"); token.adjust_attribute_name("kernelunitlength", "kernelUnitLength"); token.adjust_attribute_name("keypoints", "keyPoints"); token.adjust_attribute_name("keysplines", "keySplines"); token.adjust_attribute_name("keytimes", "keyTimes"); token.adjust_attribute_name("lengthadjust", "lengthAdjust"); token.adjust_attribute_name("limitingconeangle", "limitingConeAngle"); token.adjust_attribute_name("markerheight", "markerHeight"); token.adjust_attribute_name("markerunits", "markerUnits"); token.adjust_attribute_name("markerwidth", "markerWidth"); token.adjust_attribute_name("maskcontentunits", "maskContentUnits"); token.adjust_attribute_name("maskunits", "maskUnits"); token.adjust_attribute_name("numoctaves", "numOctaves"); token.adjust_attribute_name("pathlength", "pathLength"); token.adjust_attribute_name("patterncontentunits", "patternContentUnits"); token.adjust_attribute_name("patterntransform", "patternTransform"); token.adjust_attribute_name("patternunits", "patternUnits"); token.adjust_attribute_name("pointsatx", "pointsAtX"); 
token.adjust_attribute_name("pointsaty", "pointsAtY"); token.adjust_attribute_name("pointsatz", "pointsAtZ"); token.adjust_attribute_name("preservealpha", "preserveAlpha"); token.adjust_attribute_name("preserveaspectratio", "preserveAspectRatio"); token.adjust_attribute_name("primitiveunits", "primitiveUnits"); token.adjust_attribute_name("refx", "refX"); token.adjust_attribute_name("refy", "refY"); token.adjust_attribute_name("repeatcount", "repeatCount"); token.adjust_attribute_name("repeatdur", "repeatDur"); token.adjust_attribute_name("requiredextensions", "requiredExtensions"); token.adjust_attribute_name("requiredfeatures", "requiredFeatures"); token.adjust_attribute_name("specularconstant", "specularConstant"); token.adjust_attribute_name("specularexponent", "specularExponent"); token.adjust_attribute_name("spreadmethod", "spreadMethod"); token.adjust_attribute_name("startoffset", "startOffset"); token.adjust_attribute_name("stddeviation", "stdDeviation"); token.adjust_attribute_name("stitchtiles", "stitchTiles"); token.adjust_attribute_name("surfacescale", "surfaceScale"); token.adjust_attribute_name("systemlanguage", "systemLanguage"); token.adjust_attribute_name("tablevalues", "tableValues"); token.adjust_attribute_name("targetx", "targetX"); token.adjust_attribute_name("targety", "targetY"); token.adjust_attribute_name("textlength", "textLength"); token.adjust_attribute_name("viewbox", "viewBox"); token.adjust_attribute_name("viewtarget", "viewTarget"); token.adjust_attribute_name("xchannelselector", "xChannelSelector"); token.adjust_attribute_name("ychannelselector", "yChannelSelector"); token.adjust_attribute_name("zoomandpan", "zoomAndPan"); } void HTMLParser::adjust_foreign_attributes(HTMLToken& token) { token.adjust_foreign_attribute("xlink:actuate", "xlink", "actuate", Namespace::XLink); token.adjust_foreign_attribute("xlink:arcrole", "xlink", "arcrole", Namespace::XLink); token.adjust_foreign_attribute("xlink:href", "xlink", "href", 
Namespace::XLink); token.adjust_foreign_attribute("xlink:role", "xlink", "role", Namespace::XLink); token.adjust_foreign_attribute("xlink:show", "xlink", "show", Namespace::XLink); token.adjust_foreign_attribute("xlink:title", "xlink", "title", Namespace::XLink); token.adjust_foreign_attribute("xlink:type", "xlink", "type", Namespace::XLink); token.adjust_foreign_attribute("xml:lang", "xml", "lang", Namespace::XML); token.adjust_foreign_attribute("xml:space", "xml", "space", Namespace::XML); token.adjust_foreign_attribute("xmlns", "", "xmlns", Namespace::XMLNS); token.adjust_foreign_attribute("xmlns:xlink", "xmlns", "xlink", Namespace::XMLNS); } void HTMLParser::increment_script_nesting_level() { ++m_script_nesting_level; } void HTMLParser::decrement_script_nesting_level() { VERIFY(m_script_nesting_level); --m_script_nesting_level; } // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata void HTMLParser::handle_text(HTMLToken& token) { if (token.is_character()) { insert_character(token.code_point()); return; } if (token.is_end_of_file()) { log_parse_error(); if (current_node().local_name() == HTML::TagNames::script) verify_cast(current_node()).set_already_started(Badge {}, true); (void)m_stack_of_open_elements.pop(); m_insertion_mode = m_original_insertion_mode; process_using_the_rules_for(m_insertion_mode, token); return; } // -> An end tag whose tag name is "script" if (token.is_end_tag() && token.tag_name() == HTML::TagNames::script) { // FIXME: If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint. // Non-standard: Make sure the