/* * Copyright (c) 2020, Andreas Kling * Copyright (c) 2021, Luke Wilde * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Web::HTML { static inline void log_parse_error(const SourceLocation& location = SourceLocation::current()) { dbgln("Parse error! {}", location); } static Vector s_quirks_public_ids = { "+//Silmaril//dtd html Pro v0r11 19970101//", "-//AS//DTD HTML 3.0 asWedit + extensions//", "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", "-//IETF//DTD HTML 2.0 Level 1//", "-//IETF//DTD HTML 2.0 Level 2//", "-//IETF//DTD HTML 2.0 Strict Level 1//", "-//IETF//DTD HTML 2.0 Strict Level 2//", "-//IETF//DTD HTML 2.0 Strict//", "-//IETF//DTD HTML 2.0//", "-//IETF//DTD HTML 2.1E//", "-//IETF//DTD HTML 3.0//", "-//IETF//DTD HTML 3.2 Final//", "-//IETF//DTD HTML 3.2//", "-//IETF//DTD HTML 3//", "-//IETF//DTD HTML Level 0//", "-//IETF//DTD HTML Level 1//", "-//IETF//DTD HTML Level 2//", "-//IETF//DTD HTML Level 3//", "-//IETF//DTD HTML Strict Level 0//", "-//IETF//DTD HTML Strict Level 1//", "-//IETF//DTD HTML Strict Level 2//", "-//IETF//DTD HTML Strict Level 3//", "-//IETF//DTD HTML Strict//", "-//IETF//DTD HTML//", "-//Metrius//DTD Metrius Presentational//", "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", "-//Microsoft//DTD Internet Explorer 2.0 HTML//", "-//Microsoft//DTD Internet Explorer 2.0 Tables//", "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", "-//Microsoft//DTD Internet Explorer 3.0 HTML//", "-//Microsoft//DTD Internet Explorer 3.0 Tables//", "-//Netscape Comm. Corp.//DTD HTML//", "-//Netscape Comm. Corp.//DTD Strict HTML//", "-//O'Reilly and Associates//DTD HTML 2.0//", "-//O'Reilly and Associates//DTD HTML Extended 1.0//", "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", "-//Spyglass//DTD HTML 2.0 Extended//", "-//Sun Microsystems Corp.//DTD HotJava HTML//", "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", "-//W3C//DTD HTML 3 1995-03-24//", "-//W3C//DTD HTML 3.2 Draft//", "-//W3C//DTD HTML 3.2 Final//", "-//W3C//DTD HTML 3.2//", "-//W3C//DTD HTML 3.2S Draft//", "-//W3C//DTD HTML 4.0 Frameset//", "-//W3C//DTD HTML 4.0 Transitional//", "-//W3C//DTD HTML Experimental 19960712//", "-//W3C//DTD HTML Experimental 970421//", "-//W3C//DTD W3 HTML//", "-//W3O//DTD W3 HTML 3.0//", "-//WebTechs//DTD Mozilla HTML 2.0//", "-//WebTechs//DTD Mozilla HTML//" }; // https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point static bool is_mathml_text_integration_point(DOM::Element const&) { // FIXME: Implement. return false; } // https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point static bool is_html_integration_point(DOM::Element const& element) { // A node is an HTML integration point if it is one of the following elements: // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html" // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml" // An SVG foreignObject element // An SVG desc element // An SVG title element if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title)) return true; return false; } RefPtr parse_html_document(const StringView& data, const AK::URL& url, const String& encoding) { auto document = DOM::Document::create(url); HTMLParser parser(document, data, encoding); parser.run(url); return document; } HTMLParser::HTMLParser(DOM::Document& document, const StringView& input, const String& encoding) : m_tokenizer(input, encoding) , m_document(document) { m_document->set_should_invalidate_styles_on_attribute_changes(false); auto standardized_encoding = TextCodec::get_standardized_encoding(encoding); VERIFY(standardized_encoding.has_value()); m_document->set_encoding(standardized_encoding.value()); } HTMLParser::~HTMLParser() { m_document->set_should_invalidate_styles_on_attribute_changes(true); } void HTMLParser::run(const AK::URL& url) { m_document->set_url(url); m_document->set_source(m_tokenizer.source()); for (;;) { auto optional_token = m_tokenizer.next_token(); if (!optional_token.has_value()) break; auto& token = optional_token.value(); dbgln_if(PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_string()); // FIXME: If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark" // FIXME: If the adjusted current node is a MathML text integration point and the token is a character token // FIXME: If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg" // FIXME: If the adjusted current node is an HTML integration point and the token is a start tag // FIXME: If the adjusted current node is an HTML integration point and the token is a character token if (m_stack_of_open_elements.is_empty() || adjusted_current_node().namespace_() == Namespace::HTML || token.is_end_of_file()) { process_using_the_rules_for(m_insertion_mode, token); } else { process_using_the_rules_for_foreign_content(token); } if (m_stop_parsing) { dbgln_if(PARSER_DEBUG, "Stop parsing{}! :^)", m_parsing_fragment ? " fragment" : ""); break; } } flush_character_insertions(); the_end(); } // https://html.spec.whatwg.org/multipage/parsing.html#the-end void HTMLParser::the_end() { // Once the user agent stops parsing the document, the user agent must run the following steps: // FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return. // FIXME: 2. Set the insertion point to undefined. // 3. Update the current document readiness to "interactive". m_document->set_ready_state("interactive"); // 4. Pop all the nodes off the stack of open elements. while (!m_stack_of_open_elements.is_empty()) m_stack_of_open_elements.pop(); auto scripts_to_execute_when_parsing_has_finished = m_document->take_scripts_to_execute_when_parsing_has_finished({}); for (auto& script : scripts_to_execute_when_parsing_has_finished) { main_thread_event_loop().spin_until([&] { return script.is_ready_to_be_parser_executed() && !document().has_a_style_sheet_that_is_blocking_scripts(); }); script.execute_script(); } auto content_loaded_event = DOM::Event::create(HTML::EventNames::DOMContentLoaded); content_loaded_event->set_bubbles(true); m_document->dispatch_event(content_loaded_event); // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty. main_thread_event_loop().spin_until([&] { return m_document->scripts_to_execute_as_soon_as_possible().is_empty(); }); // FIXME: Spin the event loop until there is nothing that delays the load event in the Document. m_document->set_ready_state("complete"); m_document->window().dispatch_event(DOM::Event::create(HTML::EventNames::load)); m_document->set_ready_for_post_load_tasks(true); m_document->completely_finish_loading(); } void HTMLParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token) { switch (mode) { case InsertionMode::Initial: handle_initial(token); break; case InsertionMode::BeforeHTML: handle_before_html(token); break; case InsertionMode::BeforeHead: handle_before_head(token); break; case InsertionMode::InHead: handle_in_head(token); break; case InsertionMode::InHeadNoscript: handle_in_head_noscript(token); break; case InsertionMode::AfterHead: handle_after_head(token); break; case InsertionMode::InBody: handle_in_body(token); break; case InsertionMode::AfterBody: handle_after_body(token); break; case InsertionMode::AfterAfterBody: handle_after_after_body(token); break; case InsertionMode::Text: handle_text(token); break; case InsertionMode::InTable: handle_in_table(token); break; case InsertionMode::InTableBody: handle_in_table_body(token); break; case InsertionMode::InRow: handle_in_row(token); break; case InsertionMode::InCell: handle_in_cell(token); break; case InsertionMode::InTableText: handle_in_table_text(token); break; case InsertionMode::InSelectInTable: handle_in_select_in_table(token); break; case InsertionMode::InSelect: handle_in_select(token); break; case InsertionMode::InCaption: handle_in_caption(token); break; case InsertionMode::InColumnGroup: handle_in_column_group(token); break; case InsertionMode::InTemplate: handle_in_template(token); break; case InsertionMode::InFrameset: handle_in_frameset(token); break; case InsertionMode::AfterFrameset: handle_after_frameset(token); break; case InsertionMode::AfterAfterFrameset: handle_after_after_frameset(token); break; default: VERIFY_NOT_REACHED(); } } DOM::QuirksMode HTMLParser::which_quirks_mode(const HTMLToken& doctype_token) const { if (doctype_token.doctype_data().force_quirks) return DOM::QuirksMode::Yes; // NOTE: The tokenizer puts the name into lower case for us. if (doctype_token.doctype_data().name != "html") return DOM::QuirksMode::Yes; auto const& public_identifier = doctype_token.doctype_data().public_identifier; auto const& system_identifier = doctype_token.doctype_data().system_identifier; if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")) return DOM::QuirksMode::Yes; if (public_identifier.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")) return DOM::QuirksMode::Yes; if (public_identifier.equals_ignoring_case("HTML")) return DOM::QuirksMode::Yes; if (system_identifier.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) return DOM::QuirksMode::Yes; for (auto& public_id : s_quirks_public_ids) { if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Yes; } if (doctype_token.doctype_data().missing_system_identifier) { if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Yes; if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Yes; } if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; if (!doctype_token.doctype_data().missing_system_identifier) { if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; } return DOM::QuirksMode::No; } void HTMLParser::handle_initial(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { return; } if (token.is_comment()) { auto comment = adopt_ref(*new DOM::Comment(document(), token.comment())); document().append_child(move(comment)); return; } if (token.is_doctype()) { auto doctype = adopt_ref(*new DOM::DocumentType(document())); doctype->set_name(token.doctype_data().name); doctype->set_public_id(token.doctype_data().public_identifier); doctype->set_system_id(token.doctype_data().system_identifier); document().append_child(move(doctype)); document().set_quirks_mode(which_quirks_mode(token)); m_insertion_mode = InsertionMode::BeforeHTML; return; } log_parse_error(); document().set_quirks_mode(DOM::QuirksMode::Yes); m_insertion_mode = InsertionMode::BeforeHTML; process_using_the_rules_for(InsertionMode::BeforeHTML, token); } void HTMLParser::handle_before_html(HTMLToken& token) { if (token.is_doctype()) { log_parse_error(); return; } if (token.is_comment()) { auto comment = adopt_ref(*new DOM::Comment(document(), token.comment())); document().append_child(move(comment)); return; } if (token.is_character() && token.is_parser_whitespace()) { return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { auto element = create_element_for(token, Namespace::HTML); document().append_child(element); m_stack_of_open_elements.push(move(element)); m_insertion_mode = InsertionMode::BeforeHead; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { goto AnythingElse; } if (token.is_end_tag()) { log_parse_error(); return; } AnythingElse: auto element = create_element(document(), HTML::TagNames::html, Namespace::HTML); document().append_child(element); m_stack_of_open_elements.push(element); // FIXME: If the Document is being loaded as part of navigation of a browsing context, then: run the application cache selection algorithm with no manifest, passing it the Document object. m_insertion_mode = InsertionMode::BeforeHead; process_using_the_rules_for(InsertionMode::BeforeHead, token); return; } DOM::Element& HTMLParser::current_node() { return m_stack_of_open_elements.current_node(); } DOM::Element& HTMLParser::adjusted_current_node() { if (m_parsing_fragment && m_stack_of_open_elements.elements().size() == 1) return *m_context_element; return current_node(); } DOM::Element& HTMLParser::node_before_current_node() { return m_stack_of_open_elements.elements().at(m_stack_of_open_elements.elements().size() - 2); } HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node() { auto& target = current_node(); HTMLParser::AdjustedInsertionLocation adjusted_insertion_location; if (m_foster_parenting && target.local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) { auto last_template = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::template_); auto last_table = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::table); if (last_template.element && (!last_table.element || last_template.index > last_table.index)) { // This returns the template content, so no need to check the parent is a template. return { verify_cast(last_template.element)->content(), nullptr }; } if (!last_table.element) { VERIFY(m_parsing_fragment); // Guaranteed not to be a template element (it will be the html element), // so no need to check the parent is a template. return { m_stack_of_open_elements.elements().first(), nullptr }; } if (last_table.element->parent_node()) adjusted_insertion_location = { last_table.element->parent_node(), last_table.element }; else adjusted_insertion_location = { m_stack_of_open_elements.element_before(*last_table.element), nullptr }; } else { adjusted_insertion_location = { target, nullptr }; } if (is(*adjusted_insertion_location.parent)) return { verify_cast(*adjusted_insertion_location.parent).content(), nullptr }; return adjusted_insertion_location; } NonnullRefPtr HTMLParser::create_element_for(const HTMLToken& token, const FlyString& namespace_) { auto element = create_element(document(), token.tag_name(), namespace_); token.for_each_attribute([&](auto& attribute) { element->set_attribute(attribute.local_name, attribute.value); return IterationDecision::Continue; }); return element; } // https://html.spec.whatwg.org/multipage/parsing.html#insert-a-foreign-element NonnullRefPtr HTMLParser::insert_foreign_element(const HTMLToken& token, const FlyString& namespace_) { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); // FIXME: Pass in adjusted_insertion_location.parent as the intended parent. auto element = create_element_for(token, namespace_); auto pre_insertion_validity = adjusted_insertion_location.parent->ensure_pre_insertion_validity(element, adjusted_insertion_location.insert_before_sibling); // NOTE: If it's not possible to insert the element at the adjusted insertion location, the element is simply dropped. if (!pre_insertion_validity.is_exception()) { if (!m_parsing_fragment) { // FIXME: push a new element queue onto element's relevant agent's custom element reactions stack. } adjusted_insertion_location.parent->insert_before(element, adjusted_insertion_location.insert_before_sibling); if (!m_parsing_fragment) { // FIXME: pop the element queue from element's relevant agent's custom element reactions stack, and invoke custom element reactions in that queue. } } m_stack_of_open_elements.push(element); return element; } NonnullRefPtr HTMLParser::insert_html_element(const HTMLToken& token) { return insert_foreign_element(token, Namespace::HTML); } void HTMLParser::handle_before_head(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::head) { auto element = insert_html_element(token); m_head_element = verify_cast(*element); m_insertion_mode = InsertionMode::InHead; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { goto AnythingElse; } if (token.is_end_tag()) { log_parse_error(); return; } AnythingElse: m_head_element = verify_cast(*insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::head))); m_insertion_mode = InsertionMode::InHead; process_using_the_rules_for(InsertionMode::InHead, token); return; } void HTMLParser::insert_comment(HTMLToken& token) { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); adjusted_insertion_location.parent->insert_before(adopt_ref(*new DOM::Comment(document(), token.comment())), adjusted_insertion_location.insert_before_sibling); } void HTMLParser::handle_in_head(HTMLToken& token) { if (token.is_parser_whitespace()) { insert_character(token.code_point()); return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link)) { insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::meta) { auto element = insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::title) { insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); m_original_insertion_mode = m_insertion_mode; m_insertion_mode = InsertionMode::Text; return; } if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled) || token.tag_name() == HTML::TagNames::noframes || token.tag_name() == HTML::TagNames::style)) { parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noscript && !m_scripting_enabled) { insert_html_element(token); m_insertion_mode = InsertionMode::InHeadNoscript; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); auto element = create_element_for(token, Namespace::HTML); auto& script_element = verify_cast(*element); script_element.set_parser_document({}, document()); script_element.set_non_blocking({}, false); if (m_parsing_fragment) { script_element.set_already_started({}, true); } if (m_invoked_via_document_write) { TODO(); } adjusted_insertion_location.parent->insert_before(element, adjusted_insertion_location.insert_before_sibling, false); m_stack_of_open_elements.push(element); m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); m_original_insertion_mode = m_insertion_mode; m_insertion_mode = InsertionMode::Text; return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::head) { m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::AfterHead; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { goto AnythingElse; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) { insert_html_element(token); m_list_of_active_formatting_elements.add_marker(); m_frameset_ok = false; m_insertion_mode = InsertionMode::InTemplate; m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { log_parse_error(); return; } generate_all_implied_end_tags_thoroughly(); if (current_node().local_name() != HTML::TagNames::template_) log_parse_error(); m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_); m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); m_stack_of_template_insertion_modes.take_last(); reset_the_insertion_mode_appropriately(); return; } if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) { log_parse_error(); return; } AnythingElse: m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::AfterHead; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::handle_in_head_noscript(HTMLToken& token) { if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::noscript) { m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::InHead; return; } if (token.is_parser_whitespace() || token.is_comment() || (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::style))) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { goto AnythingElse; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::noscript)) { log_parse_error(); return; } AnythingElse: log_parse_error(); m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::InHead; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::parse_generic_raw_text_element(HTMLToken& token) { insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); m_original_insertion_mode = m_insertion_mode; m_insertion_mode = InsertionMode::Text; } DOM::Text* HTMLParser::find_character_insertion_node() { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); if (adjusted_insertion_location.insert_before_sibling) { TODO(); } if (adjusted_insertion_location.parent->is_document()) return nullptr; if (adjusted_insertion_location.parent->last_child() && adjusted_insertion_location.parent->last_child()->is_text()) return verify_cast(adjusted_insertion_location.parent->last_child()); auto new_text_node = adopt_ref(*new DOM::Text(document(), "")); adjusted_insertion_location.parent->append_child(new_text_node); return new_text_node; } void HTMLParser::flush_character_insertions() { if (m_character_insertion_builder.is_empty()) return; m_character_insertion_node->set_data(m_character_insertion_builder.to_string()); m_character_insertion_node->parent()->children_changed(); m_character_insertion_builder.clear(); } void HTMLParser::insert_character(u32 data) { auto node = find_character_insertion_node(); if (node == m_character_insertion_node) { m_character_insertion_builder.append(Utf32View { &data, 1 }); return; } if (!m_character_insertion_node) { m_character_insertion_node = node; m_character_insertion_builder.append(Utf32View { &data, 1 }); return; } flush_character_insertions(); m_character_insertion_node = node; m_character_insertion_builder.append(Utf32View { &data, 1 }); } void HTMLParser::handle_after_head(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { insert_character(token.code_point()); return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { insert_html_element(token); m_frameset_ok = false; m_insertion_mode = InsertionMode::InBody; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { insert_html_element(token); m_insertion_mode = InsertionMode::InFrameset; return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { log_parse_error(); m_stack_of_open_elements.push(*m_head_element); process_using_the_rules_for(InsertionMode::InHead, token); m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == m_head_element; }); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { goto AnythingElse; } if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) { log_parse_error(); return; } AnythingElse: insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::body)); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::generate_implied_end_tags(const FlyString& exception) { while (current_node().local_name() != exception && current_node().local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc)) m_stack_of_open_elements.pop(); } void HTMLParser::generate_all_implied_end_tags_thoroughly() { while (current_node().local_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) m_stack_of_open_elements.pop(); } void HTMLParser::close_a_p_element() { generate_implied_end_tags(HTML::TagNames::p); if (current_node().local_name() != HTML::TagNames::p) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::p); } void HTMLParser::handle_after_body(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_comment()) { auto& insertion_location = m_stack_of_open_elements.first(); insertion_location.append_child(adopt_ref(*new DOM::Comment(document(), token.comment()))); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { if (m_parsing_fragment) { log_parse_error(); return; } m_insertion_mode = InsertionMode::AfterAfterBody; return; } if (token.is_end_of_file()) { stop_parsing(); return; } log_parse_error(); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(InsertionMode::InBody, token); } void HTMLParser::handle_after_after_body(HTMLToken& token) { if (token.is_comment()) { auto comment = adopt_ref(*new DOM::Comment(document(), token.comment())); document().append_child(move(comment)); return; } if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) { process_using_the_rules_for(InsertionMode::InBody, token); return; } if (token.is_end_of_file()) { stop_parsing(); return; } log_parse_error(); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(m_insertion_mode, token); } void HTMLParser::reconstruct_the_active_formatting_elements() { // FIXME: This needs to care about "markers" if (m_list_of_active_formatting_elements.is_empty()) return; if (m_list_of_active_formatting_elements.entries().last().is_marker()) return; if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element)) return; ssize_t index = m_list_of_active_formatting_elements.entries().size() - 1; RefPtr entry = m_list_of_active_formatting_elements.entries().at(index).element; VERIFY(entry); Rewind: if (index == 0) { goto Create; } --index; entry = m_list_of_active_formatting_elements.entries().at(index).element; VERIFY(entry); if (!m_stack_of_open_elements.contains(*entry)) goto Rewind; Advance: ++index; entry = m_list_of_active_formatting_elements.entries().at(index).element; VERIFY(entry); Create: // FIXME: Hold on to the real token! auto new_element = insert_html_element(HTMLToken::make_start_tag(entry->local_name())); m_list_of_active_formatting_elements.entries().at(index).element = *new_element; if (index != (ssize_t)m_list_of_active_formatting_elements.entries().size() - 1) goto Advance; } HTMLParser::AdoptionAgencyAlgorithmOutcome HTMLParser::run_the_adoption_agency_algorithm(HTMLToken& token) { auto subject = token.tag_name(); // If the current node is an HTML element whose tag name is subject, // and the current node is not in the list of active formatting elements, // then pop the current node off the stack of open elements, and return. if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) { m_stack_of_open_elements.pop(); return AdoptionAgencyAlgorithmOutcome::DoNothing; } size_t outer_loop_counter = 0; //OuterLoop: if (outer_loop_counter >= 8) return AdoptionAgencyAlgorithmOutcome::DoNothing; ++outer_loop_counter; auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); if (!formatting_element) return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps; if (!m_stack_of_open_elements.contains(*formatting_element)) { log_parse_error(); m_list_of_active_formatting_elements.remove(*formatting_element); return AdoptionAgencyAlgorithmOutcome::DoNothing; } if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { log_parse_error(); return AdoptionAgencyAlgorithmOutcome::DoNothing; } if (formatting_element != ¤t_node()) { log_parse_error(); } RefPtr furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element); if (!furthest_block) { while (¤t_node() != formatting_element) m_stack_of_open_elements.pop(); m_stack_of_open_elements.pop(); m_list_of_active_formatting_elements.remove(*formatting_element); return AdoptionAgencyAlgorithmOutcome::DoNothing; } // FIXME: Implement the rest of the AAA :^) TODO(); } bool HTMLParser::is_special_tag(const FlyString& tag_name, const FlyString& namespace_) { if (namespace_ == Namespace::HTML) { return tag_name.is_one_of( HTML::TagNames::address, HTML::TagNames::applet, HTML::TagNames::area, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::blockquote, HTML::TagNames::body, HTML::TagNames::br, HTML::TagNames::button, HTML::TagNames::caption, HTML::TagNames::center, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::details, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::dt, HTML::TagNames::embed, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::form, HTML::TagNames::frame, HTML::TagNames::frameset, HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6, HTML::TagNames::head, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::hr, HTML::TagNames::html, HTML::TagNames::iframe, HTML::TagNames::img, HTML::TagNames::input, HTML::TagNames::keygen, HTML::TagNames::li, HTML::TagNames::link, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::marquee, HTML::TagNames::menu, HTML::TagNames::meta, HTML::TagNames::nav, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::noscript, HTML::TagNames::object, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::param, HTML::TagNames::plaintext, HTML::TagNames::pre, HTML::TagNames::script, HTML::TagNames::section, HTML::TagNames::select, HTML::TagNames::source, HTML::TagNames::style, HTML::TagNames::summary, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::template_, HTML::TagNames::textarea, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::title, HTML::TagNames::tr, HTML::TagNames::track, HTML::TagNames::ul, HTML::TagNames::wbr, HTML::TagNames::xmp); } else if (namespace_ == Namespace::SVG) { return tag_name.is_one_of( SVG::TagNames::desc, SVG::TagNames::foreignObject, SVG::TagNames::title); } else if (namespace_ == Namespace::MathML) { TODO(); } return false; } void HTMLParser::handle_in_body(HTMLToken& token) { if (token.is_character()) { if (token.code_point() == 0) { log_parse_error(); return; } if (token.is_parser_whitespace()) { reconstruct_the_active_formatting_elements(); insert_character(token.code_point()); return; } reconstruct_the_active_formatting_elements(); insert_character(token.code_point()); m_frameset_ok = false; return; } if (token.is_comment()) { insert_comment(token); return; } if (token.is_doctype()) { log_parse_error(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { log_parse_error(); if (m_stack_of_open_elements.contains(HTML::TagNames::template_)) return; token.for_each_attribute([&](auto& attribute) { if (!current_node().has_attribute(attribute.local_name)) current_node().set_attribute(attribute.local_name, attribute.value); return IterationDecision::Continue; }); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { process_using_the_rules_for(InsertionMode::InHead, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { log_parse_error(); if (m_stack_of_open_elements.elements().size() == 1 || m_stack_of_open_elements.elements().at(1).local_name() != HTML::TagNames::body || m_stack_of_open_elements.contains(HTML::TagNames::template_)) { VERIFY(m_parsing_fragment); return; } m_frameset_ok = false; auto& body_element = m_stack_of_open_elements.elements().at(1); token.for_each_attribute([&](auto& attribute) { if (!body_element.has_attribute(attribute.local_name)) body_element.set_attribute(attribute.local_name, attribute.value); return IterationDecision::Continue; }); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { log_parse_error(); if (m_stack_of_open_elements.elements().size() == 1 || m_stack_of_open_elements.elements().at(1).local_name() != HTML::TagNames::body) { VERIFY(m_parsing_fragment); return; } if (!m_frameset_ok) return; TODO(); } if (token.is_end_of_file()) { if (!m_stack_of_template_insertion_modes.is_empty()) { process_using_the_rules_for(InsertionMode::InTemplate, token); return; } for (auto& node : m_stack_of_open_elements.elements()) { if (!node.local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } stop_parsing(); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::body) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { log_parse_error(); return; } for (auto& node : m_stack_of_open_elements.elements()) { if (!node.local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } m_insertion_mode = InsertionMode::AfterBody; return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { log_parse_error(); return; } for (auto& node : m_stack_of_open_elements.elements()) { if (!node.local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { log_parse_error(); break; } } m_insertion_mode = InsertionMode::AfterBody; process_using_the_rules_for(m_insertion_mode, token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); if (current_node().local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { log_parse_error(); m_stack_of_open_elements.pop(); } insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::pre, HTML::TagNames::listing)) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); m_frameset_ok = false; // If the next token is a U+000A LINE FEED (LF) character token, // then ignore that token and move on to the next one. // (Newlines at the start of pre blocks are ignored as an authoring convenience.) auto next_token = m_tokenizer.next_token(); if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { // Ignore it. } else { process_using_the_rules_for(m_insertion_mode, next_token.value()); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) { if (m_form_element && !m_stack_of_open_elements.contains(HTML::TagNames::template_)) { log_parse_error(); return; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); auto element = insert_html_element(token); if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) m_form_element = verify_cast(*element); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::li) { m_frameset_ok = false; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { RefPtr node = m_stack_of_open_elements.elements()[i]; if (node->local_name() == HTML::TagNames::li) { generate_implied_end_tags(HTML::TagNames::li); if (current_node().local_name() != HTML::TagNames::li) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); break; } if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) break; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { m_frameset_ok = false; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { RefPtr node = m_stack_of_open_elements.elements()[i]; if (node->local_name() == HTML::TagNames::dd) { generate_implied_end_tags(HTML::TagNames::dd); if (current_node().local_name() != HTML::TagNames::dd) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dd); break; } if (node->local_name() == HTML::TagNames::dt) { generate_implied_end_tags(HTML::TagNames::dt); if (current_node().local_name() != HTML::TagNames::dt) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dt); break; } if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) break; } if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::plaintext) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::button) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::button)) { log_parse_error(); generate_implied_end_tags(); m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::button); } reconstruct_the_active_formatting_elements(); insert_html_element(token); m_frameset_ok = false; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::button, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::pre, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::form) { if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { auto node = m_form_element; m_form_element = nullptr; if (!node || !m_stack_of_open_elements.has_in_scope(*node)) { log_parse_error(); return; } generate_implied_end_tags(); if (¤t_node() != node) { log_parse_error(); } m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == node.ptr(); }); } else { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::form)) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != HTML::TagNames::form) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::form); } return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::p) { if (!m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { log_parse_error(); insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::p)); } close_a_p_element(); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::li) { if (!m_stack_of_open_elements.has_in_list_item_scope(HTML::TagNames::li)) { log_parse_error(); return; } generate_implied_end_tags(HTML::TagNames::li); if (current_node().local_name() != HTML::TagNames::li) { log_parse_error(); dbgln("Expected
  • current node, but had <{}>", current_node().local_name()); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } generate_implied_end_tags(token.tag_name()); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::h1) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h2) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h3) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h4) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h5) && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h6)) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } for (;;) { auto popped_element = m_stack_of_open_elements.pop(); if (popped_element->local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) break; } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::a) { if (auto* element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(HTML::TagNames::a)) { log_parse_error(); if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) goto AnyOtherEndTag; m_list_of_active_formatting_elements.remove(*element); m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == element; }); } reconstruct_the_active_formatting_elements(); auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { reconstruct_the_active_formatting_elements(); auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::nobr) { reconstruct_the_active_formatting_elements(); if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::nobr)) { log_parse_error(); run_the_adoption_agency_algorithm(token); reconstruct_the_active_formatting_elements(); } auto element = insert_html_element(token); m_list_of_active_formatting_elements.add(*element); return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::a, HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::nobr, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) goto AnyOtherEndTag; return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { reconstruct_the_active_formatting_elements(); insert_html_element(token); m_list_of_active_formatting_elements.add_marker(); m_frameset_ok = false; return; } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { log_parse_error(); return; } generate_implied_end_tags(); if (current_node().local_name() != token.tag_name()) { log_parse_error(); } m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) { if (!document().in_quirks_mode()) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); } insert_html_element(token); m_frameset_ok = false; m_insertion_mode = InsertionMode::InTable; return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { token.drop_attributes(); goto BRStartTag; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::area, HTML::TagNames::br, HTML::TagNames::embed, HTML::TagNames::img, HTML::TagNames::keygen, HTML::TagNames::wbr)) { BRStartTag: reconstruct_the_active_formatting_elements(); insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); m_frameset_ok = false; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) { reconstruct_the_active_formatting_elements(); insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); auto type_attribute = token.attribute(HTML::AttributeNames::type); if (type_attribute.is_null() || !type_attribute.equals_ignoring_case("hidden")) { m_frameset_ok = false; } return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track)) { insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::hr) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) close_a_p_element(); insert_html_element(token); m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); m_frameset_ok = false; return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) { // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.) log_parse_error(); token.set_tag_name("img"); process_using_the_rules_for(m_insertion_mode, token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::textarea) { insert_html_element(token); m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); // If the next token is a U+000A LINE FEED (LF) character token, // then ignore that token and move on to the next one. // (Newlines at the start of pre blocks are ignored as an authoring convenience.) auto next_token = m_tokenizer.next_token(); m_original_insertion_mode = m_insertion_mode; m_frameset_ok = false; m_insertion_mode = InsertionMode::Text; if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { // Ignore it. } else { process_using_the_rules_for(m_insertion_mode, next_token.value()); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::xmp) { if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { close_a_p_element(); } reconstruct_the_active_formatting_elements(); m_frameset_ok = false; parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::iframe) { m_frameset_ok = false; parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noembed) || (token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled))) { parse_generic_raw_text_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) { reconstruct_the_active_formatting_elements(); insert_html_element(token); m_frameset_ok = false; switch (m_insertion_mode) { case InsertionMode::InTable: case InsertionMode::InCaption: case InsertionMode::InTableBody: case InsertionMode::InRow: case InsertionMode::InCell: m_insertion_mode = InsertionMode::InSelectInTable; break; default: m_insertion_mode = InsertionMode::InSelect; break; } return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::optgroup, HTML::TagNames::option)) { if (current_node().local_name() == HTML::TagNames::option) m_stack_of_open_elements.pop(); reconstruct_the_active_formatting_elements(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) { if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) generate_implied_end_tags(); if (current_node().local_name() != HTML::TagNames::ruby) log_parse_error(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) { if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) generate_implied_end_tags(HTML::TagNames::rtc); if (current_node().local_name() != HTML::TagNames::rtc || current_node().local_name() != HTML::TagNames::ruby) log_parse_error(); insert_html_element(token); return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::math) { reconstruct_the_active_formatting_elements(); adjust_mathml_attributes(token); adjust_foreign_attributes(token); insert_foreign_element(token, Namespace::MathML); if (token.is_self_closing()) { m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); } return; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::svg) { reconstruct_the_active_formatting_elements(); adjust_svg_attributes(token); adjust_foreign_attributes(token); insert_foreign_element(token, Namespace::SVG); if (token.is_self_closing()) { m_stack_of_open_elements.pop(); token.acknowledge_self_closing_flag_if_set(); } return; } if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::frame, HTML::TagNames::head, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))) { log_parse_error(); return; } // Any other start tag if (token.is_start_tag()) { reconstruct_the_active_formatting_elements(); insert_html_element(token); return; } if (token.is_end_tag()) { AnyOtherEndTag: RefPtr node; for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { node = m_stack_of_open_elements.elements()[i]; if (node->local_name() == token.tag_name()) { generate_implied_end_tags(token.tag_name()); if (node != current_node()) { log_parse_error(); } while (¤t_node() != node) { m_stack_of_open_elements.pop(); } m_stack_of_open_elements.pop(); break; } if (is_special_tag(node->local_name(), node->namespace_())) { log_parse_error(); return; } } return; } } void HTMLParser::adjust_mathml_attributes(HTMLToken& token) { token.adjust_attribute_name("definitionurl", "definitionURL"); } void HTMLParser::adjust_svg_tag_names(HTMLToken& token) { token.adjust_tag_name("altglyph", "altGlyph"); token.adjust_tag_name("altglyphdef", "altGlyphDef"); token.adjust_tag_name("altglyphitem", "altGlyphItem"); token.adjust_tag_name("animatecolor", "animateColor"); token.adjust_tag_name("animatemotion", "animateMotion"); token.adjust_tag_name("animatetransform", "animateTransform"); token.adjust_tag_name("clippath", "clipPath"); token.adjust_tag_name("feblend", "feBlend"); token.adjust_tag_name("fecolormatrix", "feColorMatrix"); token.adjust_tag_name("fecomponenttransfer", "feComponentTransfer"); token.adjust_tag_name("fecomposite", "feComposite"); token.adjust_tag_name("feconvolvematrix", "feConvolveMatrix"); token.adjust_tag_name("fediffuselighting", "feDiffuseLighting"); token.adjust_tag_name("fedisplacementmap", "feDisplacementMap"); token.adjust_tag_name("fedistantlight", "feDistantLight"); token.adjust_tag_name("fedropshadow", "feDropShadow"); token.adjust_tag_name("feflood", "feFlood"); token.adjust_tag_name("fefunca", "feFuncA"); token.adjust_tag_name("fefuncb", "feFuncB"); token.adjust_tag_name("fefuncg", "feFuncG"); token.adjust_tag_name("fefuncr", "feFuncR"); token.adjust_tag_name("fegaussianblur", "feGaussianBlur"); token.adjust_tag_name("feimage", "feImage"); token.adjust_tag_name("femerge", "feMerge"); token.adjust_tag_name("femergenode", "feMergeNode"); token.adjust_tag_name("femorphology", "feMorphology"); token.adjust_tag_name("feoffset", "feOffset"); token.adjust_tag_name("fepointlight", "fePointLight"); token.adjust_tag_name("fespecularlighting", "feSpecularLighting"); token.adjust_tag_name("fespotlight", "feSpotlight"); token.adjust_tag_name("glyphref", "glyphRef"); token.adjust_tag_name("lineargradient", "linearGradient"); token.adjust_tag_name("radialgradient", "radialGradient"); token.adjust_tag_name("textpath", "textPath"); } void HTMLParser::adjust_svg_attributes(HTMLToken& token) { token.adjust_attribute_name("attributename", "attributeName"); token.adjust_attribute_name("attributetype", "attributeType"); token.adjust_attribute_name("basefrequency", "baseFrequency"); token.adjust_attribute_name("baseprofile", "baseProfile"); token.adjust_attribute_name("calcmode", "calcMode"); token.adjust_attribute_name("clippathunits", "clipPathUnits"); token.adjust_attribute_name("diffuseconstant", "diffuseConstant"); token.adjust_attribute_name("edgemode", "edgeMode"); token.adjust_attribute_name("filterunits", "filterUnits"); token.adjust_attribute_name("glyphref", "glyphRef"); token.adjust_attribute_name("gradienttransform", "gradientTransform"); token.adjust_attribute_name("gradientunits", "gradientUnits"); token.adjust_attribute_name("kernelmatrix", "kernelMatrix"); token.adjust_attribute_name("kernelunitlength", "kernelUnitLength"); token.adjust_attribute_name("keypoints", "keyPoints"); token.adjust_attribute_name("keysplines", "keySplines"); token.adjust_attribute_name("keytimes", "keyTimes"); token.adjust_attribute_name("lengthadjust", "lengthAdjust"); token.adjust_attribute_name("limitingconeangle", "limitingConeAngle"); token.adjust_attribute_name("markerheight", "markerHeight"); token.adjust_attribute_name("markerunits", "markerUnits"); token.adjust_attribute_name("markerwidth", "markerWidth"); token.adjust_attribute_name("maskcontentunits", "maskContentUnits"); token.adjust_attribute_name("maskunits", "maskUnits"); token.adjust_attribute_name("numoctaves", "numOctaves"); token.adjust_attribute_name("pathlength", "pathLength"); token.adjust_attribute_name("patterncontentunits", "patternContentUnits"); token.adjust_attribute_name("patterntransform", "patternTransform"); token.adjust_attribute_name("patternunits", "patternUnits"); token.adjust_attribute_name("pointsatx", "pointsAtX"); token.adjust_attribute_name("pointsaty", "pointsAtY"); token.adjust_attribute_name("pointsatz", "pointsAtZ"); token.adjust_attribute_name("preservealpha", "preserveAlpha"); token.adjust_attribute_name("preserveaspectratio", "preserveAspectRatio"); token.adjust_attribute_name("primitiveunits", "primitiveUnits"); token.adjust_attribute_name("refx", "refX"); token.adjust_attribute_name("refy", "refY"); token.adjust_attribute_name("repeatcount", "repeatCount"); token.adjust_attribute_name("repeatdur", "repeatDur"); token.adjust_attribute_name("requiredextensions", "requiredExtensions"); token.adjust_attribute_name("requiredfeatures", "requiredFeatures"); token.adjust_attribute_name("specularconstant", "specularConstant"); token.adjust_attribute_name("specularexponent", "specularExponent"); token.adjust_attribute_name("spreadmethod", "spreadMethod"); token.adjust_attribute_name("startoffset", "startOffset"); token.adjust_attribute_name("stddeviation", "stdDeviation"); token.adjust_attribute_name("stitchtiles", "stitchTiles"); token.adjust_attribute_name("surfacescale", "surfaceScale"); token.adjust_attribute_name("systemlanguage", "systemLanguage"); token.adjust_attribute_name("tablevalues", "tableValues"); token.adjust_attribute_name("targetx", "targetX"); token.adjust_attribute_name("targety", "targetY"); token.adjust_attribute_name("textlength", "textLength"); token.adjust_attribute_name("viewbox", "viewBox"); token.adjust_attribute_name("viewtarget", "viewTarget"); token.adjust_attribute_name("xchannelselector", "xChannelSelector"); token.adjust_attribute_name("ychannelselector", "yChannelSelector"); token.adjust_attribute_name("zoomandpan", "zoomAndPan"); } void HTMLParser::adjust_foreign_attributes(HTMLToken& token) { token.adjust_foreign_attribute("xlink:actuate", "xlink", "actuate", Namespace::XLink); token.adjust_foreign_attribute("xlink:arcrole", "xlink", "arcrole", Namespace::XLink); token.adjust_foreign_attribute("xlink:href", "xlink", "href", Namespace::XLink); token.adjust_foreign_attribute("xlink:role", "xlink", "role", Namespace::XLink); token.adjust_foreign_attribute("xlink:show", "xlink", "show", Namespace::XLink); token.adjust_foreign_attribute("xlink:title", "xlink", "title", Namespace::XLink); token.adjust_foreign_attribute("xlink:type", "xlink", "type", Namespace::XLink); token.adjust_foreign_attribute("xml:lang", "xml", "lang", Namespace::XML); token.adjust_foreign_attribute("xml:space", "xml", "space", Namespace::XML); token.adjust_foreign_attribute("xmlns", "", "xmlns", Namespace::XMLNS); token.adjust_foreign_attribute("xmlns:xlink", "xmlns", "xlink", Namespace::XMLNS); } void HTMLParser::increment_script_nesting_level() { ++m_script_nesting_level; } void HTMLParser::decrement_script_nesting_level() { VERIFY(m_script_nesting_level); --m_script_nesting_level; } void HTMLParser::handle_text(HTMLToken& token) { if (token.is_character()) { insert_character(token.code_point()); return; } if (token.is_end_of_file()) { log_parse_error(); if (current_node().local_name() == HTML::TagNames::script) verify_cast(current_node()).set_already_started({}, true); m_stack_of_open_elements.pop(); m_insertion_mode = m_original_insertion_mode; process_using_the_rules_for(m_insertion_mode, token); return; } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::script) { // Make sure the