From 8b31e41692014ce51f920cfc43fe7736f984f80c Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 15 Jul 2021 00:03:50 +0200 Subject: [PATCH] LibWeb: Change HTMLToken::m_doctype into named DoctypeData struct This is in preparation for an upcoming storage change of HTMLToken. In contrast to the other token types, the accessor can hand out a mutable reference to allow users to change parts of the DoctypeData easily. --- .../LibWeb/HTML/Parser/HTMLDocumentParser.cpp | 18 +-- .../LibWeb/HTML/Parser/HTMLToken.cpp | 2 +- .../Libraries/LibWeb/HTML/Parser/HTMLToken.h | 35 ++++-- .../LibWeb/HTML/Parser/HTMLTokenizer.cpp | 116 +++++++++--------- 4 files changed, 92 insertions(+), 79 deletions(-) diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp index 10fd18ed0e4..5daa31d2b9d 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp @@ -258,15 +258,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const { - if (doctype_token.m_doctype.force_quirks) + if (doctype_token.doctype_data().force_quirks) return DOM::QuirksMode::Yes; // NOTE: The tokenizer puts the name into lower case for us. 
- if (doctype_token.m_doctype.name != "html") + if (doctype_token.doctype_data().name != "html") return DOM::QuirksMode::Yes; - auto const& public_identifier = doctype_token.m_doctype.public_identifier; - auto const& system_identifier = doctype_token.m_doctype.system_identifier; + auto const& public_identifier = doctype_token.doctype_data().public_identifier; + auto const& system_identifier = doctype_token.doctype_data().system_identifier; if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")) return DOM::QuirksMode::Yes; @@ -285,7 +285,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t return DOM::QuirksMode::Yes; } - if (doctype_token.m_doctype.missing_system_identifier) { + if (doctype_token.doctype_data().missing_system_identifier) { if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Yes; @@ -299,7 +299,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; - if (!doctype_token.m_doctype.missing_system_identifier) { + if (!doctype_token.doctype_data().missing_system_identifier) { if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) return DOM::QuirksMode::Limited; @@ -324,9 +324,9 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token) if (token.is_doctype()) { auto doctype = adopt_ref(*new DOM::DocumentType(document())); - doctype->set_name(token.m_doctype.name); - doctype->set_public_id(token.m_doctype.public_identifier); - doctype->set_system_id(token.m_doctype.system_identifier); + doctype->set_name(token.doctype_data().name); + doctype->set_public_id(token.doctype_data().public_identifier); + doctype->set_system_id(token.doctype_data().system_identifier); 
document().append_child(move(doctype)); document().set_quirks_mode(which_quirks_mode(token)); m_insertion_mode = InsertionMode::BeforeHTML; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp index 39fd79e9ab3..df662c7e00d 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp @@ -16,7 +16,7 @@ String HTMLToken::to_string() const case HTMLToken::Type::DOCTYPE: builder.append("DOCTYPE"); builder.append(" { name: '"); - builder.append(m_doctype.name); + builder.append(doctype_data().name); builder.append("' }"); break; case HTMLToken::Type::StartTag: diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h index ea310fe8c92..49de74d50de 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h @@ -47,6 +47,17 @@ public: Position value_end_position; }; + struct DoctypeData { + // NOTE: "Missing" is a distinct state from the empty string. + String name; + String public_identifier; + String system_identifier; + bool missing_name { true }; + bool missing_public_identifier { true }; + bool missing_system_identifier { true }; + bool force_quirks { false }; + }; + static HTMLToken make_character(u32 code_point) { HTMLToken token; @@ -252,6 +263,18 @@ public: }); } + DoctypeData const& doctype_data() const + { + VERIFY(is_doctype()); + return m_doctype; + } + + DoctypeData& doctype_data() + { + VERIFY(is_doctype()); + return m_doctype; + } + Type type() const { return m_type; } String to_string() const; @@ -263,17 +286,7 @@ private: Type m_type { Type::Invalid }; // Type::DOCTYPE - struct { - // NOTE: "Missing" is a distinct state from the empty string. 
- - String name; - bool missing_name { true }; - String public_identifier; - bool missing_public_identifier { true }; - String system_identifier; - bool missing_system_identifier { true }; - bool force_quirks { false }; - } m_doctype; + DoctypeData m_doctype; // Type::StartTag // Type::EndTag diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index f43f6bc702f..fb730b28d53 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -440,7 +440,7 @@ _StartOfFunction: { log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -462,7 +462,7 @@ _StartOfFunction: { create_new_token(HTMLToken::Type::DOCTYPE); m_current_builder.append_code_point(to_ascii_lowercase(current_input_character.value())); - m_current_token.m_doctype.missing_name = false; + m_current_token.doctype_data().missing_name = false; SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName); } ON(0) @@ -470,21 +470,21 @@ _StartOfFunction: log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_builder.append_code_point(0xFFFD); - m_current_token.m_doctype.missing_name = false; + m_current_token.doctype_data().missing_name = false; SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName); } ON('>') { log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -492,7 +492,7 @@ _StartOfFunction: { 
create_new_token(HTMLToken::Type::DOCTYPE); m_current_builder.append_code_point(current_input_character.value()); - m_current_token.m_doctype.missing_name = false; + m_current_token.doctype_data().missing_name = false; SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName); } } @@ -502,12 +502,12 @@ _StartOfFunction: { ON_WHITESPACE { - m_current_token.m_doctype.name = consume_current_builder(); + m_current_token.doctype_data().name = consume_current_builder(); SWITCH_TO(AfterDOCTYPEName); } ON('>') { - m_current_token.m_doctype.name = consume_current_builder(); + m_current_token.doctype_data().name = consume_current_builder(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_ASCII_UPPER_ALPHA @@ -524,7 +524,7 @@ _StartOfFunction: ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -549,7 +549,7 @@ _StartOfFunction: ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -562,7 +562,7 @@ _StartOfFunction: SWITCH_TO(AfterDOCTYPESystemKeyword); } log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -577,32 +577,32 @@ _StartOfFunction: ON('"') { log_parse_error(); - m_current_token.m_doctype.missing_public_identifier = false; + m_current_token.doctype_data().missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { log_parse_error(); - m_current_token.m_doctype.missing_public_identifier = false; + m_current_token.doctype_data().missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; 
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -617,34 +617,34 @@ _StartOfFunction: ON('"') { log_parse_error(); - m_current_token.m_doctype.system_identifier = {}; - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().system_identifier = {}; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { log_parse_error(); - m_current_token.m_doctype.system_identifier = {}; - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().system_identifier = {}; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -658,31 +658,31 @@ _StartOfFunction: } ON('"') { - m_current_token.m_doctype.missing_public_identifier = false; + m_current_token.doctype_data().missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { - m_current_token.m_doctype.missing_public_identifier = false; + m_current_token.doctype_data().missing_public_identifier = false; 
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -696,31 +696,31 @@ _StartOfFunction: } ON('"') { - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -730,7 +730,7 @@ _StartOfFunction: { ON('"') { - m_current_token.m_doctype.public_identifier = consume_current_builder(); + m_current_token.doctype_data().public_identifier = consume_current_builder(); SWITCH_TO(AfterDOCTYPEPublicIdentifier); } ON(0) @@ -742,14 +742,14 @@ _StartOfFunction: ON('>') { log_parse_error(); - m_current_token.m_doctype.public_identifier = consume_current_builder(); - m_current_token.m_doctype.force_quirks = true; + 
m_current_token.doctype_data().public_identifier = consume_current_builder(); + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -765,7 +765,7 @@ _StartOfFunction: { ON('\'') { - m_current_token.m_doctype.public_identifier = consume_current_builder(); + m_current_token.doctype_data().public_identifier = consume_current_builder(); SWITCH_TO(AfterDOCTYPEPublicIdentifier); } ON(0) @@ -777,14 +777,14 @@ _StartOfFunction: ON('>') { log_parse_error(); - m_current_token.m_doctype.public_identifier = consume_current_builder(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().public_identifier = consume_current_builder(); + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -800,7 +800,7 @@ _StartOfFunction: { ON('"') { - m_current_token.m_doctype.public_identifier = consume_current_builder(); + m_current_token.doctype_data().system_identifier = consume_current_builder(); SWITCH_TO(AfterDOCTYPESystemIdentifier); } ON(0) @@ -812,14 +812,14 @@ _StartOfFunction: ON('>') { log_parse_error(); - m_current_token.m_doctype.public_identifier = consume_current_builder(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().system_identifier = consume_current_builder(); + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ 
-835,7 +835,7 @@ _StartOfFunction: { ON('\'') { - m_current_token.m_doctype.system_identifier = consume_current_builder(); + m_current_token.doctype_data().system_identifier = consume_current_builder(); SWITCH_TO(AfterDOCTYPESystemIdentifier); } ON(0) @@ -847,14 +847,14 @@ _StartOfFunction: ON('>') { log_parse_error(); - m_current_token.m_doctype.system_identifier = consume_current_builder(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().system_identifier = consume_current_builder(); + m_current_token.doctype_data().force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } @@ -879,26 +879,26 @@ _StartOfFunction: ON('"') { log_parse_error(); - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { log_parse_error(); - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -916,25 +916,25 @@ _StartOfFunction: } ON('"') { - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { - m_current_token.m_doctype.missing_system_identifier = false; + m_current_token.doctype_data().missing_system_identifier 
= false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; } ANYTHING_ELSE { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } } @@ -953,7 +953,7 @@ _StartOfFunction: ON_EOF { log_parse_error(); - m_current_token.m_doctype.force_quirks = true; + m_current_token.doctype_data().force_quirks = true; m_queued_tokens.enqueue(move(m_current_token)); EMIT_EOF; }