LibWeb: Change HTMLToken::m_doctype into named DoctypeData struct

This is in preparation for an upcoming storage change of HTMLToken. In
contrast to the other token types, the accessor can hand out a mutable
reference to allow users to change parts of the DoctypeData easily.
This commit is contained in:
Max Wipfli 2021-07-15 00:03:50 +02:00 committed by Ali Mohammad Pur
parent 918bde98b1
commit 8b31e41692
Notes: sideshowbarker 2024-07-18 08:52:51 +09:00
4 changed files with 92 additions and 79 deletions

View file

@ -258,15 +258,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
{
if (doctype_token.m_doctype.force_quirks)
if (doctype_token.doctype_data().force_quirks)
return DOM::QuirksMode::Yes;
// NOTE: The tokenizer puts the name into lower case for us.
if (doctype_token.m_doctype.name != "html")
if (doctype_token.doctype_data().name != "html")
return DOM::QuirksMode::Yes;
auto const& public_identifier = doctype_token.m_doctype.public_identifier;
auto const& system_identifier = doctype_token.m_doctype.system_identifier;
auto const& public_identifier = doctype_token.doctype_data().public_identifier;
auto const& system_identifier = doctype_token.doctype_data().system_identifier;
if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//"))
return DOM::QuirksMode::Yes;
@ -285,7 +285,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t
return DOM::QuirksMode::Yes;
}
if (doctype_token.m_doctype.missing_system_identifier) {
if (doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes;
@ -299,7 +299,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
if (!doctype_token.m_doctype.missing_system_identifier) {
if (!doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
@ -324,9 +324,9 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
if (token.is_doctype()) {
auto doctype = adopt_ref(*new DOM::DocumentType(document()));
doctype->set_name(token.m_doctype.name);
doctype->set_public_id(token.m_doctype.public_identifier);
doctype->set_system_id(token.m_doctype.system_identifier);
doctype->set_name(token.doctype_data().name);
doctype->set_public_id(token.doctype_data().public_identifier);
doctype->set_system_id(token.doctype_data().system_identifier);
document().append_child(move(doctype));
document().set_quirks_mode(which_quirks_mode(token));
m_insertion_mode = InsertionMode::BeforeHTML;

View file

@ -16,7 +16,7 @@ String HTMLToken::to_string() const
case HTMLToken::Type::DOCTYPE:
builder.append("DOCTYPE");
builder.append(" { name: '");
builder.append(m_doctype.name);
builder.append(doctype_data().name);
builder.append("' }");
break;
case HTMLToken::Type::StartTag:

View file

@ -47,6 +47,17 @@ public:
Position value_end_position;
};
// Payload carried by a token of Type::DOCTYPE.
//
// Each textual field is paired with a "missing" flag: a field that was never
// specified is not the same thing as a field that was specified as the empty
// string, so the flag (not the string contents) records which one applies.
struct DoctypeData {
    // Textual parts of the DOCTYPE.
    String name;
    String public_identifier;
    String system_identifier;

    // All parts start out as "missing" until something marks them present.
    bool missing_name = true;
    bool missing_public_identifier = true;
    bool missing_system_identifier = true;

    // Off by default; consulted when deciding the document's quirks mode.
    bool force_quirks = false;
};
static HTMLToken make_character(u32 code_point)
{
HTMLToken token;
@ -252,6 +263,18 @@ public:
});
}
// Read-only access to the DOCTYPE payload of this token.
// Must only be called on a DOCTYPE token; this is enforced with VERIFY.
DoctypeData const& doctype_data() const
{
VERIFY(is_doctype());
return m_doctype;
}
// Mutable access to the DOCTYPE payload of this token, so callers can
// update individual fields (name, identifiers, flags) in place.
// Must only be called on a DOCTYPE token; this is enforced with VERIFY.
DoctypeData& doctype_data()
{
VERIFY(is_doctype());
return m_doctype;
}
Type type() const { return m_type; }
String to_string() const;
@ -263,17 +286,7 @@ private:
Type m_type { Type::Invalid };
// Type::DOCTYPE
struct {
// NOTE: "Missing" is a distinct state from the empty string.
String name;
bool missing_name { true };
String public_identifier;
bool missing_public_identifier { true };
String system_identifier;
bool missing_system_identifier { true };
bool force_quirks { false };
} m_doctype;
DoctypeData m_doctype;
// Type::StartTag
// Type::EndTag

View file

@ -440,7 +440,7 @@ _StartOfFunction:
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -462,7 +462,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(to_ascii_lowercase(current_input_character.value()));
m_current_token.m_doctype.missing_name = false;
m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
ON(0)
@ -470,21 +470,21 @@ _StartOfFunction:
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(0xFFFD);
m_current_token.m_doctype.missing_name = false;
m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
ON('>')
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -492,7 +492,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(current_input_character.value());
m_current_token.m_doctype.missing_name = false;
m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
}
@ -502,12 +502,12 @@ _StartOfFunction:
{
ON_WHITESPACE
{
m_current_token.m_doctype.name = consume_current_builder();
m_current_token.doctype_data().name = consume_current_builder();
SWITCH_TO(AfterDOCTYPEName);
}
ON('>')
{
m_current_token.m_doctype.name = consume_current_builder();
m_current_token.doctype_data().name = consume_current_builder();
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_ASCII_UPPER_ALPHA
@ -524,7 +524,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -549,7 +549,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -562,7 +562,7 @@ _StartOfFunction:
SWITCH_TO(AfterDOCTYPESystemKeyword);
}
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -577,32 +577,32 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
m_current_token.m_doctype.missing_public_identifier = false;
m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
m_current_token.m_doctype.missing_public_identifier = false;
m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -617,34 +617,34 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
m_current_token.m_doctype.system_identifier = {};
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().system_identifier = {};
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
m_current_token.m_doctype.system_identifier = {};
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().system_identifier = {};
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -658,31 +658,31 @@ _StartOfFunction:
}
ON('"')
{
m_current_token.m_doctype.missing_public_identifier = false;
m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.missing_public_identifier = false;
m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -696,31 +696,31 @@ _StartOfFunction:
}
ON('"')
{
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -730,7 +730,7 @@ _StartOfFunction:
{
ON('"')
{
m_current_token.m_doctype.public_identifier = consume_current_builder();
m_current_token.doctype_data().public_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
@ -742,14 +742,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
m_current_token.m_doctype.public_identifier = consume_current_builder();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().public_identifier = consume_current_builder();
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -765,7 +765,7 @@ _StartOfFunction:
{
ON('\'')
{
m_current_token.m_doctype.public_identifier = consume_current_builder();
m_current_token.doctype_data().public_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
@ -777,14 +777,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
m_current_token.m_doctype.public_identifier = consume_current_builder();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().public_identifier = consume_current_builder();
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -800,7 +800,7 @@ _StartOfFunction:
{
ON('"')
{
// Closing quote of a double-quoted *system* identifier: the text collected in
// the builder is the system identifier, and we move on to the
// AfterDOCTYPESystemIdentifier state. Storing it in public_identifier (as this
// handler previously did) would clobber the public identifier and leave the
// system identifier empty; the single-quoted variant of this state already
// assigns system_identifier.
m_current_token.m_doctype.system_identifier = consume_current_builder();
m_current_token.doctype_data().system_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
@ -812,14 +812,14 @@ _StartOfFunction:
ON('>')
{
// Abrupt end of the DOCTYPE while inside a double-quoted *system* identifier:
// this is a parse error, the token is flagged force-quirks, and it is emitted.
// Whatever was collected so far is the (partial) system identifier, so it must
// be stored in system_identifier rather than overwriting public_identifier;
// this matches the single-quoted variant of this state.
log_parse_error();
m_current_token.m_doctype.system_identifier = consume_current_builder();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().system_identifier = consume_current_builder();
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -835,7 +835,7 @@ _StartOfFunction:
{
ON('\'')
{
m_current_token.m_doctype.system_identifier = consume_current_builder();
m_current_token.doctype_data().system_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
@ -847,14 +847,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
m_current_token.m_doctype.system_identifier = consume_current_builder();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().system_identifier = consume_current_builder();
m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@ -879,26 +879,26 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -916,25 +916,25 @@ _StartOfFunction:
}
ON('"')
{
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.missing_system_identifier = false;
m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@ -953,7 +953,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
m_current_token.m_doctype.force_quirks = true;
m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}