LibWeb: Implement more of the tokenizer states

Incrementally implement more of the previously unimplemented (TODO) branches in the tokenizer states.
This commit is contained in:
TheDumpap 2020-05-28 00:28:32 +02:00 committed by Andreas Kling
parent c742306b6f
commit d92c9d3772
Notes: sideshowbarker 2024-07-19 18:30:24 +09:00

View file

@ -230,6 +230,11 @@ _StartOfFunction:
{
SWITCH_TO(TagOpen);
}
ON(0)
{
PARSE_ERROR();
EMIT_CURRENT_CHARACTER;
}
ON_EOF
{
EMIT_EOF;
@ -257,12 +262,20 @@ _StartOfFunction:
RECONSUME_IN(TagName);
}
ON('?')
{
PARSE_ERROR();
create_new_token(HTMLToken::Type::Comment);
RECONSUME_IN(BogusComment);
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
PARSE_ERROR();
EMIT_CHARACTER('<');
RECONSUME_IN(Data);
}
}
END_STATE
@ -281,6 +294,22 @@ _StartOfFunction:
{
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
continue;
}
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.tag_name.append("\uFFFD");
continue;
}
ON_EOF
{
PARSE_ERROR();
EMIT_EOF;
}
ANYTHING_ELSE
{
m_current_token.m_tag.tag_name.append(current_input_character.value());
@ -296,6 +325,23 @@ _StartOfFunction:
create_new_token(HTMLToken::Type::EndTag);
RECONSUME_IN(TagName);
}
ON('>')
{
PARSE_ERROR();
SWITCH_TO(Data);
}
ON_EOF
{
PARSE_ERROR();
// FIXME: Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token.
continue;
}
ANYTHING_ELSE
{
PARSE_ERROR();
create_new_token(HTMLToken::Type::Comment);
RECONSUME_IN(BogusComment);
}
}
END_STATE
@ -312,6 +358,27 @@ _StartOfFunction:
}
END_STATE
// Tokenizer state: BogusComment.
// Other states in this tokenizer enter it via RECONSUME_IN(BogusComment) right
// after calling create_new_token(HTMLToken::Type::Comment).
// None of the branches are implemented yet: each input class below only calls
// TODO(). The FIXME notes record what the WHATWG HTML tokenization spec
// prescribes for the "bogus comment state", as a guide for implementing them.
BEGIN_STATE(BogusComment)
{
ON('>')
{
// FIXME: Spec: switch to the data state and emit the current comment token.
TODO();
}
ON_EOF
{
// FIXME: Spec: emit the comment token, then emit an end-of-file token.
TODO();
}
ON(0)
{
// FIXME: Spec: this is a parse error (unexpected-null-character); append
// U+FFFD REPLACEMENT CHARACTER to the comment token's data.
TODO();
}
ANYTHING_ELSE
{
// FIXME: Spec: append the current input character to the comment token's data.
TODO();
}
}
END_STATE
BEGIN_STATE(DOCTYPE)
{
ON_WHITESPACE
@ -1415,7 +1482,8 @@ _StartOfFunction:
}
ON(0)
{
TODO();
PARSE_ERROR();
EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{
@ -1462,11 +1530,19 @@ _StartOfFunction:
{
ON_WHITESPACE
{
TODO();
if (!current_end_tag_token_is_appropriate()) {
// FIXME: Otherwise, treat it as per the "anything else" entry below.
TODO();
}
SWITCH_TO(BeforeAttributeName);
}
ON('/')
{
TODO();
if (!current_end_tag_token_is_appropriate()) {
// FIXME: Otherwise, treat it as per the "anything else" entry below.
TODO();
}
SWITCH_TO(SelfClosingStartTag);
}
ON('>')
{
@ -1503,7 +1579,8 @@ _StartOfFunction:
}
ON(0)
{
TODO();
PARSE_ERROR();
EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{
@ -1550,11 +1627,19 @@ _StartOfFunction:
{
ON_WHITESPACE
{
TODO();
if (!current_end_tag_token_is_appropriate()) {
// FIXME: Otherwise, treat it as per the "anything else" entry below.
TODO();
}
SWITCH_TO(BeforeAttributeName);
}
ON('/')
{
TODO();
if (!current_end_tag_token_is_appropriate()) {
// FIXME: Otherwise, treat it as per the "anything else" entry below.
TODO();
}
SWITCH_TO(SelfClosingStartTag);
}
ON('>')
{
@ -1591,7 +1676,26 @@ _StartOfFunction:
}
ON(0)
{
TODO();
PARSE_ERROR();
EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{
EMIT_EOF;
}
ANYTHING_ELSE
{
EMIT_CURRENT_CHARACTER;
}
}
END_STATE
BEGIN_STATE(PLAINTEXT)
{
ON(0)
{
PARSE_ERROR();
EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{