瀏覽代碼

LibWeb: Implement more of the tokenizer states

Gradually implementing more of the previously unimplemented cases in the tokenizer states.
TheDumpap 5 年之前
父節點
當前提交
d92c9d3772
共有 1 個文件被更改，包括 112 次插入和 8 次刪除
  1. 112 8
      Libraries/LibWeb/Parser/HTMLTokenizer.cpp

+ 112 - 8
Libraries/LibWeb/Parser/HTMLTokenizer.cpp

@@ -230,6 +230,11 @@ _StartOfFunction:
                 {
                     SWITCH_TO(TagOpen);
                 }
+                ON(0)
+                {
+                    PARSE_ERROR();
+                    EMIT_CURRENT_CHARACTER;
+                }
                 ON_EOF
                 {
                     EMIT_EOF;
@@ -257,12 +262,20 @@ _StartOfFunction:
                     RECONSUME_IN(TagName);
                 }
                 ON('?')
+                {
+                    PARSE_ERROR();
+                    create_new_token(HTMLToken::Type::Comment);
+                    RECONSUME_IN(BogusComment);
+                }
+                ON_EOF
                 {
                     TODO();
                 }
                 ANYTHING_ELSE
                 {
-                    TODO();
+                    PARSE_ERROR();
+                    EMIT_CHARACTER('<');
+                    RECONSUME_IN(Data);
                 }
             }
             END_STATE
@@ -281,6 +294,22 @@ _StartOfFunction:
                 {
                     SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
                 }
+                ON_ASCII_UPPER_ALPHA
+                {
+                    m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
+                    continue;
+                }
+                ON(0)
+                {
+                    PARSE_ERROR();
+                    m_current_token.m_tag.tag_name.append("\uFFFD");
+                    continue;
+                }
+                ON_EOF
+                {
+                    PARSE_ERROR();
+                    EMIT_EOF;
+                }
                 ANYTHING_ELSE
                 {
                     m_current_token.m_tag.tag_name.append(current_input_character.value());
@@ -296,6 +325,23 @@ _StartOfFunction:
                     create_new_token(HTMLToken::Type::EndTag);
                     RECONSUME_IN(TagName);
                 }
+                ON('>')
+                {
+                    PARSE_ERROR();
+                    SWITCH_TO(Data);
+                }
+                ON_EOF
+                {
+                    PARSE_ERROR();
+                    // FIXME: Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token.
+                    continue;
+                }
+                ANYTHING_ELSE
+                {
+                    PARSE_ERROR();
+                    create_new_token(HTMLToken::Type::Comment);
+                    RECONSUME_IN(BogusComment);
+                }
             }
             END_STATE
 
@@ -312,6 +358,27 @@ _StartOfFunction:
             }
             END_STATE
 
+            BEGIN_STATE(BogusComment) // HTML "bogus comment" state; entered from the tag-open ('?') and end-tag-open (anything else) branches after a Comment token is created.
+            {
+                ON('>') // Not implemented yet. Spec: switch to the data state and emit the current comment token.
+                {
+                    TODO();
+                }
+                ON_EOF // Not implemented yet. Spec: emit the comment token, then emit an end-of-file token.
+                {
+                    TODO();
+                }
+                ON(0) // Not implemented yet. Spec: unexpected-null-character parse error; append U+FFFD to the comment's data.
+                {
+                    TODO();
+                }
+                ANYTHING_ELSE // Not implemented yet. Spec: append the current input character to the comment's data.
+                {
+                    TODO();
+                }
+            }
+            END_STATE
+
             BEGIN_STATE(DOCTYPE)
             {
                 ON_WHITESPACE
@@ -1415,7 +1482,8 @@ _StartOfFunction:
                 }
                 ON(0)
                 {
-                    TODO();
+                    PARSE_ERROR();
+                    EMIT_CHARACTER("\uFFFD");
                 }
                 ON_EOF
                 {
@@ -1462,11 +1530,19 @@ _StartOfFunction:
             {
                 ON_WHITESPACE
                 {
-                    TODO();
+                    if (!current_end_tag_token_is_appropriate()) {
+                        // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                        TODO();
+                    }
+                    SWITCH_TO(BeforeAttributeName);
                 }
                 ON('/')
                 {
-                    TODO();
+                    if (!current_end_tag_token_is_appropriate()) {
+                        // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                        TODO();
+                    }
+                    SWITCH_TO(SelfClosingStartTag);
                 }
                 ON('>')
                 {
@@ -1503,7 +1579,8 @@ _StartOfFunction:
                 }
                 ON(0)
                 {
-                    TODO();
+                    PARSE_ERROR();
+                    EMIT_CHARACTER("\uFFFD");
                 }
                 ON_EOF
                 {
@@ -1550,11 +1627,19 @@ _StartOfFunction:
             {
                 ON_WHITESPACE
                 {
-                    TODO();
+                    if (!current_end_tag_token_is_appropriate()) {
+                        // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                        TODO();
+                    }
+                    SWITCH_TO(BeforeAttributeName);
                 }
                 ON('/')
                 {
-                    TODO();
+                    if (!current_end_tag_token_is_appropriate()) {
+                        // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                        TODO();
+                    }
+                    SWITCH_TO(SelfClosingStartTag);
                 }
                 ON('>')
                 {
@@ -1591,7 +1676,26 @@ _StartOfFunction:
                 }
                 ON(0)
                 {
-                    TODO();
+                    PARSE_ERROR();
+                    EMIT_CHARACTER("\uFFFD");
+                }
+                ON_EOF
+                {
+                    EMIT_EOF;
+                }
+                ANYTHING_ELSE
+                {
+                    EMIT_CURRENT_CHARACTER;
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(PLAINTEXT)
+            {
+                ON(0)
+                {
+                    PARSE_ERROR();
+                    EMIT_CHARACTER("\uFFFD");
                 }
                 ON_EOF
                 {