5 years ago · fd1b31d0ff
--- a/Libraries/LibWeb/CMakeLists.txt
+++ b/Libraries/LibWeb/CMakeLists.txt
@@ -84,7 +84,9 @@ set(SOURCES
 
				     Layout/LineBox.cpp
			
 
				     Layout/LineBoxFragment.cpp
			
 
				     Parser/CSSParser.cpp
			
 
				+    Parser/HTMLDocumentParser.cpp
			
 
				     Parser/HTMLParser.cpp
			
 
				+    Parser/HTMLToken.cpp
			
 
				     Parser/HTMLTokenizer.cpp
			
 
				     ResourceLoader.cpp
			
 
				     StylePropertiesModel.cpp
			
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
@@ -0,0 +1,263 @@
 
				+/*
			
 
				+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * Redistribution and use in source and binary forms, with or without
			
 
				+ * modification, are permitted provided that the following conditions are met:
			
 
				+ *
			
 
				+ * 1. Redistributions of source code must retain the above copyright notice, this
			
 
				+ *    list of conditions and the following disclaimer.
			
 
				+ *
			
 
				+ * 2. Redistributions in binary form must reproduce the above copyright notice,
			
 
				+ *    this list of conditions and the following disclaimer in the documentation
			
 
				+ *    and/or other materials provided with the distribution.
			
 
				+ *
			
 
				+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
			
 
				+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
			
 
				+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
			
 
				+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
			
 
				+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
			
 
				+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+ */
			
 
				+
			
 
				+#include <LibWeb/DOM/Document.h>
			
 
				+#include <LibWeb/DOM/DocumentType.h>
			
 
				+#include <LibWeb/DOM/ElementFactory.h>
			
 
				+#include <LibWeb/DOM/HTMLFormElement.h>
			
 
				+#include <LibWeb/DOM/HTMLHeadElement.h>
			
 
				+#include <LibWeb/Parser/HTMLDocumentParser.h>
			
 
				+#include <LibWeb/Parser/HTMLToken.h>
			
 
				+
			
 
				+namespace Web {
			
 
				+
			
 
				+// Constructs a parser whose tokenizer reads from `input`.
				+// The constructor body is empty; no tokens are requested until run() is called.
				+HTMLDocumentParser::HTMLDocumentParser(const StringView& input)
				+    : m_tokenizer(input)
				+{
				+}
			
 
				+
			
 
				+// Nothing to release by hand: every member cleans up after itself.
				+HTMLDocumentParser::~HTMLDocumentParser() = default;
			
 
				+
			
 
				+// Tree construction main loop.
				+// Creates a fresh Document, then repeatedly asks the tokenizer for the next
				+// token, logs it, and hands it to the handler for the current insertion mode.
				+// The loop ends when the tokenizer yields no token or an EndOfFile token.
				+void HTMLDocumentParser::run()
				+{
				+    m_document = adopt(*new Document);
				+
				+    while (true) {
				+        auto maybe_token = m_tokenizer.next_token();
				+        if (!maybe_token.has_value())
				+            return;
				+
				+        auto& token = maybe_token.value();
				+        dbg() << "[" << insertion_mode_name() << "] " << token.to_string();
				+
				+        // EndOfFile is logged above but never dispatched to a handler.
				+        if (token.type() == HTMLToken::Type::EndOfFile)
				+            return;
				+
				+        // Each implemented mode dispatches and moves on to the next token.
				+        switch (m_insertion_mode) {
				+        case InsertionMode::Initial:
				+            handle_initial(token);
				+            continue;
				+        case InsertionMode::BeforeHTML:
				+            handle_before_html(token);
				+            continue;
				+        case InsertionMode::BeforeHead:
				+            handle_before_head(token);
				+            continue;
				+        case InsertionMode::InHead:
				+            handle_in_head(token);
				+            continue;
				+        case InsertionMode::InHeadNoscript:
				+            handle_in_head_noscript(token);
				+            continue;
				+        case InsertionMode::AfterHead:
				+            handle_after_head(token);
				+            continue;
				+        case InsertionMode::InBody:
				+            handle_in_body(token);
				+            continue;
				+        case InsertionMode::Text:
				+            handle_text(token);
				+            continue;
				+        default:
				+            break;
				+        }
				+
				+        // Only reached for insertion modes that have no handler yet.
				+        ASSERT_NOT_REACHED();
				+    }
				+}
			
 
				+
			
 
				+// "Initial" insertion mode.
				+// Only DOCTYPE tokens are supported so far: a DocumentType node named after
				+// the token is appended to the document and the mode becomes BeforeHTML.
				+// Any other token trips an assertion.
				+void HTMLDocumentParser::handle_initial(HTMLToken& token)
				+{
				+    if (!token.is_doctype())
				+        ASSERT_NOT_REACHED();
				+
				+    auto doctype_node = adopt(*new DocumentType(document()));
				+    doctype_node->set_name(token.m_doctype.name.to_string());
				+    document().append_child(move(doctype_node));
				+
				+    m_insertion_mode = InsertionMode::BeforeHTML;
				+}
			
 
				+
			
 
				+// "Before html" insertion mode.
				+// Only an <html> start tag is supported so far: its element becomes a child
				+// of the document and the first entry on the stack of open elements, and the
				+// mode becomes BeforeHead. Any other token trips an assertion.
				+void HTMLDocumentParser::handle_before_html(HTMLToken& token)
				+{
				+    const bool is_html_start_tag = token.is_start_tag() && token.tag_name() == "html";
				+    if (!is_html_start_tag)
				+        ASSERT_NOT_REACHED();
				+
				+    auto html_element = create_element_for(token);
				+    document().append_child(html_element);
				+    m_stack_of_open_elements.append(html_element);
				+
				+    m_insertion_mode = InsertionMode::BeforeHead;
				+}
			
 
				+
			
 
				+// Returns the most recently pushed entry of the stack of open elements.
				+NonnullRefPtr<Node> HTMLDocumentParser::current_node()
				+{
				+    auto& bottommost_node = m_stack_of_open_elements.last();
				+    return bottommost_node;
				+}
			
 
				+
			
 
				+// Picks the node that new nodes should be appended to.
				+// For now this is always the current node; foster parenting is not
				+// implemented and trips an assertion if the flag is ever set.
				+RefPtr<Node> HTMLDocumentParser::find_appropriate_place_for_inserting_node()
				+{
				+    RefPtr<Node> insertion_target = current_node();
				+
				+    if (m_foster_parenting)
				+        ASSERT_NOT_REACHED();
				+
				+    return insertion_target;
				+}
			
 
				+
			
 
				+// Creates an element in the parser's document for a tag token, copying every
				+// attribute of the token onto the new element. The element is not inserted
				+// anywhere by this function.
				+NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token)
				+{
				+    auto new_element = create_element(document(), token.tag_name());
				+
				+    for (auto& token_attribute : token.m_tag.attributes) {
				+        auto attribute_name = token_attribute.name_builder.to_string();
				+        auto attribute_value = token_attribute.value_builder.to_string();
				+        new_element->set_attribute(attribute_name, attribute_value);
				+    }
				+
				+    return new_element;
				+}
			
 
				+
			
 
				+// Creates an element for `token`, appends it to the appropriate insertion
				+// location, pushes it onto the stack of open elements and returns it.
				+RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
				+{
				+    auto parent_node = find_appropriate_place_for_inserting_node();
				+    auto new_element = create_element_for(token);
				+
				+    // FIXME: Check if it's possible to insert `element` at `adjusted_insertion_location`
				+    parent_node->append_child(new_element);
				+    m_stack_of_open_elements.append(new_element);
				+
				+    return new_element;
				+}
			
 
				+
			
 
				+// "Before head" insertion mode.
				+// Only a <head> start tag is supported so far: the element is inserted,
				+// remembered as the head element, and the mode becomes InHead.
				+// Any other token trips an assertion.
				+void HTMLDocumentParser::handle_before_head(HTMLToken& token)
				+{
				+    const bool is_head_start_tag = token.is_start_tag() && token.tag_name() == "head";
				+    if (!is_head_start_tag)
				+        ASSERT_NOT_REACHED();
				+
				+    auto head_element = insert_html_element(token);
				+    m_head_element = to<HTMLHeadElement>(head_element);
				+
				+    m_insertion_mode = InsertionMode::InHead;
				+}
			
 
				+
			
 
				+// "In head" insertion mode.
				+// Supported so far:
				+//  - <meta> start tag: insert the element and pop it straight off the stack
				+//    again (self-closing <meta/> is not handled yet and asserts).
				+//  - </head> end tag: pop the head element and switch to AfterHead.
				+// Any other token trips an assertion.
				+void HTMLDocumentParser::handle_in_head(HTMLToken& token)
				+{
				+    const bool is_meta_start_tag = token.is_start_tag() && token.tag_name() == "meta";
				+    if (is_meta_start_tag) {
				+        insert_html_element(token);
				+        m_stack_of_open_elements.take_last();
				+        if (token.is_self_closing())
				+            ASSERT_NOT_REACHED();
				+        return;
				+    }
				+
				+    const bool is_head_end_tag = token.is_end_tag() && token.tag_name() == "head";
				+    if (!is_head_end_tag)
				+        ASSERT_NOT_REACHED();
				+
				+    m_stack_of_open_elements.take_last();
				+    m_insertion_mode = InsertionMode::AfterHead;
				+}
			
 
				+
			
 
				+// "In head noscript" insertion mode: not implemented, every token asserts.
				+void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&)
				+{
				+    ASSERT_NOT_REACHED();
				+}
			
 
				+
			
 
				+// "After head" insertion mode.
				+//
				+// Every token kind the HTML tree construction rules treat specially in this
				+// mode is recognised here, but none of those branches is implemented yet, so
				+// they all trip an assertion. Only the "anything else" path does real work:
				+// it inserts an implicit <body> element and switches to InBody.
				+//
				+// The list of head-content tag names (base, link, meta, title, ...) applies
				+// to START tags, so it is checked in the start-tag branch. Checking it against
				+// end tags would let e.g. a <title> start tag fall through to "anything else"
				+// and get an implicit <body> inserted in front of it.
				+void HTMLDocumentParser::handle_after_head(HTMLToken& token)
				+{
				+    // Character, comment and DOCTYPE tokens: not implemented yet.
				+    if (token.is_character() || token.is_comment() || token.is_doctype())
				+        ASSERT_NOT_REACHED();
				+
				+    if (token.is_start_tag()) {
				+        auto name = token.tag_name();
				+
				+        // <html>, <body> and <frameset> each have dedicated rules: not implemented yet.
				+        if (name == "html" || name == "body" || name == "frameset")
				+            ASSERT_NOT_REACHED();
				+
				+        // Head-content start tags are to be processed with the "in head" rules: not implemented yet.
				+        static const char* head_content_tag_names[] = {
				+            "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "template", "title"
				+        };
				+        for (auto* head_content_tag_name : head_content_tag_names) {
				+            if (name == head_content_tag_name)
				+                ASSERT_NOT_REACHED();
				+        }
				+
				+        // A stray <head> start tag is a parse error that should be ignored: not implemented yet.
				+        if (name == "head")
				+            ASSERT_NOT_REACHED();
				+    } else if (token.is_end_tag()) {
				+        auto name = token.tag_name();
				+
				+        // </template> is to be processed with the "in head" rules: not implemented yet.
				+        if (name == "template")
				+            ASSERT_NOT_REACHED();
				+
				+        // </body>, </html> and </br> act as "anything else"; every other end
				+        // tag is a parse error that should be ignored: not implemented yet.
				+        const bool falls_through_to_anything_else = name == "body" || name == "html" || name == "br";
				+        if (!falls_through_to_anything_else)
				+            ASSERT_NOT_REACHED();
				+    }
				+
				+    // Anything else: insert an implicit <body> element and continue in InBody.
				+    // FIXME: The current token should be reprocessed in the new insertion mode.
				+    HTMLToken fake_body_token;
				+    fake_body_token.m_type = HTMLToken::Type::StartTag;
				+    fake_body_token.m_tag.tag_name.append("body");
				+    insert_html_element(fake_body_token);
				+    m_insertion_mode = InsertionMode::InBody;
				+}
			
 
				+
			
 
				+// "In body" insertion mode: not implemented, every token asserts.
				+void HTMLDocumentParser::handle_in_body(HTMLToken&)
				+{
				+    ASSERT_NOT_REACHED();
				+}
			
 
				+
			
 
				+// "Text" insertion mode: not implemented, every token asserts.
				+void HTMLDocumentParser::handle_text(HTMLToken&)
				+{
				+    ASSERT_NOT_REACHED();
				+}
			
 
				+
			
 
				+// Returns the current insertion mode's enumerator name as a C string, for
				+// debug output. The cases are generated from ENUMERATE_INSERTION_MODES, so
				+// every listed mode is covered; a value outside that list trips an assertion.
				+const char* HTMLDocumentParser::insertion_mode_name() const
				+{
				+    const char* mode_name = nullptr;
				+
				+    switch (m_insertion_mode) {
				+#define __ENUMERATE_INSERTION_MODE(mode) \
				+    case InsertionMode::mode:            \
				+        mode_name = #mode;               \
				+        break;
				+        ENUMERATE_INSERTION_MODES
				+#undef __ENUMERATE_INSERTION_MODE
				+    }
				+
				+    if (!mode_name)
				+        ASSERT_NOT_REACHED();
				+    return mode_name;
				+}
			
 
				+
			
 
				+// Returns the document being built. m_document is assigned at the start of
				+// run(), so this must not be called before run().
				+Document& HTMLDocumentParser::document()
				+{
				+    auto& document_under_construction = *m_document;
				+    return document_under_construction;
				+}
			
 
				+
			
 
				+}
			
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
@@ -0,0 +1,106 @@
 
				+/*
			
 
				+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * Redistribution and use in source and binary forms, with or without
			
 
				+ * modification, are permitted provided that the following conditions are met:
			
 
				+ *
			
 
				+ * 1. Redistributions of source code must retain the above copyright notice, this
			
 
				+ *    list of conditions and the following disclaimer.
			
 
				+ *
			
 
				+ * 2. Redistributions in binary form must reproduce the above copyright notice,
			
 
				+ *    this list of conditions and the following disclaimer in the documentation
			
 
				+ *    and/or other materials provided with the distribution.
			
 
				+ *
			
 
				+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
			
 
				+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
			
 
				+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
			
 
				+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
			
 
				+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
			
 
				+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+ */
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AK/NonnullRefPtrVector.h>
			
 
				+#include <LibWeb/DOM/Node.h>
			
 
				+#include <LibWeb/Parser/HTMLTokenizer.h>
			
 
				+
			
 
				+// X-macro listing the name of every insertion mode, one entry per mode.
				+// To use it, define __ENUMERATE_INSERTION_MODE(mode) to whatever should be
				+// generated per mode, expand ENUMERATE_INSERTION_MODES, then #undef the helper.
				+#define ENUMERATE_INSERTION_MODES               \
			
 
				+    __ENUMERATE_INSERTION_MODE(Initial)         \
			
 
				+    __ENUMERATE_INSERTION_MODE(BeforeHTML)      \
			
 
				+    __ENUMERATE_INSERTION_MODE(BeforeHead)      \
			
 
				+    __ENUMERATE_INSERTION_MODE(InHead)          \
			
 
				+    __ENUMERATE_INSERTION_MODE(InHeadNoscript)  \
			
 
				+    __ENUMERATE_INSERTION_MODE(AfterHead)       \
			
 
				+    __ENUMERATE_INSERTION_MODE(InBody)          \
			
 
				+    __ENUMERATE_INSERTION_MODE(Text)            \
			
 
				+    __ENUMERATE_INSERTION_MODE(InTable)         \
			
 
				+    __ENUMERATE_INSERTION_MODE(InTableText)     \
			
 
				+    __ENUMERATE_INSERTION_MODE(InCaption)       \
			
 
				+    __ENUMERATE_INSERTION_MODE(InColumnGroup)   \
			
 
				+    __ENUMERATE_INSERTION_MODE(InTableBody)     \
			
 
				+    __ENUMERATE_INSERTION_MODE(InRow)           \
			
 
				+    __ENUMERATE_INSERTION_MODE(InCell)          \
			
 
				+    __ENUMERATE_INSERTION_MODE(InSelect)        \
			
 
				+    __ENUMERATE_INSERTION_MODE(InSelectInTable) \
			
 
				+    __ENUMERATE_INSERTION_MODE(InTemplate)      \
			
 
				+    __ENUMERATE_INSERTION_MODE(AfterBody)       \
			
 
				+    __ENUMERATE_INSERTION_MODE(InFrameset)      \
			
 
				+    __ENUMERATE_INSERTION_MODE(AfterFrameset)   \
			
 
				+    __ENUMERATE_INSERTION_MODE(AfterAfterBody)  \
			
 
				+    __ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
			
 
				+
			
 
				+namespace Web {
			
 
				+
			
 
				+// Tree construction stage of the HTML parser: pulls tokens from an
				+// HTMLTokenizer and builds a Document, dispatching each token to a handler
				+// chosen by the current insertion mode.
				+class HTMLDocumentParser {
				+public:
				+    // One enumerator per entry of ENUMERATE_INSERTION_MODES.
				+    enum class InsertionMode {
				+#define __ENUMERATE_INSERTION_MODE(mode) mode,
				+        ENUMERATE_INSERTION_MODES
				+#undef __ENUMERATE_INSERTION_MODE
				+    };
				+
				+    explicit HTMLDocumentParser(const StringView& input);
				+    ~HTMLDocumentParser();
				+
				+    // Creates the document and consumes tokens until the input is exhausted.
				+    void run();
				+
				+    // The document being built; only valid after run() has been called.
				+    Document& document();
				+
				+    InsertionMode insertion_mode() const { return m_insertion_mode; }
				+
				+private:
				+    // Name of the current insertion mode, for debug output.
				+    const char* insertion_mode_name() const;
				+
				+    // Helpers for creating elements and placing them in the tree.
				+    NonnullRefPtr<Node> current_node();
				+    RefPtr<Node> find_appropriate_place_for_inserting_node();
				+    NonnullRefPtr<Element> create_element_for(HTMLToken&);
				+    RefPtr<Element> insert_html_element(HTMLToken&);
				+
				+    // One handler per insertion mode that run() dispatches to.
				+    void handle_initial(HTMLToken&);
				+    void handle_before_html(HTMLToken&);
				+    void handle_before_head(HTMLToken&);
				+    void handle_in_head(HTMLToken&);
				+    void handle_in_head_noscript(HTMLToken&);
				+    void handle_after_head(HTMLToken&);
				+    void handle_in_body(HTMLToken&);
				+    void handle_text(HTMLToken&);
				+
				+    InsertionMode m_insertion_mode { InsertionMode::Initial };
				+    NonnullRefPtrVector<Node> m_stack_of_open_elements;
				+
				+    HTMLTokenizer m_tokenizer;
				+
				+    bool m_foster_parenting { false };
				+
				+    RefPtr<Document> m_document;
				+    RefPtr<HTMLHeadElement> m_head_element;
				+    RefPtr<HTMLFormElement> m_form_element;
				+};
			
 
				+
			
 
				+}
			
--- a/Libraries/LibWeb/Parser/HTMLToken.cpp
+++ b/Libraries/LibWeb/Parser/HTMLToken.cpp
@@ -0,0 +1,84 @@
 
				+/*
			
 
				+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * Redistribution and use in source and binary forms, with or without
			
 
				+ * modification, are permitted provided that the following conditions are met:
			
 
				+ *
			
 
				+ * 1. Redistributions of source code must retain the above copyright notice, this
			
 
				+ *    list of conditions and the following disclaimer.
			
 
				+ *
			
 
				+ * 2. Redistributions in binary form must reproduce the above copyright notice,
			
 
				+ *    this list of conditions and the following disclaimer in the documentation
			
 
				+ *    and/or other materials provided with the distribution.
			
 
				+ *
			
 
				+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
			
 
				+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
			
 
				+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
			
 
				+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
			
 
				+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
			
 
				+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+ */
			
 
				+
			
 
				+#include <LibWeb/Parser/HTMLToken.h>
			
 
				+
			
 
				+namespace Web {
			
 
				+
			
 
				+// Renders this token as a human-readable string for debug logging.
				+// The output starts with the token type name and is followed by the details
				+// that type carries:
				+//  - DOCTYPE:             { name: '<name>' }
				+//  - StartTag / EndTag:   { name: '<tag name>', { attr="value" ... } }
				+//  - Comment / Character: { data: '<data>' }
				+//  - EndOfFile:           nothing further
				+String HTMLToken::to_string() const
				+{
				+    StringBuilder builder;
				+
				+    switch (type()) {
				+    case HTMLToken::Type::DOCTYPE:
				+        builder.append("DOCTYPE");
				+        builder.append(" { name: '");
				+        builder.append(m_doctype.name.to_string());
				+        builder.append("' }");
				+        break;
				+    case HTMLToken::Type::StartTag:
				+        builder.append("StartTag");
				+        break;
				+    case HTMLToken::Type::EndTag:
				+        builder.append("EndTag");
				+        break;
				+    case HTMLToken::Type::Comment:
				+        builder.append("Comment");
				+        break;
				+    case HTMLToken::Type::Character:
				+        builder.append("Character");
				+        break;
				+    case HTMLToken::Type::EndOfFile:
				+        builder.append("EndOfFile");
				+        break;
				+    }
				+
				+    if (type() == HTMLToken::Type::StartTag || type() == HTMLToken::Type::EndTag) {
				+        builder.append(" { name: '");
				+        builder.append(m_tag.tag_name.to_string());
				+        builder.append("', { ");
				+        for (auto& attribute : m_tag.attributes) {
				+            builder.append(attribute.name_builder.to_string());
				+            builder.append("=\"");
				+            builder.append(attribute.value_builder.to_string());
				+            builder.append("\" ");
				+        }
				+        builder.append("} }");
				+    }
				+
				+    if (type() == HTMLToken::Type::Comment || type() == HTMLToken::Type::Character) {
				+        builder.append(" { data: '");
				+        builder.append(m_comment_or_character.data.to_string());
				+        // Close the quote opened above, matching the DOCTYPE and tag output.
				+        builder.append("' }");
				+    }
				+
				+    return builder.to_string();
				+}
			
 
				+
			
 
				+}
			
--- a/Libraries/LibWeb/Parser/HTMLToken.h
+++ b/Libraries/LibWeb/Parser/HTMLToken.h
@@ -34,6 +34,7 @@
 
				 namespace Web {
			
 
				 
			
 
				 class HTMLToken {
			
 
				+    friend class HTMLDocumentParser;
			
 
				     friend class HTMLTokenizer;
			
 
				 
			
 
				 public:
			
@@ -46,8 +47,29 @@ public:
 
				         EndOfFile,
			
 
				     };
			
 
				 
			
 
				+    // Token kind predicates: each one reports whether this token has the named type.
				+    bool is_doctype() const { return type() == Type::DOCTYPE; }
				+    bool is_start_tag() const { return type() == Type::StartTag; }
				+    bool is_end_tag() const { return type() == Type::EndTag; }
				+    bool is_comment() const { return type() == Type::Comment; }
				+    bool is_character() const { return type() == Type::Character; }
				+    bool is_end_of_file() const { return type() == Type::EndOfFile; }
			
 
				+
			
 
				+    // Returns the tag name accumulated so far as a String.
				+    // Only meaningful for start and end tag tokens; asserts otherwise.
				+    String tag_name() const
				+    {
				+        ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
				+        return m_tag.tag_name.to_string();
				+    }
			
 
				+
			
 
				+    // Reports the tag's self-closing flag.
				+    // Only meaningful for start and end tag tokens; asserts otherwise.
				+    bool is_self_closing() const
				+    {
				+        ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
				+        return m_tag.self_closing;
				+    }
			
 
				+
			
 
				     Type type() const { return m_type; }
			
 
				 
			
 
				+    String to_string() const;
			
 
				+
			
 
				 private:
			
 
				     struct AttributeBuilder {
			
 
				         StringBuilder name_builder;
			
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -28,6 +28,8 @@
 
				 #include <LibWeb/Parser/HTMLTokenizer.h>
			
 
				 #include <ctype.h>
			
 
				 
			
 
				+#pragma GCC diagnostic ignored "-Wunused-label"
			
 
				+
			
 
				 //#define TOKENIZER_TRACE
			
 
				 
			
 
				 #define TODO()                                                                                              \
			
@@ -47,6 +49,11 @@
 
				     m_state = State::new_state;          \
			
 
				     goto new_state;
			
 
				 
			
 
				+#define SWITCH_TO_AND_EMIT_CURRENT_TOKEN(new_state) \
			
 
				+    will_switch_to(State::new_state);               \
			
 
				+    m_state = State::new_state;                     \
			
 
				+    return m_current_token;
			
 
				+
			
 
				 #define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
			
 
				 
			
 
				 #define ON(codepoint) \
			
@@ -66,10 +73,12 @@
 
				 
			
 
				 #define ANYTHING_ELSE if (1)
			
 
				 
			
 
				-#define EMIT_EOF_AND_RETURN                       \
			
 
				+#define EMIT_EOF                                  \
			
 
				     create_new_token(HTMLToken::Type::EndOfFile); \
			
 
				-    emit_current_token();                         \
			
 
				-    return;
			
 
				+    return m_current_token;
			
 
				+
			
 
				+#define EMIT_CURRENT_TOKEN \
			
 
				+    return m_current_token;
			
 
				 
			
 
				 #define BEGIN_STATE(state) \
			
 
				     state:                 \
			
@@ -100,7 +109,7 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
 
				     return m_input[m_cursor + offset];
			
 
				 }
			
 
				 
			
 
				-void HTMLTokenizer::run()
			
 
				+Optional<HTMLToken> HTMLTokenizer::next_token()
			
 
				 {
			
 
				     for (;;) {
			
 
				         auto current_input_character = next_codepoint();
			
@@ -118,7 +127,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON_EOF
			
 
				                 {
			
 
				-                    EMIT_EOF_AND_RETURN;
			
 
				+                    EMIT_EOF;
			
 
				                 }
			
 
				                 ANYTHING_ELSE
			
 
				                 {
			
@@ -168,8 +177,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ANYTHING_ELSE
			
 
				                 {
			
@@ -266,8 +274,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ON_ASCII_UPPER_ALPHA
			
 
				                 {
			
@@ -297,8 +304,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ON_EOF
			
 
				                 {
			
@@ -473,8 +479,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ON(0)
			
 
				                 {
			
@@ -504,8 +509,7 @@ void HTMLTokenizer::run()
 
				                 }
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ON_EOF
			
 
				                 {
			
@@ -588,8 +592,7 @@ void HTMLTokenizer::run()
 
				             {
			
 
				                 ON('>')
			
 
				                 {
			
 
				-                    emit_current_token();
			
 
				-                    SWITCH_TO(Data);
			
 
				+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
			
 
				                 }
			
 
				                 ON('!')
			
 
				                 {
			
@@ -741,57 +744,6 @@ bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
 
				     return true;
			
 
				 }
			
 
				 
			
 
				-void HTMLTokenizer::emit_current_token()
			
 
				-{
			
 
				-    StringBuilder builder;
			
 
				-
			
 
				-    switch (m_current_token.type()) {
			
 
				-    case HTMLToken::Type::DOCTYPE:
			
 
				-        builder.append("DOCTYPE");
			
 
				-        builder.append(" { name: '");
			
 
				-        builder.append(m_current_token.m_doctype.name.to_string());
			
 
				-        builder.append("' }");
			
 
				-        break;
			
 
				-    case HTMLToken::Type::StartTag:
			
 
				-        builder.append("StartTag");
			
 
				-        break;
			
 
				-    case HTMLToken::Type::EndTag:
			
 
				-        builder.append("EndTag");
			
 
				-        break;
			
 
				-    case HTMLToken::Type::Comment:
			
 
				-        builder.append("Comment");
			
 
				-        break;
			
 
				-    case HTMLToken::Type::Character:
			
 
				-        builder.append("Character");
			
 
				-        break;
			
 
				-    case HTMLToken::Type::EndOfFile:
			
 
				-        builder.append("EndOfFile");
			
 
				-        break;
			
 
				-    }
			
 
				-
			
 
				-    if (m_current_token.type() == HTMLToken::Type::StartTag || m_current_token.type() == HTMLToken::Type::EndTag) {
			
 
				-        builder.append(" { name: '");
			
 
				-        builder.append(m_current_token.m_tag.tag_name.to_string());
			
 
				-        builder.append("', { ");
			
 
				-        for (auto& attribute : m_current_token.m_tag.attributes) {
			
 
				-            builder.append(attribute.name_builder.to_string());
			
 
				-            builder.append("=\"");
			
 
				-            builder.append(attribute.value_builder.to_string());
			
 
				-            builder.append("\" ");
			
 
				-        }
			
 
				-        builder.append("} }");
			
 
				-    }
			
 
				-
			
 
				-    if (m_current_token.type() == HTMLToken::Type::Comment || m_current_token.type() == HTMLToken::Type::Character) {
			
 
				-        builder.append(" { data: '");
			
 
				-        builder.append(m_current_token.m_comment_or_character.data.to_string());
			
 
				-        builder.append(" }");
			
 
				-    }
			
 
				-
			
 
				-    dbg() << "[" << String::format("%42s", state_name(m_state)) << "] " << builder.to_string();
			
 
				-    m_current_token = {};
			
 
				-}
			
 
				-
			
 
				 void HTMLTokenizer::create_new_token(HTMLToken::Type type)
			
 
				 {
			
 
				     flush_current_character_or_comment_if_needed();
			
@@ -822,8 +774,8 @@ void HTMLTokenizer::will_reconsume_in([[maybe_unused]] State new_state)
 
				 
			
 
				 void HTMLTokenizer::flush_current_character_or_comment_if_needed()
			
 
				 {
			
 
				-    if (m_current_token.type() == HTMLToken::Type::Character || m_current_token.type() == HTMLToken::Type::Comment)
			
 
				-        emit_current_token();
			
 
				+    //if (m_current_token.type() == HTMLToken::Type::Character || m_current_token.type() == HTMLToken::Type::Comment)
			
 
				+//        emit_current_token();
			
 
				 }
			
 
				 
			
 
				 }
			
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.h
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -118,14 +118,13 @@ class HTMLTokenizer {
 
				 public:
			
 
				     explicit HTMLTokenizer(const StringView& input);
			
 
				 
			
 
				-    void run();
			
 
				+    Optional<HTMLToken> next_token();
			
 
				 
			
 
				 private:
			
 
				     Optional<u32> next_codepoint();
			
 
				     Optional<u32> peek_codepoint(size_t offset) const;
			
 
				     bool next_few_characters_are(const StringView&) const;
			
 
				     void consume(const StringView&);
			
 
				-    void emit_current_token();
			
 
				     void create_new_token(HTMLToken::Type);
			
 
				 
			
 
				     enum class State {
			
--- a/Userland/ht.cpp
+++ b/Userland/ht.cpp
@@ -24,13 +24,19 @@
 
				  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				  */
			
 
				 
			
 
				-#include <LibWeb/Parser/HTMLTokenizer.h>
			
 
				-#include <LibCore/File.h>
			
 
				 #include <AK/ByteBuffer.h>
			
 
				 #include <AK/LogStream.h>
			
 
				+#include <LibCore/EventLoop.h>
			
 
				+#include <LibCore/File.h>
			
 
				+#include <LibWeb/DOM/Document.h>
			
 
				+#include <LibWeb/Dump.h>
			
 
				+#include <LibWeb/Parser/HTMLDocumentParser.h>
			
 
				+#include <LibWeb/Parser/HTMLTokenizer.h>
			
 
				 
			
 
				 int main(int argc, char** argv)
			
 
				 {
			
 
				+    Core::EventLoop loop;
			
 
				+
			
 
				     // This is a temporary test program to aid with bringing up the new HTML parser. :^)
			
 
				     const char* input_path = "/home/anon/www/simple.html";
			
 
				     if (argc > 1)
			
@@ -40,7 +46,12 @@ int main(int argc, char** argv)
 
				     if (file_or_error.is_error())
			
 
				         return 1;
			
 
				     auto contents = file_or_error.value()->read_all();
			
 
				-    Web::HTMLTokenizer tokenizer(contents);
			
 
				-    tokenizer.run();
			
 
				+
			
 
				+    Web::HTMLDocumentParser parser(contents);
			
 
				+    parser.run();
			
 
				+
			
 
				+    auto& document = parser.document();
			
 
				+    Web::dump_tree(document);
			
 
				+
			
 
				     return 0;
			
 
				 }