LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class follow the actual HTML parsing
spec as closely as possible, to make development as easy and
bug-free as possible. :^)

This is going to become pretty large, but it's pretty cool!
This commit is contained in:
Andreas Kling 2020-05-24 00:14:23 +02:00
parent 0b61e21873
commit fd1b31d0ff
Notes: sideshowbarker 2024-07-19 06:12:03 +09:00
8 changed files with 515 additions and 76 deletions

View file

@ -84,7 +84,9 @@ set(SOURCES
Layout/LineBox.cpp
Layout/LineBoxFragment.cpp
Parser/CSSParser.cpp
Parser/HTMLDocumentParser.cpp
Parser/HTMLParser.cpp
Parser/HTMLToken.cpp
Parser/HTMLTokenizer.cpp
ResourceLoader.cpp
StylePropertiesModel.cpp

View file

@ -0,0 +1,263 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/DocumentType.h>
#include <LibWeb/DOM/ElementFactory.h>
#include <LibWeb/DOM/HTMLFormElement.h>
#include <LibWeb/DOM/HTMLHeadElement.h>
#include <LibWeb/Parser/HTMLDocumentParser.h>
#include <LibWeb/Parser/HTMLToken.h>
namespace Web {
HTMLDocumentParser::HTMLDocumentParser(const StringView& input)
: m_tokenizer(input)
{
}
// Nothing to tear down by hand; the members release themselves. The definition
// stays out-of-line in this translation unit.
HTMLDocumentParser::~HTMLDocumentParser() = default;
// Drives the parse: creates a fresh Document, then repeatedly pulls one token
// from the tokenizer and dispatches it to the handler for the current
// insertion mode. The loop ends when the tokenizer yields no token or an
// EndOfFile token. Insertion modes without a handler trip ASSERT_NOT_REACHED().
void HTMLDocumentParser::run()
{
    m_document = adopt(*new Document);

    while (true) {
        auto next = m_tokenizer.next_token();
        if (!next.has_value())
            break;

        auto& token = next.value();

        // Trace every token together with the mode it is about to be handled in.
        dbg() << "[" << insertion_mode_name() << "] " << token.to_string();

        if (token.is_end_of_file())
            break;

        switch (m_insertion_mode) {
        case InsertionMode::Initial:
            handle_initial(token);
            break;
        case InsertionMode::BeforeHTML:
            handle_before_html(token);
            break;
        case InsertionMode::BeforeHead:
            handle_before_head(token);
            break;
        case InsertionMode::InHead:
            handle_in_head(token);
            break;
        case InsertionMode::InHeadNoscript:
            handle_in_head_noscript(token);
            break;
        case InsertionMode::AfterHead:
            handle_after_head(token);
            break;
        case InsertionMode::InBody:
            handle_in_body(token);
            break;
        case InsertionMode::Text:
            handle_text(token);
            break;
        default:
            // The remaining insertion modes have no handler yet.
            ASSERT_NOT_REACHED();
        }
    }
}
// "Initial" insertion mode. Only DOCTYPE tokens are handled so far: a
// DocumentType node named after the token is appended to the document and the
// parser moves on to "before html". Any other token trips the assertion.
void HTMLDocumentParser::handle_initial(HTMLToken& token)
{
    if (token.is_doctype()) {
        auto doctype_node = adopt(*new DocumentType(document()));
        doctype_node->set_name(token.m_doctype.name.to_string());
        document().append_child(move(doctype_node));

        m_insertion_mode = InsertionMode::BeforeHTML;
        return;
    }

    ASSERT_NOT_REACHED();
}
// "Before html" insertion mode. Only an <html> start tag is handled so far:
// the element is created from the token, appended to the document, pushed onto
// the stack of open elements, and the parser moves on to "before head".
void HTMLDocumentParser::handle_before_html(HTMLToken& token)
{
    const bool is_html_start_tag = token.is_start_tag() && token.tag_name() == "html";
    if (is_html_start_tag) {
        auto html_element = create_element_for(token);
        document().append_child(html_element);
        m_stack_of_open_elements.append(html_element);

        m_insertion_mode = InsertionMode::BeforeHead;
        return;
    }

    // Every other token is unhandled for now.
    ASSERT_NOT_REACHED();
}
// Returns the current node, i.e. the most recently pushed entry of the stack
// of open elements.
NonnullRefPtr<Node> HTMLDocumentParser::current_node()
{
    auto& bottommost_open_element = m_stack_of_open_elements.last();
    return bottommost_open_element;
}
// Picks the node that newly created elements get appended to. For now this is
// always the current node; foster parenting is not implemented and asserts.
RefPtr<Node> HTMLDocumentParser::find_appropriate_place_for_inserting_node()
{
    auto insertion_target = current_node();

    if (m_foster_parenting)
        ASSERT_NOT_REACHED();

    return insertion_target;
}
// Creates an element in this parser's document from a tag token: the element
// is named after the token's tag name and receives a copy of every attribute
// recorded on the token. The element is not inserted anywhere.
NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token)
{
    auto new_element = create_element(document(), token.tag_name());

    for (auto& token_attribute : token.m_tag.attributes)
        new_element->set_attribute(token_attribute.name_builder.to_string(), token_attribute.value_builder.to_string());

    return new_element;
}
// Creates an element for `token`, appends it to the appropriate insertion
// parent, pushes it onto the stack of open elements and returns it.
RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
{
    auto insertion_parent = find_appropriate_place_for_inserting_node();
    auto new_element = create_element_for(token);

    // FIXME: Check if it's possible to insert `new_element` at `insertion_parent`
    insertion_parent->append_child(new_element);
    m_stack_of_open_elements.append(new_element);

    return new_element;
}
// "Before head" insertion mode. Only a <head> start tag is handled so far:
// the element is inserted, remembered as the head element, and the parser
// moves on to "in head". Any other token trips the assertion.
void HTMLDocumentParser::handle_before_head(HTMLToken& token)
{
    const bool is_head_start_tag = token.is_start_tag() && token.tag_name() == "head";
    if (is_head_start_tag) {
        auto head = insert_html_element(token);
        m_head_element = to<HTMLHeadElement>(head);

        m_insertion_mode = InsertionMode::InHead;
        return;
    }

    ASSERT_NOT_REACHED();
}
// "In head" insertion mode. Two tokens are handled so far:
//  - a <meta> start tag: the element is inserted and immediately popped off
//    the stack of open elements again; a self-closing flag is not handled yet
//    and asserts.
//  - a </head> end tag: the head element is popped and the parser moves on to
//    "after head".
// Any other token trips the assertion.
void HTMLDocumentParser::handle_in_head(HTMLToken& token)
{
    const bool is_meta_start_tag = token.is_start_tag() && token.tag_name() == "meta";
    if (is_meta_start_tag) {
        insert_html_element(token);
        m_stack_of_open_elements.take_last();

        if (token.is_self_closing())
            ASSERT_NOT_REACHED();

        return;
    }

    const bool is_head_end_tag = token.is_end_tag() && token.tag_name() == "head";
    if (is_head_end_tag) {
        m_stack_of_open_elements.take_last();
        m_insertion_mode = InsertionMode::AfterHead;
        return;
    }

    ASSERT_NOT_REACHED();
}
// "In head noscript" insertion mode: not implemented yet, so any token that
// reaches this handler trips ASSERT_NOT_REACHED().
void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&)
{
ASSERT_NOT_REACHED();
}
// "After head" insertion mode.
//
// Almost every rule of this mode is still unimplemented and trips
// ASSERT_NOT_REACHED(). The one implemented path is "anything else", which
// inserts an implied <body> element and switches to "in body". It is reached
// by </body>, </html> and </br> end tags and by start tags that match none of
// the rules below.
void HTMLDocumentParser::handle_after_head(HTMLToken& token)
{
    if (token.is_character()) {
        ASSERT_NOT_REACHED();
    }

    if (token.is_comment()) {
        ASSERT_NOT_REACHED();
    }

    if (token.is_doctype()) {
        ASSERT_NOT_REACHED();
    }

    if (token.is_start_tag() && token.tag_name() == "html") {
        ASSERT_NOT_REACHED();
    }

    if (token.is_start_tag() && token.tag_name() == "body") {
        ASSERT_NOT_REACHED();
    }

    if (token.is_start_tag() && token.tag_name() == "frameset") {
        ASSERT_NOT_REACHED();
    }

    if (token.is_start_tag()) {
        // The spec applies this rule to *start* tags of elements that belong in
        // <head>. Matching end tags here would let e.g. a <title> start tag
        // slip through to "anything else" unnoticed.
        static const char* const head_only_tag_names[] = {
            "base", "basefont", "bgsound", "link", "meta",
            "noframes", "script", "style", "template", "title"
        };
        auto tag_name = token.tag_name();
        for (auto* name : head_only_tag_names) {
            if (tag_name == name) {
                ASSERT_NOT_REACHED();
            }
        }
    }

    if (token.is_end_tag() && token.tag_name() == "template") {
        ASSERT_NOT_REACHED();
    }

    // </body>, </html> and </br> skip the parse-error rule below and act as
    // "anything else".
    const bool is_body_html_or_br_end_tag = token.is_end_tag()
        && (token.tag_name() == "body" || token.tag_name() == "html" || token.tag_name() == "br");

    if (!is_body_html_or_br_end_tag) {
        if ((token.is_start_tag() && token.tag_name() == "head") || token.is_end_tag()) {
            ASSERT_NOT_REACHED();
        }
    }

    // Anything else: act as if a <body> start tag with no attributes was seen.
    HTMLToken fake_body_token;
    fake_body_token.m_type = HTMLToken::Type::StartTag;
    fake_body_token.m_tag.tag_name.append("body");
    insert_html_element(fake_body_token);
    m_insertion_mode = InsertionMode::InBody;
    // FIXME: The spec also says to reprocess the current token in the new
    //        insertion mode; the token is currently dropped instead.
}
// "In body" insertion mode: not implemented yet, so any token that reaches
// this handler trips ASSERT_NOT_REACHED().
void HTMLDocumentParser::handle_in_body(HTMLToken&)
{
ASSERT_NOT_REACHED();
}
// "Text" insertion mode: not implemented yet, so any token that reaches this
// handler trips ASSERT_NOT_REACHED().
void HTMLDocumentParser::handle_text(HTMLToken&)
{
ASSERT_NOT_REACHED();
}
// Returns the name of the current insertion mode as a C string; run() uses it
// in its debug trace. The switch cases are generated from
// ENUMERATE_INSERTION_MODES, so every enumerator maps to its own stringified
// identifier.
const char* HTMLDocumentParser::insertion_mode_name() const
{
switch (m_insertion_mode) {
#define __ENUMERATE_INSERTION_MODE(mode) \
case InsertionMode::mode: \
return #mode;
ENUMERATE_INSERTION_MODES
#undef __ENUMERATE_INSERTION_MODE
}
// Only reached if m_insertion_mode holds a value that is not an enumerator.
ASSERT_NOT_REACHED();
}
// Returns the document being built. m_document is only assigned by run(), so
// this dereferences a null pointer if called before run().
Document& HTMLDocumentParser::document()
{
    auto& parsed_document = *m_document;
    return parsed_document;
}
}

View file

@ -0,0 +1,106 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/Parser/HTMLTokenizer.h>
// X-macro listing every insertion mode of the tree builder. Users define
// __ENUMERATE_INSERTION_MODE(mode) to the expansion they need (an enumerator,
// a switch case, ...), expand ENUMERATE_INSERTION_MODES, then #undef it again.
#define ENUMERATE_INSERTION_MODES \
__ENUMERATE_INSERTION_MODE(Initial) \
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
__ENUMERATE_INSERTION_MODE(BeforeHead) \
__ENUMERATE_INSERTION_MODE(InHead) \
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
__ENUMERATE_INSERTION_MODE(AfterHead) \
__ENUMERATE_INSERTION_MODE(InBody) \
__ENUMERATE_INSERTION_MODE(Text) \
__ENUMERATE_INSERTION_MODE(InTable) \
__ENUMERATE_INSERTION_MODE(InTableText) \
__ENUMERATE_INSERTION_MODE(InCaption) \
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
__ENUMERATE_INSERTION_MODE(InTableBody) \
__ENUMERATE_INSERTION_MODE(InRow) \
__ENUMERATE_INSERTION_MODE(InCell) \
__ENUMERATE_INSERTION_MODE(InSelect) \
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
__ENUMERATE_INSERTION_MODE(InTemplate) \
__ENUMERATE_INSERTION_MODE(AfterBody) \
__ENUMERATE_INSERTION_MODE(InFrameset) \
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
namespace Web {
// Tree-building half of the new HTML parser. Owns an HTMLTokenizer, pulls one
// token at a time from it and builds a Document by dispatching each token to
// the handler of the current insertion mode.
class HTMLDocumentParser {
public:
// Creates a parser whose tokenizer reads from `input`.
explicit HTMLDocumentParser(const StringView& input);
~HTMLDocumentParser();
// Creates the document and consumes tokens until the tokenizer runs dry or
// emits an EndOfFile token.
void run();
// The document being built; only valid once run() has been called.
Document& document();
// One enumerator per entry of ENUMERATE_INSERTION_MODES.
enum class InsertionMode {
#define __ENUMERATE_INSERTION_MODE(mode) mode,
ENUMERATE_INSERTION_MODES
#undef __ENUMERATE_INSERTION_MODE
};
InsertionMode insertion_mode() const { return m_insertion_mode; }
private:
// Name of the current insertion mode, for debug output.
const char* insertion_mode_name() const;
// One handler per insertion mode that run() dispatches to.
void handle_initial(HTMLToken&);
void handle_before_html(HTMLToken&);
void handle_before_head(HTMLToken&);
void handle_in_head(HTMLToken&);
void handle_in_head_noscript(HTMLToken&);
void handle_after_head(HTMLToken&);
void handle_in_body(HTMLToken&);
void handle_text(HTMLToken&);
// Creates (but does not insert) an element from a tag token.
NonnullRefPtr<Element> create_element_for(HTMLToken&);
// Node that new elements are appended to; currently always current_node().
RefPtr<Node> find_appropriate_place_for_inserting_node();
// Creates an element for the token, appends it at the insertion place and
// pushes it onto the stack of open elements.
RefPtr<Element> insert_html_element(HTMLToken&);
// Last entry of the stack of open elements.
NonnullRefPtr<Node> current_node();
InsertionMode m_insertion_mode { InsertionMode::Initial };
NonnullRefPtrVector<Node> m_stack_of_open_elements;
HTMLTokenizer m_tokenizer;
// When set, find_appropriate_place_for_inserting_node() asserts (unimplemented).
bool m_foster_parenting { false };
// Assigned by run().
RefPtr<Document> m_document;
// Set when the <head> start tag is handled in the "before head" mode.
RefPtr<HTMLHeadElement> m_head_element;
// NOTE(review): not referenced by the parser implementation yet.
RefPtr<HTMLFormElement> m_form_element;
};
}

View file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/Parser/HTMLToken.h>
namespace Web {
// Renders this token as a human-readable string for debug output.
//
// The string starts with the token type. DOCTYPE tokens append their name,
// start/end tag tokens append the tag name and all attributes, and comment and
// character tokens append their data.
String HTMLToken::to_string() const
{
    StringBuilder builder;

    switch (type()) {
    case HTMLToken::Type::DOCTYPE:
        builder.append("DOCTYPE");
        builder.append(" { name: '");
        builder.append(m_doctype.name.to_string());
        builder.append("' }");
        break;
    case HTMLToken::Type::StartTag:
        builder.append("StartTag");
        break;
    case HTMLToken::Type::EndTag:
        builder.append("EndTag");
        break;
    case HTMLToken::Type::Comment:
        builder.append("Comment");
        break;
    case HTMLToken::Type::Character:
        builder.append("Character");
        break;
    case HTMLToken::Type::EndOfFile:
        builder.append("EndOfFile");
        break;
    }

    if (type() == HTMLToken::Type::StartTag || type() == HTMLToken::Type::EndTag) {
        builder.append(" { name: '");
        builder.append(m_tag.tag_name.to_string());
        builder.append("', { ");
        for (auto& attribute : m_tag.attributes) {
            builder.append(attribute.name_builder.to_string());
            builder.append("=\"");
            builder.append(attribute.value_builder.to_string());
            builder.append("\" ");
        }
        builder.append("} }");
    }

    if (type() == HTMLToken::Type::Comment || type() == HTMLToken::Type::Character) {
        builder.append(" { data: '");
        builder.append(m_comment_or_character.data.to_string());
        // Close the quote opened above, matching the DOCTYPE and tag branches.
        builder.append("' }");
    }

    return builder.to_string();
}
}

View file

@ -34,6 +34,7 @@
namespace Web {
class HTMLToken {
friend class HTMLDocumentParser;
friend class HTMLTokenizer;
public:
@ -46,8 +47,29 @@ public:
EndOfFile,
};
// Type predicates: each returns true when this token has the named type.
bool is_doctype() const { return m_type == Type::DOCTYPE; }
bool is_start_tag() const { return m_type == Type::StartTag; }
bool is_end_tag() const { return m_type == Type::EndTag; }
bool is_comment() const { return m_type == Type::Comment; }
bool is_character() const { return m_type == Type::Character; }
bool is_end_of_file() const { return m_type == Type::EndOfFile; }
// Returns the tag name collected for this token as a String.
// Only meaningful for start and end tag tokens; asserts on any other type.
String tag_name() const
{
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    return m_tag.tag_name.to_string();
}
bool is_self_closing() const
{
ASSERT(is_start_tag() || is_end_tag());
return m_tag.self_closing;
}
Type type() const { return m_type; }
String to_string() const;
private:
struct AttributeBuilder {
StringBuilder name_builder;

View file

@ -28,6 +28,8 @@
#include <LibWeb/Parser/HTMLTokenizer.h>
#include <ctype.h>
#pragma GCC diagnostic ignored "-Wunused-label"
//#define TOKENIZER_TRACE
#define TODO() \
@ -47,6 +49,11 @@
m_state = State::new_state; \
goto new_state;
#define SWITCH_TO_AND_EMIT_CURRENT_TOKEN(new_state) \
will_switch_to(State::new_state); \
m_state = State::new_state; \
return m_current_token;
#define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
#define ON(codepoint) \
@ -66,10 +73,12 @@
#define ANYTHING_ELSE if (1)
#define EMIT_EOF_AND_RETURN \
#define EMIT_EOF \
create_new_token(HTMLToken::Type::EndOfFile); \
emit_current_token(); \
return;
return m_current_token;
#define EMIT_CURRENT_TOKEN \
return m_current_token;
#define BEGIN_STATE(state) \
state: \
@ -100,7 +109,7 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
return m_input[m_cursor + offset];
}
void HTMLTokenizer::run()
Optional<HTMLToken> HTMLTokenizer::next_token()
{
for (;;) {
auto current_input_character = next_codepoint();
@ -118,7 +127,7 @@ void HTMLTokenizer::run()
}
ON_EOF
{
EMIT_EOF_AND_RETURN;
EMIT_EOF;
}
ANYTHING_ELSE
{
@ -168,8 +177,7 @@ void HTMLTokenizer::run()
}
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ANYTHING_ELSE
{
@ -266,8 +274,7 @@ void HTMLTokenizer::run()
}
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_ASCII_UPPER_ALPHA
{
@ -297,8 +304,7 @@ void HTMLTokenizer::run()
}
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
@ -473,8 +479,7 @@ void HTMLTokenizer::run()
}
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON(0)
{
@ -504,8 +509,7 @@ void HTMLTokenizer::run()
}
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
@ -588,8 +592,7 @@ void HTMLTokenizer::run()
{
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON('!')
{
@ -741,57 +744,6 @@ bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
return true;
}
void HTMLTokenizer::emit_current_token()
{
StringBuilder builder;
switch (m_current_token.type()) {
case HTMLToken::Type::DOCTYPE:
builder.append("DOCTYPE");
builder.append(" { name: '");
builder.append(m_current_token.m_doctype.name.to_string());
builder.append("' }");
break;
case HTMLToken::Type::StartTag:
builder.append("StartTag");
break;
case HTMLToken::Type::EndTag:
builder.append("EndTag");
break;
case HTMLToken::Type::Comment:
builder.append("Comment");
break;
case HTMLToken::Type::Character:
builder.append("Character");
break;
case HTMLToken::Type::EndOfFile:
builder.append("EndOfFile");
break;
}
if (m_current_token.type() == HTMLToken::Type::StartTag || m_current_token.type() == HTMLToken::Type::EndTag) {
builder.append(" { name: '");
builder.append(m_current_token.m_tag.tag_name.to_string());
builder.append("', { ");
for (auto& attribute : m_current_token.m_tag.attributes) {
builder.append(attribute.name_builder.to_string());
builder.append("=\"");
builder.append(attribute.value_builder.to_string());
builder.append("\" ");
}
builder.append("} }");
}
if (m_current_token.type() == HTMLToken::Type::Comment || m_current_token.type() == HTMLToken::Type::Character) {
builder.append(" { data: '");
builder.append(m_current_token.m_comment_or_character.data.to_string());
builder.append(" }");
}
dbg() << "[" << String::format("%42s", state_name(m_state)) << "] " << builder.to_string();
m_current_token = {};
}
void HTMLTokenizer::create_new_token(HTMLToken::Type type)
{
flush_current_character_or_comment_if_needed();
@ -822,8 +774,8 @@ void HTMLTokenizer::will_reconsume_in([[maybe_unused]] State new_state)
void HTMLTokenizer::flush_current_character_or_comment_if_needed()
{
if (m_current_token.type() == HTMLToken::Type::Character || m_current_token.type() == HTMLToken::Type::Comment)
emit_current_token();
//if (m_current_token.type() == HTMLToken::Type::Character || m_current_token.type() == HTMLToken::Type::Comment)
// emit_current_token();
}
}

View file

@ -118,14 +118,13 @@ class HTMLTokenizer {
public:
explicit HTMLTokenizer(const StringView& input);
void run();
Optional<HTMLToken> next_token();
private:
Optional<u32> next_codepoint();
Optional<u32> peek_codepoint(size_t offset) const;
bool next_few_characters_are(const StringView&) const;
void consume(const StringView&);
void emit_current_token();
void create_new_token(HTMLToken::Type);
enum class State {

View file

@ -24,13 +24,19 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/Parser/HTMLTokenizer.h>
#include <LibCore/File.h>
#include <AK/ByteBuffer.h>
#include <AK/LogStream.h>
#include <LibCore/EventLoop.h>
#include <LibCore/File.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/Dump.h>
#include <LibWeb/Parser/HTMLDocumentParser.h>
#include <LibWeb/Parser/HTMLTokenizer.h>
int main(int argc, char** argv)
{
Core::EventLoop loop;
// This is a temporary test program to aid with bringing up the new HTML parser. :^)
const char* input_path = "/home/anon/www/simple.html";
if (argc > 1)
@ -40,7 +46,12 @@ int main(int argc, char** argv)
if (file_or_error.is_error())
return 1;
auto contents = file_or_error.value()->read_all();
Web::HTMLTokenizer tokenizer(contents);
tokenizer.run();
Web::HTMLDocumentParser parser(contents);
parser.run();
auto& document = parser.document();
Web::dump_tree(document);
return 0;
}