LibWeb: Implement more table support in the new HTML parser

This is enough to parse the Google front page! (Note: I did have to
hack the tokenizer while parsing Google, in order to avoid named
character references screwing everything up. We'll fix that too soon
enough!)
This commit is contained in:
Andreas Kling 2020-05-28 00:27:46 +02:00
parent 7f18c51f4c
commit ebb1649a52
Notes: sideshowbarker 2024-07-19 06:03:28 +09:00
5 changed files with 142 additions and 1 deletions

View file

@ -118,6 +118,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
case InsertionMode::InTable:
handle_in_table(token);
break;
case InsertionMode::InTableBody:
handle_in_table_body(token);
break;
case InsertionMode::InRow:
handle_in_row(token);
break;
case InsertionMode::InCell:
handle_in_cell(token);
break;
default:
ASSERT_NOT_REACHED();
}
@ -507,6 +516,9 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
if (m_list_of_active_formatting_elements.is_empty())
return;
if (m_list_of_active_formatting_elements.entries().last().is_marker())
return;
if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
return;
@ -853,6 +865,108 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
ASSERT_NOT_REACHED();
}
// Pops elements off the stack of open elements until the current node is a
// "table", "template", or "html" element. That element stays on the stack.
void HTMLDocumentParser::clear_the_stack_back_to_a_table_context()
{
    for (;;) {
        if (current_node().tag_name().is_one_of("table", "template", "html"))
            break;
        m_stack_of_open_elements.pop();
    }
}
// Pops elements off the stack of open elements until the current node is a
// "tr", "template", or "html" element. That element stays on the stack.
void HTMLDocumentParser::clear_the_stack_back_to_a_table_row_context()
{
    while (true) {
        if (current_node().tag_name().is_one_of("tr", "template", "html"))
            return;
        m_stack_of_open_elements.pop();
    }
}
void HTMLDocumentParser::clear_the_stack_back_to_a_table_body_context()
{
while (!current_node().tag_name().is_one_of("tbody", "tfoot", "thead", "template", "html"))
m_stack_of_open_elements.pop();
}
// Token handler for InsertionMode::InRow.
//
// Handled so far:
//   - <th> / <td> start tags: open a cell and switch to InCell.
//   - </tr> end tag: close the row and switch back to InTableBody.
// Every other token is not implemented yet and ends up in TODO().
void HTMLDocumentParser::handle_in_row(HTMLToken& token)
{
    if (token.is_start_tag()) {
        if (token.tag_name().is_one_of("th", "td")) {
            clear_the_stack_back_to_a_table_row_context();
            insert_html_element(token);
            m_insertion_mode = InsertionMode::InCell;
            // The marker separates formatting elements opened inside this cell
            // from those opened before it.
            m_list_of_active_formatting_elements.add_marker();
            return;
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name() == "tr") {
            if (m_stack_of_open_elements.has_in_table_scope("tr")) {
                clear_the_stack_back_to_a_table_row_context();
                // The current node is now the row being closed; remove it.
                m_stack_of_open_elements.pop();
                m_insertion_mode = InsertionMode::InTableBody;
            } else {
                // No open <tr> in table scope: report the error and ignore the token.
                PARSE_ERROR();
            }
            return;
        }
    }

    TODO();
}
// Token handler for InsertionMode::InCell.
//
//   - </td> / </th>: close the matching cell and return to InRow.
//   - Table-structure start tags, and </table>, </tbody>, </tfoot>, </thead>,
//     </tr>: not implemented yet (TODO()).
//   - </body>, </caption>, </col>, </colgroup>, </html>: parse error, token ignored.
//   - Anything else is processed with the InBody rules.
void HTMLDocumentParser::handle_in_cell(HTMLToken& token)
{
    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("td", "th")) {
            const auto& cell_tag_name = token.tag_name();

            if (!m_stack_of_open_elements.has_in_table_scope(cell_tag_name)) {
                // No matching open cell in table scope: ignore the token.
                PARSE_ERROR();
                return;
            }

            generate_implied_end_tags();

            if (current_node().tag_name() != cell_tag_name)
                PARSE_ERROR();

            // Pop up to and including the cell element named by the end tag.
            for (;;) {
                if (current_node().tag_name() == cell_tag_name)
                    break;
                m_stack_of_open_elements.pop();
            }
            m_stack_of_open_elements.pop();

            m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
            m_insertion_mode = InsertionMode::InRow;
            return;
        }
    }

    if (token.is_start_tag()) {
        if (token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
            TODO();
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("body", "caption", "col", "colgroup", "html")) {
            PARSE_ERROR();
            return;
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
            TODO();
        }
    }

    process_using_the_rules_for(InsertionMode::InBody, token);
}
// Token handler for InsertionMode::InTableBody.
//
//   - <tr> start tag: open a row and switch to InRow.
//   - <caption>, <col>, <colgroup>, <tbody>, <tfoot>, <thead> start tags and
//     the </table> end tag: close the current table section, switch to
//     InTable, and reprocess the token there. If no tbody, thead, or tfoot
//     element is in table scope, this is a parse error and the token is
//     ignored.
//   - Every other token is not implemented yet and ends up in TODO().
void HTMLDocumentParser::handle_in_table_body(HTMLToken& token)
{
    if (token.is_start_tag() && token.tag_name() == "tr") {
        clear_the_stack_back_to_a_table_body_context();
        insert_html_element(token);
        m_insertion_mode = InsertionMode::InRow;
        return;
    }

    if ((token.is_start_tag() && token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "tfoot", "thead"))
        || (token.is_end_tag() && token.tag_name() == "table")) {
        // Without an open table section in table scope, clearing the stack would
        // stop at "template"/"html" and the pop below would remove that element
        // instead of a table section. Treat this as a parse error and ignore the token.
        if (!m_stack_of_open_elements.has_in_table_scope("tbody")
            && !m_stack_of_open_elements.has_in_table_scope("thead")
            && !m_stack_of_open_elements.has_in_table_scope("tfoot")) {
            PARSE_ERROR();
            return;
        }

        clear_the_stack_back_to_a_table_body_context();
        // The current node is now the table section being closed; remove it.
        m_stack_of_open_elements.pop();
        m_insertion_mode = InsertionMode::InTable;
        process_using_the_rules_for(InsertionMode::InTable, token);
        return;
    }

    TODO();
}
void HTMLDocumentParser::handle_in_table(HTMLToken& token)
{
if (token.is_character() && current_node().tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
@ -879,7 +993,14 @@ void HTMLDocumentParser::handle_in_table(HTMLToken& token)
TODO();
}
if (token.is_start_tag() && token.tag_name().is_one_of("td", "th", "tr")) {
TODO();
clear_the_stack_back_to_a_table_context();
HTMLToken fake_tbody_token;
fake_tbody_token.m_type = HTMLToken::Type::StartTag;
fake_tbody_token.m_tag.tag_name.append("tbody");
insert_html_element(fake_tbody_token);
m_insertion_mode = InsertionMode::InTableBody;
process_using_the_rules_for(InsertionMode::InTableBody, token);
return;
}
if (token.is_start_tag() && token.tag_name() == "table") {
PARSE_ERROR();

View file

@ -90,6 +90,9 @@ private:
// Per-insertion-mode token handlers. Each takes the token being processed.
void handle_after_after_body(HTMLToken&);
void handle_text(HTMLToken&);
// Handler for InsertionMode::InTable.
void handle_in_table(HTMLToken&);
// Handler for InsertionMode::InTableBody.
void handle_in_table_body(HTMLToken&);
// Handler for InsertionMode::InRow.
void handle_in_row(HTMLToken&);
// Handler for InsertionMode::InCell.
void handle_in_cell(HTMLToken&);
// NOTE(review): `exception` presumably names a tag that is exempt from implied
// end tag generation; it defaults to an empty FlyString. Confirm against the definition.
void generate_implied_end_tags(const FlyString& exception = {});
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
@ -108,6 +111,9 @@ private:
// Returns the current value of m_script_nesting_level.
size_t script_nesting_level() const { return m_script_nesting_level; }
void reset_the_insertion_mode_appropriately();
void run_the_adoption_agency_algorithm(HTMLToken&);
// Pops the stack of open elements until the current node is "table", "template", or "html".
void clear_the_stack_back_to_a_table_context();
// Pops the stack of open elements until the current node is "tbody", "tfoot", "thead", "template", or "html".
void clear_the_stack_back_to_a_table_body_context();
// Pops the stack of open elements until the current node is "tr", "template", or "html".
void clear_the_stack_back_to_a_table_row_context();
// Insertion mode currently in effect; starts out as Initial.
InsertionMode m_insertion_mode { InsertionMode::Initial };
// Starts out as Initial.
// NOTE(review): presumably holds the mode to return to after a temporary mode switch; confirm against its users.
InsertionMode m_original_insertion_mode { InsertionMode::Initial };

View file

@ -71,4 +71,13 @@ void ListOfActiveFormattingElements::remove(Element& element)
});
}
// Removes entries from the end of the list until a marker entry has been
// removed (the marker itself is removed too) or the list becomes empty.
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
{
    bool removed_marker = false;
    while (!removed_marker && !m_entries.is_empty()) {
        auto removed_entry = m_entries.take_last();
        removed_marker = removed_entry.is_marker();
    }
}
}

View file

@ -56,6 +56,8 @@ public:
// NOTE(review): judging by the name and the pointer return type, this looks up
// the last element with `tag_name` that comes after the last marker, and may
// return nullptr when there is none. Confirm against the definition.
Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
// Removes trailing entries up to and including the last marker, or all entries if there is no marker.
void clear_up_to_the_last_marker();
private:
Vector<Entry> m_entries;
};

View file

@ -37,6 +37,9 @@ public:
// Default constructor; its body is empty.
StackOfOpenElements() { }
// Destructor; declared here without an inline body.
~StackOfOpenElements();
// Returns the first entry of m_elements, i.e. the bottom of the stack
// (push() appends and pop() removes from the end).
Element& first()
{
    return m_elements.first();
}
// Returns the last entry of m_elements, i.e. the top of the stack
// (the most recently pushed element that has not been popped).
Element& last()
{
    return m_elements.last();
}
// Returns true when the stack holds no elements.
bool is_empty() const
{
    return m_elements.is_empty();
}
// Pushes `element` onto the top of the stack by appending it to m_elements.
void push(NonnullRefPtr<Element> element)
{
    m_elements.append(move(element));
}
// Removes the top element (the last entry of m_elements) and returns it.
NonnullRefPtr<Element> pop()
{
    return m_elements.take_last();
}