mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibWeb: Implement more table support in the new HTML parser
This is enough to parse the Google front page! (Note: I did have to hack the tokenizer while parsing Google, in order to avoid named character references screwing everything up. We'll fix that too soon enough!)
This commit is contained in:
parent
7f18c51f4c
commit
ebb1649a52
Notes:
sideshowbarker
2024-07-19 06:03:28 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/ebb1649a520
5 changed files with 142 additions and 1 deletions
|
@ -118,6 +118,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
|
|||
case InsertionMode::InTable:
|
||||
handle_in_table(token);
|
||||
break;
|
||||
case InsertionMode::InTableBody:
|
||||
handle_in_table_body(token);
|
||||
break;
|
||||
case InsertionMode::InRow:
|
||||
handle_in_row(token);
|
||||
break;
|
||||
case InsertionMode::InCell:
|
||||
handle_in_cell(token);
|
||||
break;
|
||||
default:
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
@ -507,6 +516,9 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
|
|||
if (m_list_of_active_formatting_elements.is_empty())
|
||||
return;
|
||||
|
||||
if (m_list_of_active_formatting_elements.entries().last().is_marker())
|
||||
return;
|
||||
|
||||
if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
|
||||
return;
|
||||
|
||||
|
@ -853,6 +865,108 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
|
|||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::clear_the_stack_back_to_a_table_context()
|
||||
{
|
||||
while (!current_node().tag_name().is_one_of("table", "template", "html"))
|
||||
m_stack_of_open_elements.pop();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::clear_the_stack_back_to_a_table_row_context()
|
||||
{
|
||||
while (!current_node().tag_name().is_one_of("tr", "template", "html"))
|
||||
m_stack_of_open_elements.pop();
|
||||
}
|
||||
|
||||
void HTMLDocumentParser::clear_the_stack_back_to_a_table_body_context()
|
||||
{
|
||||
while (!current_node().tag_name().is_one_of("tbody", "tfoot", "thead", "template", "html"))
|
||||
m_stack_of_open_elements.pop();
|
||||
}
|
||||
|
||||
// Token handler for InsertionMode::InRow.
//
// Implemented so far:
//  - <td>/<th> start tags: open a cell and switch to InCell.
//  - </tr> end tag: close the row and switch back to InTableBody.
// Every other token reaches TODO().
void HTMLDocumentParser::handle_in_row(HTMLToken& token)
{
    const bool opens_cell = token.is_start_tag() && token.tag_name().is_one_of("th", "td");
    if (opens_cell) {
        clear_the_stack_back_to_a_table_row_context();
        insert_html_element(token);
        m_insertion_mode = InsertionMode::InCell;
        // The marker is what clear_up_to_the_last_marker() later stops at
        // when the cell is closed.
        m_list_of_active_formatting_elements.add_marker();
        return;
    }

    const bool closes_row = token.is_end_tag() && token.tag_name() == "tr";
    if (closes_row) {
        if (m_stack_of_open_elements.has_in_table_scope("tr")) {
            clear_the_stack_back_to_a_table_row_context();
            // Pop the current node left in place by the clearing step above.
            m_stack_of_open_elements.pop();
            m_insertion_mode = InsertionMode::InTableBody;
        } else {
            // No "tr" in table scope: report the error and ignore the token.
            PARSE_ERROR();
        }
        return;
    }

    TODO();
}
|
||||
|
||||
// Token handler for InsertionMode::InCell.
//
//  - </td> / </th>: close the cell and return to InRow.
//  - Table-structure start tags and </table>, </tbody>, </tfoot>, </thead>,
//    </tr>: not implemented yet (TODO()).
//  - </body>, </caption>, </col>, </colgroup>, </html>: parse error, ignored.
//  - Anything else is processed using the InBody rules.
void HTMLDocumentParser::handle_in_cell(HTMLToken& token)
{
    const bool closes_cell = token.is_end_tag() && token.tag_name().is_one_of("td", "th");
    if (closes_cell) {
        if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
            PARSE_ERROR();
            return;
        }

        generate_implied_end_tags();

        if (current_node().tag_name() != token.tag_name()) {
            PARSE_ERROR();
        }

        // Pop up to and including the element whose tag name matches the token.
        bool keep_popping = true;
        while (keep_popping) {
            keep_popping = current_node().tag_name() != token.tag_name();
            m_stack_of_open_elements.pop();
        }

        m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
        m_insertion_mode = InsertionMode::InRow;
        return;
    }

    if (token.is_start_tag()) {
        if (token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
            TODO();
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("body", "caption", "col", "colgroup", "html")) {
            PARSE_ERROR();
            return;
        }

        if (token.tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
            TODO();
        }
    }

    process_using_the_rules_for(InsertionMode::InBody, token);
}
|
||||
|
||||
// Token handler for InsertionMode::InTableBody.
//
//  - <tr> start tag: open a row and switch to InRow.
//  - <caption>, <col>, <colgroup>, <tbody>, <tfoot>, <thead> start tags and
//    the </table> end tag: close the current table section and reprocess the
//    token using the InTable rules.
// Every other token reaches TODO().
void HTMLDocumentParser::handle_in_table_body(HTMLToken& token)
{
    if (token.is_start_tag() && token.tag_name() == "tr") {
        clear_the_stack_back_to_a_table_body_context();
        insert_html_element(token);
        m_insertion_mode = InsertionMode::InRow;
        return;
    }

    if ((token.is_start_tag() && token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "tfoot", "thead"))
        || (token.is_end_tag() && token.tag_name() == "table")) {
        // If no tbody, thead or tfoot element is in table scope, this is a
        // parse error and the token is ignored. Without this guard the
        // clearing step below would stop at "template"/"html" and the
        // following pop() would remove that element instead of a table section.
        if (!m_stack_of_open_elements.has_in_table_scope("tbody")
            && !m_stack_of_open_elements.has_in_table_scope("thead")
            && !m_stack_of_open_elements.has_in_table_scope("tfoot")) {
            PARSE_ERROR();
            return;
        }

        clear_the_stack_back_to_a_table_body_context();
        // Pop the current node left in place by the clearing step above.
        m_stack_of_open_elements.pop();
        m_insertion_mode = InsertionMode::InTable;
        // Reprocess the same token under the new insertion mode.
        process_using_the_rules_for(InsertionMode::InTable, token);
        return;
    }

    TODO();
}
|
||||
|
||||
void HTMLDocumentParser::handle_in_table(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character() && current_node().tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
|
||||
|
@ -879,7 +993,14 @@ void HTMLDocumentParser::handle_in_table(HTMLToken& token)
|
|||
TODO();
|
||||
}
|
||||
if (token.is_start_tag() && token.tag_name().is_one_of("td", "th", "tr")) {
|
||||
TODO();
|
||||
clear_the_stack_back_to_a_table_context();
|
||||
HTMLToken fake_tbody_token;
|
||||
fake_tbody_token.m_type = HTMLToken::Type::StartTag;
|
||||
fake_tbody_token.m_tag.tag_name.append("tbody");
|
||||
insert_html_element(fake_tbody_token);
|
||||
m_insertion_mode = InsertionMode::InTableBody;
|
||||
process_using_the_rules_for(InsertionMode::InTableBody, token);
|
||||
return;
|
||||
}
|
||||
if (token.is_start_tag() && token.tag_name() == "table") {
|
||||
PARSE_ERROR();
|
||||
|
|
|
@ -90,6 +90,9 @@ private:
|
|||
void handle_after_after_body(HTMLToken&);
|
||||
void handle_text(HTMLToken&);
|
||||
void handle_in_table(HTMLToken&);
|
||||
void handle_in_table_body(HTMLToken&);
|
||||
void handle_in_row(HTMLToken&);
|
||||
void handle_in_cell(HTMLToken&);
|
||||
|
||||
void generate_implied_end_tags(const FlyString& exception = {});
|
||||
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
|
||||
|
@ -108,6 +111,9 @@ private:
|
|||
size_t script_nesting_level() const { return m_script_nesting_level; }
|
||||
void reset_the_insertion_mode_appropriately();
|
||||
void run_the_adoption_agency_algorithm(HTMLToken&);
|
||||
void clear_the_stack_back_to_a_table_context();
|
||||
void clear_the_stack_back_to_a_table_body_context();
|
||||
void clear_the_stack_back_to_a_table_row_context();
|
||||
|
||||
InsertionMode m_insertion_mode { InsertionMode::Initial };
|
||||
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
|
||||
|
|
|
@ -71,4 +71,13 @@ void ListOfActiveFormattingElements::remove(Element& element)
|
|||
});
|
||||
}
|
||||
|
||||
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
|
||||
{
|
||||
while (!m_entries.is_empty()) {
|
||||
auto entry = m_entries.take_last();
|
||||
if (entry.is_marker())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -56,6 +56,8 @@ public:
|
|||
|
||||
Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
|
||||
|
||||
void clear_up_to_the_last_marker();
|
||||
|
||||
private:
|
||||
Vector<Entry> m_entries;
|
||||
};
|
||||
|
|
|
@ -37,6 +37,9 @@ public:
|
|||
StackOfOpenElements() { }
|
||||
~StackOfOpenElements();
|
||||
|
||||
// Returns the first entry of m_elements, i.e. the element that was pushed earliest and is still on the stack.
Element& first() { return m_elements.first(); }
|
||||
// Returns the last entry of m_elements, i.e. the most recently pushed element (the one pop() would remove).
Element& last() { return m_elements.last(); }
|
||||
|
||||
// Returns true when the stack holds no elements.
bool is_empty() const { return m_elements.is_empty(); }
|
||||
// Appends the given element to the end of m_elements, making it the new last() entry.
void push(NonnullRefPtr<Element> element) { m_elements.append(move(element)); }
|
||||
// Removes the last entry of m_elements and returns it to the caller.
NonnullRefPtr<Element> pop() { return m_elements.take_last(); }
|
||||
|
|
Loading…
Reference in a new issue