123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769 |
- /*
- * Copyright (c) 2020-2021, the SerenityOS developers.
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/SourceLocation.h>
- #include <LibWeb/CSS/Parser/AtStyleRule.h>
- #include <LibWeb/CSS/Parser/DeclarationOrAtRule.h>
- #include <LibWeb/CSS/Parser/Parser.h>
- #include <LibWeb/CSS/Parser/QualifiedStyleRule.h>
- #include <LibWeb/CSS/Parser/StyleBlockRule.h>
- #include <LibWeb/CSS/Parser/StyleComponentValueRule.h>
- #include <LibWeb/CSS/Parser/StyleFunctionRule.h>
- #include <LibWeb/CSS/Selector.h>
- #include <LibWeb/Dump.h>
- #define CSS_PARSER_TRACE 1
- static void log_parse_error(const SourceLocation& location = SourceLocation::current())
- {
- dbgln_if(CSS_PARSER_TRACE, "Parse error (CSS) {}", location);
- }
- namespace Web::CSS {
- Parser::Parser(const StringView& input, const String& encoding)
- : m_tokenizer(input, encoding)
- {
- m_tokens = m_tokenizer.parse();
- }
- Parser::~Parser()
- {
- }
- Token Parser::peek_token()
- {
- size_t next_offset = m_iterator_offset + 1;
- if (next_offset < m_tokens.size()) {
- return m_tokens.at(next_offset);
- }
- return m_tokens.at(m_iterator_offset);
- }
- Token Parser::next_token()
- {
- if (m_iterator_offset < (int)m_tokens.size()) {
- ++m_iterator_offset;
- }
- auto token = m_tokens.at(m_iterator_offset);
- return token;
- }
- Token Parser::current_token()
- {
- return m_tokens.at(m_iterator_offset);
- }
- Vector<QualifiedStyleRule> Parser::parse_as_stylesheet()
- {
- auto rules = consume_a_list_of_rules(true);
- dbgln("Printing rules:");
- for (auto& rule : rules) {
- dbgln("PRE:");
- for (auto& pre : rule.m_prelude) {
- dbgln("{}", pre);
- }
- dbgln("BLOCK:");
- dbgln("{}", rule.m_block.to_string());
- dbgln("");
- auto selectors = parse_selectors(rule.m_prelude);
- CSS::Selector selector = Selector(move(selectors));
- dump_selector(selector);
- }
- return rules;
- }
- Vector<CSS::Selector::ComplexSelector> Parser::parse_selectors(Vector<String> parts)
- {
- // TODO:
- // This is a mess because the prelude is parsed as a string.
- // It should really be parsed as its class, but the cpp gods have forsaken me
- // and i cant make it work due to cyclic includes.
- Vector<CSS::Selector::ComplexSelector> selectors;
- size_t index = 0;
- auto parse_simple_selector = [&]() -> Optional<CSS::Selector::SimpleSelector> {
- if (index >= parts.size()) {
- return {};
- }
- auto currentToken = parts.at(index);
- CSS::Selector::SimpleSelector::Type type;
- if (currentToken == "*") {
- type = CSS::Selector::SimpleSelector::Type::Universal;
- index++;
- return CSS::Selector::SimpleSelector {
- type,
- CSS::Selector::SimpleSelector::PseudoClass::None,
- CSS::Selector::SimpleSelector::PseudoElement::None,
- String(),
- CSS::Selector::SimpleSelector::AttributeMatchType::None,
- String(),
- String()
- };
- }
- if (currentToken == ".") {
- type = CSS::Selector::SimpleSelector::Type::Class;
- } else if (currentToken == "#") {
- type = CSS::Selector::SimpleSelector::Type::Id;
- } else if (currentToken == "*") {
- type = CSS::Selector::SimpleSelector::Type::Universal;
- } else {
- type = CSS::Selector::SimpleSelector::Type::TagName;
- }
- index++;
- auto value = currentToken;
- if (type == CSS::Selector::SimpleSelector::Type::TagName) {
- value = value.to_lowercase();
- }
- CSS::Selector::SimpleSelector simple_selector {
- type,
- CSS::Selector::SimpleSelector::PseudoClass::None,
- CSS::Selector::SimpleSelector::PseudoElement::None,
- value,
- CSS::Selector::SimpleSelector::AttributeMatchType::None,
- String(),
- String()
- };
- if (index >= parts.size()) {
- return simple_selector;
- }
- currentToken = parts.at(index);
- if (currentToken.starts_with('[')) {
- auto adjusted = currentToken.substring(1, currentToken.length() - 2);
- // TODO: split on String :^)
- Vector<String> attribute_parts = adjusted.split(',');
- simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::HasAttribute;
- simple_selector.attribute_name = attribute_parts.first();
- size_t attribute_index = 1;
- if (attribute_index >= attribute_parts.size()) {
- return simple_selector;
- }
- if (attribute_parts.at(attribute_index) == " =") {
- simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
- attribute_index++;
- }
- if (attribute_parts.at(attribute_index) == " ~") {
- simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::Contains;
- attribute_index += 2;
- }
- if (attribute_parts.at(attribute_index) == " |") {
- simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::StartsWith;
- attribute_index += 2;
- }
- simple_selector.attribute_value = attribute_parts.at(attribute_index);
- return simple_selector;
- }
- if (currentToken == ":") {
- bool is_pseudo = false;
- index++;
- if (index >= parts.size()) {
- return {};
- }
- currentToken = parts.at(index);
- if (currentToken == ":") {
- is_pseudo = true;
- index++;
- }
- if (index >= parts.size()) {
- return {};
- }
- currentToken = parts.at(index);
- auto pseudo_name = currentToken;
- index++;
- // Ignore for now, otherwise we produce a "false positive" selector
- // and apply styles to the element itself, not its pseudo element
- if (is_pseudo) {
- return {};
- }
- if (pseudo_name.equals_ignoring_case("link")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Link;
- } else if (pseudo_name.equals_ignoring_case("visited")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Visited;
- } else if (pseudo_name.equals_ignoring_case("hover")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Hover;
- } else if (pseudo_name.equals_ignoring_case("focus")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Focus;
- } else if (pseudo_name.equals_ignoring_case("first-child")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::FirstChild;
- } else if (pseudo_name.equals_ignoring_case("last-child")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::LastChild;
- } else if (pseudo_name.equals_ignoring_case("only-child")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::OnlyChild;
- } else if (pseudo_name.equals_ignoring_case("empty")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Empty;
- } else if (pseudo_name.equals_ignoring_case("root")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Root;
- } else if (pseudo_name.equals_ignoring_case("first-of-type")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::FirstOfType;
- } else if (pseudo_name.equals_ignoring_case("last-of-type")) {
- simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::LastOfType;
- } else if (pseudo_name.equals_ignoring_case("before")) {
- simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::Before;
- } else if (pseudo_name.equals_ignoring_case("after")) {
- simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::After;
- } else {
- dbgln("Unknown pseudo class: '{}'", pseudo_name);
- return simple_selector;
- }
- }
- return simple_selector;
- };
- auto parse_complex_selector = [&]() -> Optional<CSS::Selector::ComplexSelector> {
- auto relation = CSS::Selector::ComplexSelector::Relation::Descendant;
- auto currentToken = parts.at(index);
- if (is_combinator(currentToken)) {
- if (currentToken == ">") {
- relation = CSS::Selector::ComplexSelector::Relation::ImmediateChild;
- }
- if (currentToken == "+") {
- relation = CSS::Selector::ComplexSelector::Relation::AdjacentSibling;
- }
- if (currentToken == "~") {
- relation = CSS::Selector::ComplexSelector::Relation::GeneralSibling;
- }
- if (currentToken == "||") {
- relation = CSS::Selector::ComplexSelector::Relation::Column;
- }
- index++;
- }
- Vector<CSS::Selector::SimpleSelector> simple_selectors;
- for (;;) {
- auto component = parse_simple_selector();
- if (!component.has_value()) {
- break;
- }
- simple_selectors.append(component.value());
- }
- if (simple_selectors.is_empty())
- return {};
- return CSS::Selector::ComplexSelector { relation, move(simple_selectors) };
- };
- for (;;) {
- auto complex = parse_complex_selector();
- if (complex.has_value()) {
- selectors.append(complex.value());
- }
- if (index >= parts.size()) {
- break;
- }
- auto currentToken = parts.at(index);
- if (currentToken != ",") {
- break;
- }
- index++;
- }
- if (selectors.is_empty()) {
- return {};
- }
- selectors.first().relation = CSS::Selector::ComplexSelector::Relation::None;
- return selectors;
- }
- void Parser::dump_all_tokens()
- {
- dbgln("Dumping all tokens:");
- for (auto& token : m_tokens)
- dbgln("{}", token.to_string());
- }
- void Parser::reconsume_current_input_token()
- {
- --m_iterator_offset;
- }
- bool Parser::is_combinator(String input)
- {
- return input == ">" || input == "+" || input == "~" || input == "||";
- }
- Vector<QualifiedStyleRule> Parser::consume_a_list_of_rules(bool top_level)
- {
- Vector<QualifiedStyleRule> rules;
- for (;;) {
- auto token = next_token();
- if (token.is_whitespace()) {
- continue;
- }
- if (token.is_eof()) {
- break;
- }
- if (token.is_cdo() || token.is_cdc()) {
- if (top_level) {
- continue;
- }
- reconsume_current_input_token();
- auto maybe_qualified = consume_a_qualified_rule();
- if (maybe_qualified.has_value()) {
- rules.append(maybe_qualified.value());
- }
- continue;
- }
- if (token.is_at()) {
- reconsume_current_input_token();
- rules.append(consume_an_at_rule());
- continue;
- }
- reconsume_current_input_token();
- auto maybe_qualified = consume_a_qualified_rule();
- if (maybe_qualified.has_value()) {
- rules.append(maybe_qualified.value());
- }
- }
- return rules;
- }
- AtStyleRule Parser::consume_an_at_rule()
- {
- auto initial = next_token();
- AtStyleRule rule;
- rule.m_name = initial.m_value.to_string();
- for (;;) {
- auto token = next_token();
- if (token.is_semicolon()) {
- return rule;
- }
- if (token.is_eof()) {
- log_parse_error();
- return rule;
- }
- if (token.is_open_curly()) {
- rule.m_block = consume_a_simple_block();
- return rule;
- }
- // how is "simple block with an associated token of <{-token>" a valid token?
- reconsume_current_input_token();
- auto value = consume_a_component_value();
- if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
- if (value.m_token.is_whitespace()) {
- continue;
- }
- }
- rule.m_prelude.append(value.to_string());
- }
- }
- Optional<QualifiedStyleRule> Parser::consume_a_qualified_rule()
- {
- QualifiedStyleRule rule;
- for (;;) {
- auto token = next_token();
- if (token.is_eof()) {
- log_parse_error();
- return {};
- }
- if (token.is_open_curly()) {
- rule.m_block = consume_a_simple_block();
- return rule;
- }
- // how is "simple block with an associated token of <{-token>" a valid token?
- reconsume_current_input_token();
- auto value = consume_a_component_value();
- if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
- if (value.m_token.is_whitespace()) {
- continue;
- }
- }
- rule.m_prelude.append(value.to_string());
- }
- return rule;
- }
- StyleComponentValueRule Parser::consume_a_component_value()
- {
- auto token = next_token();
- if (token.is_open_curly() || token.is_open_square() || token.is_open_paren()) {
- auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Block);
- component.m_block = consume_a_simple_block();
- return component;
- }
- if (token.is_function()) {
- auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Function);
- component.m_function = consume_a_function();
- return component;
- }
- auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
- component.m_token = token;
- return component;
- }
- StyleBlockRule Parser::consume_a_simple_block()
- {
- auto ending_token = current_token().mirror_variant();
- StyleBlockRule block;
- block.m_token = current_token();
- for (;;) {
- auto token = next_token();
- if (token.m_type == ending_token) {
- return block;
- }
- if (token.is_eof()) {
- log_parse_error();
- return block;
- }
- reconsume_current_input_token();
- auto value = consume_a_component_value();
- if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
- if (value.m_token.is_whitespace()) {
- continue;
- }
- }
- block.m_values.append(value.to_string());
- }
- }
- StyleFunctionRule Parser::consume_a_function()
- {
- StyleFunctionRule function;
- function.m_name = current_token().m_value.to_string();
- for (;;) {
- auto token = next_token();
- if (token.is_close_paren()) {
- return function;
- }
- if (token.is_eof()) {
- log_parse_error();
- return function;
- }
- reconsume_current_input_token();
- auto value = consume_a_component_value();
- if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
- if (value.m_token.is_whitespace()) {
- continue;
- }
- }
- function.m_values.append(value.to_string());
- }
- return function;
- }
- Optional<StyleDeclarationRule> Parser::consume_a_declaration(Vector<StyleComponentValueRule>)
- {
- TODO();
- }
- Optional<StyleDeclarationRule> Parser::consume_a_declaration()
- {
- auto token = next_token();
- StyleDeclarationRule declaration;
- declaration.m_name = token.m_value.to_string();
- for (;;) {
- if (!peek_token().is_whitespace()) {
- break;
- }
- next_token();
- }
- auto colon = next_token();
- if (!colon.is_colon()) {
- log_parse_error();
- return {};
- }
- for (;;) {
- if (!peek_token().is_whitespace()) {
- break;
- }
- next_token();
- }
- for (;;) {
- if (peek_token().is_eof()) {
- break;
- }
- declaration.m_values.append(consume_a_component_value());
- }
- auto second_last = declaration.m_values.at(declaration.m_values.size() - 2);
- auto last = declaration.m_values.at(declaration.m_values.size() - 1);
- if (second_last.m_type == StyleComponentValueRule::ComponentType::Token && last.m_type == StyleComponentValueRule::ComponentType::Token) {
- auto last_token = last.m_token;
- auto second_last_token = second_last.m_token;
- if (second_last_token.is_delim() && second_last_token.m_value.to_string().equals_ignoring_case("!")) {
- if (last_token.is_ident() && last_token.m_value.to_string().equals_ignoring_case("important")) {
- declaration.m_values.remove(declaration.m_values.size() - 2);
- declaration.m_values.remove(declaration.m_values.size() - 1);
- declaration.m_important = true;
- }
- }
- }
- for (;;) {
- auto maybe_whitespace = declaration.m_values.at(declaration.m_values.size() - 1);
- if (!(maybe_whitespace.m_type == StyleComponentValueRule::ComponentType::Token && maybe_whitespace.m_token.is_whitespace())) {
- break;
- }
- declaration.m_values.remove(declaration.m_values.size() - 1);
- }
- return declaration;
- }
- Vector<DeclarationOrAtRule> Parser::consume_a_list_of_declarations()
- {
- Vector<DeclarationOrAtRule> list;
- for (;;) {
- auto token = next_token();
- if (token.is_whitespace() || token.is_semicolon()) {
- continue;
- }
- if (token.is_eof()) {
- return list;
- }
- if (token.is_at()) {
- reconsume_current_input_token();
- list.append(DeclarationOrAtRule(consume_an_at_rule()));
- continue;
- }
- if (token.is_ident()) {
- Vector<StyleComponentValueRule> temp;
- auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
- component.m_token = token;
- temp.append(component);
- for (;;) {
- auto peek = peek_token();
- if (peek.is_semicolon() || peek.is_eof()) {
- break;
- }
- temp.append(consume_a_component_value());
- }
- auto maybe_declaration = consume_a_declaration(temp);
- if (maybe_declaration.has_value()) {
- list.append(DeclarationOrAtRule(maybe_declaration.value()));
- }
- }
- log_parse_error();
- reconsume_current_input_token();
- auto peek = peek_token();
- if (!(peek.is_semicolon() || peek.is_eof())) {
- consume_a_component_value();
- }
- }
- return list;
- }
- Optional<QualifiedStyleRule> Parser::parse_as_rule()
- {
- Optional<QualifiedStyleRule> rule;
- for (;;) {
- auto maybe_whitespace = peek_token();
- if (!maybe_whitespace.is_whitespace()) {
- break;
- }
- next_token();
- }
- auto token = peek_token();
- if (token.is_eof()) {
- return {};
- }
- if (token.is_at()) {
- rule = consume_an_at_rule();
- } else {
- rule = consume_a_qualified_rule();
- }
- for (;;) {
- auto maybe_whitespace = peek_token();
- if (!token.is_whitespace()) {
- break;
- }
- next_token();
- }
- auto maybe_eof = peek_token();
- if (maybe_eof.is_eof()) {
- return rule;
- }
- return {};
- }
- Vector<QualifiedStyleRule> Parser::parse_as_list_of_rules()
- {
- return consume_a_list_of_rules(false);
- }
- Optional<StyleDeclarationRule> Parser::parse_as_declaration()
- {
- for (;;) {
- auto maybe_whitespace = peek_token();
- if (!maybe_whitespace.is_whitespace()) {
- break;
- }
- next_token();
- }
- auto token = peek_token();
- if (!token.is_ident()) {
- return {};
- }
- return consume_a_declaration();
- }
- Vector<DeclarationOrAtRule> Parser::parse_as_list_of_declarations()
- {
- return consume_a_list_of_declarations();
- }
- Optional<StyleComponentValueRule> Parser::parse_as_component_value()
- {
- for (;;) {
- auto maybe_whitespace = peek_token();
- if (!maybe_whitespace.is_whitespace()) {
- break;
- }
- next_token();
- }
- auto token = peek_token();
- if (token.is_eof()) {
- return {};
- }
- auto value = consume_a_component_value();
- for (;;) {
- auto maybe_whitespace = peek_token();
- if (!token.is_whitespace()) {
- break;
- }
- next_token();
- }
- auto maybe_eof = peek_token();
- if (maybe_eof.is_eof()) {
- return value;
- }
- return {};
- }
- Vector<StyleComponentValueRule> Parser::parse_as_list_of_component_values()
- {
- Vector<StyleComponentValueRule> rules;
- for (;;) {
- if (peek_token().is_eof()) {
- break;
- }
- rules.append(consume_a_component_value());
- }
- return rules;
- }
- Vector<StyleComponentValueRule> Parser::parse_as_list_of_comma_separated_component_values()
- {
- Vector<StyleComponentValueRule> rules;
- for (;;) {
- rules.append(consume_a_component_value());
- if (peek_token().is_comma())
- continue;
- if (peek_token().is_eof())
- break;
- }
- return rules;
- }
- }
|