SelectorParsing.cpp 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2024, Sam Atkins <sam@ladybird.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::CSS::Parser {
  14. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  15. {
  16. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  17. if (!selector_list.is_error())
  18. return selector_list.release_value();
  19. return {};
  20. }
  21. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  22. {
  23. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  24. if (!selector_list.is_error())
  25. return selector_list.release_value();
  26. return {};
  27. }
  28. Optional<Selector::PseudoElement> Parser::parse_as_pseudo_element_selector()
  29. {
  30. // FIXME: This is quite janky. Selector parsing is not at all designed to allow parsing just a single part of a selector.
  31. // So, this code parses a whole selector, then rejects it if it's not a single pseudo-element simple selector.
  32. // Come back and fix this, future Sam!
  33. auto maybe_selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, SelectorParsingMode::Standard);
  34. if (maybe_selector_list.is_error())
  35. return {};
  36. auto& selector_list = maybe_selector_list.value();
  37. if (selector_list.size() != 1)
  38. return {};
  39. auto& selector = selector_list.first();
  40. if (selector->compound_selectors().size() != 1)
  41. return {};
  42. auto& first_compound_selector = selector->compound_selectors().first();
  43. if (first_compound_selector.simple_selectors.size() != 1)
  44. return {};
  45. auto& simple_selector = first_compound_selector.simple_selectors.first();
  46. if (simple_selector.type != Selector::SimpleSelector::Type::PseudoElement)
  47. return {};
  48. return simple_selector.pseudo_element();
  49. }
  50. static NonnullRefPtr<Selector> create_invalid_selector(Vector<ComponentValue> component_values)
  51. {
  52. // Trim leading and trailing whitespace
  53. while (!component_values.is_empty() && component_values.first().is(Token::Type::Whitespace)) {
  54. component_values.take_first();
  55. }
  56. while (!component_values.is_empty() && component_values.last().is(Token::Type::Whitespace)) {
  57. component_values.take_last();
  58. }
  59. Selector::SimpleSelector simple {
  60. .type = Selector::SimpleSelector::Type::Invalid,
  61. .value = Selector::SimpleSelector::Invalid {
  62. .component_values = move(component_values),
  63. }
  64. };
  65. Selector::CompoundSelector compound {
  66. .combinator = Selector::Combinator::None,
  67. .simple_selectors = { move(simple) }
  68. };
  69. return Selector::create({ move(compound) });
  70. }
  71. template<typename T>
  72. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  73. {
  74. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  75. SelectorList selectors;
  76. for (auto& selector_parts : comma_separated_lists) {
  77. auto stream = TokenStream(selector_parts);
  78. auto selector = parse_complex_selector(stream, mode);
  79. if (selector.is_error()) {
  80. if (parsing_mode == SelectorParsingMode::Forgiving) {
  81. // Keep the invalid selector around for serialization and nesting
  82. selectors.append(create_invalid_selector(move(selector_parts)));
  83. continue;
  84. }
  85. return selector.error();
  86. }
  87. selectors.append(selector.release_value());
  88. }
  89. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  90. return ParseError::SyntaxError;
  91. return selectors;
  92. }
  93. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  94. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  95. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  96. {
  97. Vector<Selector::CompoundSelector> compound_selectors;
  98. auto first_selector = TRY(parse_compound_selector(tokens));
  99. if (!first_selector.has_value())
  100. return ParseError::SyntaxError;
  101. if (mode == SelectorType::Standalone) {
  102. if (first_selector->combinator != Selector::Combinator::Descendant)
  103. return ParseError::SyntaxError;
  104. first_selector->combinator = Selector::Combinator::None;
  105. }
  106. compound_selectors.append(first_selector.release_value());
  107. while (tokens.has_next_token()) {
  108. auto compound_selector = TRY(parse_compound_selector(tokens));
  109. if (!compound_selector.has_value())
  110. break;
  111. compound_selectors.append(compound_selector.release_value());
  112. }
  113. if (compound_selectors.is_empty())
  114. return ParseError::SyntaxError;
  115. return Selector::create(move(compound_selectors));
  116. }
  117. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  118. {
  119. tokens.discard_whitespace();
  120. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  121. tokens.discard_whitespace();
  122. Vector<Selector::SimpleSelector> simple_selectors;
  123. while (tokens.has_next_token()) {
  124. auto component = TRY(parse_simple_selector(tokens));
  125. if (!component.has_value())
  126. break;
  127. if (component->type == Selector::SimpleSelector::Type::TagName && !simple_selectors.is_empty()) {
  128. // Tag-name selectors can only go at the beginning of a compound selector.
  129. return ParseError::SyntaxError;
  130. }
  131. simple_selectors.append(component.release_value());
  132. }
  133. if (simple_selectors.is_empty())
  134. return Optional<Selector::CompoundSelector> {};
  135. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  136. }
  137. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  138. {
  139. auto const& current_value = tokens.consume_a_token();
  140. if (current_value.is(Token::Type::Delim)) {
  141. switch (current_value.token().delim()) {
  142. case '>':
  143. return Selector::Combinator::ImmediateChild;
  144. case '+':
  145. return Selector::Combinator::NextSibling;
  146. case '~':
  147. return Selector::Combinator::SubsequentSibling;
  148. case '|': {
  149. auto const& next = tokens.next_token();
  150. if (next.is(Token::Type::EndOfFile))
  151. return {};
  152. if (next.is_delim('|')) {
  153. tokens.discard_a_token();
  154. return Selector::Combinator::Column;
  155. }
  156. }
  157. }
  158. }
  159. tokens.reconsume_current_input_token();
  160. return {};
  161. }
  162. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  163. {
  164. auto is_name = [](ComponentValue const& token) {
  165. return token.is_delim('*') || token.is(Token::Type::Ident);
  166. };
  167. auto get_name = [](ComponentValue const& token) {
  168. if (token.is_delim('*'))
  169. return "*"_fly_string;
  170. return token.token().ident();
  171. };
  172. // There are 3 possibilities here:
  173. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  174. // 1) `|<name>`
  175. // 2) `<namespace>|<name>`
  176. // 3) `<name>`
  177. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  178. auto transaction = tokens.begin_transaction();
  179. auto const& first_token = tokens.consume_a_token();
  180. if (first_token.is_delim('|')) {
  181. // Case 1: `|<name>`
  182. if (is_name(tokens.next_token())) {
  183. auto const& name_token = tokens.consume_a_token();
  184. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  185. return {};
  186. transaction.commit();
  187. return Selector::SimpleSelector::QualifiedName {
  188. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  189. .name = get_name(name_token),
  190. };
  191. }
  192. return {};
  193. }
  194. if (!is_name(first_token))
  195. return {};
  196. if (tokens.next_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  197. // Case 2: `<namespace>|<name>`
  198. tokens.discard_a_token(); // `|`
  199. auto namespace_ = get_name(first_token);
  200. auto name = get_name(tokens.consume_a_token());
  201. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  202. return {};
  203. auto namespace_type = namespace_ == "*"sv
  204. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  205. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  206. transaction.commit();
  207. return Selector::SimpleSelector::QualifiedName {
  208. .namespace_type = namespace_type,
  209. .namespace_ = namespace_,
  210. .name = name,
  211. };
  212. }
  213. // Case 3: `<name>`
  214. auto& name_token = first_token;
  215. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  216. return {};
  217. transaction.commit();
  218. return Selector::SimpleSelector::QualifiedName {
  219. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  220. .name = get_name(name_token),
  221. };
  222. }
  223. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  224. {
  225. auto attribute_tokens = TokenStream { first_value.block().value };
  226. attribute_tokens.discard_whitespace();
  227. if (!attribute_tokens.has_next_token()) {
  228. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  229. return ParseError::SyntaxError;
  230. }
  231. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  232. if (!maybe_qualified_name.has_value()) {
  233. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.next_token().to_debug_string());
  234. return ParseError::SyntaxError;
  235. }
  236. auto qualified_name = maybe_qualified_name.release_value();
  237. Selector::SimpleSelector simple_selector {
  238. .type = Selector::SimpleSelector::Type::Attribute,
  239. .value = Selector::SimpleSelector::Attribute {
  240. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  241. .qualified_name = qualified_name,
  242. .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  243. }
  244. };
  245. attribute_tokens.discard_whitespace();
  246. if (!attribute_tokens.has_next_token())
  247. return simple_selector;
  248. auto const& delim_part = attribute_tokens.consume_a_token();
  249. if (!delim_part.is(Token::Type::Delim)) {
  250. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  251. return ParseError::SyntaxError;
  252. }
  253. if (delim_part.token().delim() == '=') {
  254. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  255. } else {
  256. if (!attribute_tokens.has_next_token()) {
  257. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  258. return ParseError::SyntaxError;
  259. }
  260. auto const& delim_second_part = attribute_tokens.consume_a_token();
  261. if (!delim_second_part.is_delim('=')) {
  262. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  263. return ParseError::SyntaxError;
  264. }
  265. switch (delim_part.token().delim()) {
  266. case '~':
  267. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  268. break;
  269. case '*':
  270. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  271. break;
  272. case '|':
  273. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  274. break;
  275. case '^':
  276. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  277. break;
  278. case '$':
  279. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  280. break;
  281. default:
  282. attribute_tokens.reconsume_current_input_token();
  283. }
  284. }
  285. attribute_tokens.discard_whitespace();
  286. if (!attribute_tokens.has_next_token()) {
  287. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  288. return ParseError::SyntaxError;
  289. }
  290. auto const& value_part = attribute_tokens.consume_a_token();
  291. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  292. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  293. return ParseError::SyntaxError;
  294. }
  295. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  296. simple_selector.attribute().value = value_string.to_string();
  297. attribute_tokens.discard_whitespace();
  298. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  299. if (attribute_tokens.has_next_token()) {
  300. auto const& case_sensitivity_part = attribute_tokens.consume_a_token();
  301. if (case_sensitivity_part.is(Token::Type::Ident)) {
  302. auto case_sensitivity = case_sensitivity_part.token().ident();
  303. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  304. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  305. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  306. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  307. } else {
  308. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  309. return ParseError::SyntaxError;
  310. }
  311. } else {
  312. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  313. return ParseError::SyntaxError;
  314. }
  315. }
  316. if (attribute_tokens.has_next_token()) {
  317. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  318. return ParseError::SyntaxError;
  319. }
  320. return simple_selector;
  321. }
  322. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  323. {
  324. auto peek_token_ends_selector = [&]() -> bool {
  325. auto const& value = tokens.next_token();
  326. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  327. };
  328. if (peek_token_ends_selector())
  329. return ParseError::SyntaxError;
  330. bool is_pseudo = false;
  331. if (tokens.next_token().is(Token::Type::Colon)) {
  332. is_pseudo = true;
  333. tokens.discard_a_token();
  334. if (peek_token_ends_selector())
  335. return ParseError::SyntaxError;
  336. }
  337. if (is_pseudo) {
  338. auto const& name_token = tokens.consume_a_token();
  339. if (!name_token.is(Token::Type::Ident)) {
  340. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  341. return ParseError::SyntaxError;
  342. }
  343. auto pseudo_name = name_token.token().ident();
  344. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  345. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  346. return Selector::SimpleSelector {
  347. .type = Selector::SimpleSelector::Type::PseudoElement,
  348. .value = pseudo_element.release_value()
  349. };
  350. }
  351. // https://www.w3.org/TR/selectors-4/#compat
  352. // All other pseudo-elements whose names begin with the string “-webkit-” (matched ASCII case-insensitively)
  353. // and that are not functional notations must be treated as valid at parse time. (That is, ::-webkit-asdf is
  354. // valid at parse time, but ::-webkit-jkl() is not.) If they’re not otherwise recognized and supported, they
  355. // must be treated as matching nothing, and are unknown -webkit- pseudo-elements.
  356. if (pseudo_name.starts_with_bytes("-webkit-"sv, CaseSensitivity::CaseInsensitive)) {
  357. return Selector::SimpleSelector {
  358. .type = Selector::SimpleSelector::Type::PseudoElement,
  359. // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
  360. .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, pseudo_name.to_string().to_ascii_lowercase() },
  361. };
  362. }
  363. if (has_ignored_vendor_prefix(pseudo_name))
  364. return ParseError::IncludesIgnoredVendorPrefix;
  365. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  366. return ParseError::SyntaxError;
  367. }
  368. if (peek_token_ends_selector())
  369. return ParseError::SyntaxError;
  370. auto const& pseudo_class_token = tokens.consume_a_token();
  371. if (pseudo_class_token.is(Token::Type::Ident)) {
  372. auto pseudo_name = pseudo_class_token.token().ident();
  373. if (has_ignored_vendor_prefix(pseudo_name))
  374. return ParseError::IncludesIgnoredVendorPrefix;
  375. auto make_pseudo_class_selector = [](auto pseudo_class) {
  376. return Selector::SimpleSelector {
  377. .type = Selector::SimpleSelector::Type::PseudoClass,
  378. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  379. };
  380. };
  381. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  382. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  383. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  384. return ParseError::SyntaxError;
  385. }
  386. return make_pseudo_class_selector(pseudo_class.value());
  387. }
  388. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  389. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  390. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  391. switch (pseudo_element.value().type()) {
  392. case Selector::PseudoElement::Type::After:
  393. case Selector::PseudoElement::Type::Before:
  394. case Selector::PseudoElement::Type::FirstLetter:
  395. case Selector::PseudoElement::Type::FirstLine:
  396. return Selector::SimpleSelector {
  397. .type = Selector::SimpleSelector::Type::PseudoElement,
  398. .value = pseudo_element.value()
  399. };
  400. default:
  401. break;
  402. }
  403. }
  404. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  405. return ParseError::SyntaxError;
  406. }
  407. if (pseudo_class_token.is_function()) {
  408. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  409. auto tokens = TokenStream<ComponentValue>(function_values);
  410. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  411. if (!nth_child_pattern.has_value()) {
  412. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  413. return ParseError::SyntaxError;
  414. }
  415. tokens.discard_whitespace();
  416. if (!tokens.has_next_token()) {
  417. return Selector::SimpleSelector {
  418. .type = Selector::SimpleSelector::Type::PseudoClass,
  419. .value = Selector::SimpleSelector::PseudoClassSelector {
  420. .type = pseudo_class,
  421. .nth_child_pattern = nth_child_pattern.release_value() }
  422. };
  423. }
  424. if (!allow_of)
  425. return ParseError::SyntaxError;
  426. // Parse the `of <selector-list>` syntax
  427. auto const& maybe_of = tokens.consume_a_token();
  428. if (!maybe_of.is_ident("of"sv))
  429. return ParseError::SyntaxError;
  430. tokens.discard_whitespace();
  431. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  432. tokens.discard_whitespace();
  433. if (tokens.has_next_token())
  434. return ParseError::SyntaxError;
  435. return Selector::SimpleSelector {
  436. .type = Selector::SimpleSelector::Type::PseudoClass,
  437. .value = Selector::SimpleSelector::PseudoClassSelector {
  438. .type = pseudo_class,
  439. .nth_child_pattern = nth_child_pattern.release_value(),
  440. .argument_selector_list = move(selector_list) }
  441. };
  442. };
  443. auto const& pseudo_function = pseudo_class_token.function();
  444. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name);
  445. if (!maybe_pseudo_class.has_value()) {
  446. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name);
  447. return ParseError::SyntaxError;
  448. }
  449. auto pseudo_class = maybe_pseudo_class.value();
  450. auto metadata = pseudo_class_metadata(pseudo_class);
  451. if (!metadata.is_valid_as_function) {
  452. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name);
  453. return ParseError::SyntaxError;
  454. }
  455. if (pseudo_function.value.is_empty()) {
  456. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name);
  457. return ParseError::SyntaxError;
  458. }
  459. switch (metadata.parameter_type) {
  460. case PseudoClassMetadata::ParameterType::ANPlusB:
  461. return parse_nth_child_selector(pseudo_class, pseudo_function.value, false);
  462. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  463. return parse_nth_child_selector(pseudo_class, pseudo_function.value, true);
  464. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  465. auto function_token_stream = TokenStream(pseudo_function.value);
  466. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  467. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  468. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name);
  469. return ParseError::SyntaxError;
  470. }
  471. auto compound_selector = compound_selector_or_error.release_value().release_value();
  472. compound_selector.combinator = Selector::Combinator::None;
  473. Vector compound_selectors { move(compound_selector) };
  474. auto selector = Selector::create(move(compound_selectors));
  475. return Selector::SimpleSelector {
  476. .type = Selector::SimpleSelector::Type::PseudoClass,
  477. .value = Selector::SimpleSelector::PseudoClassSelector {
  478. .type = pseudo_class,
  479. .argument_selector_list = { move(selector) } }
  480. };
  481. }
  482. case PseudoClassMetadata::ParameterType::ForgivingRelativeSelectorList:
  483. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  484. auto function_token_stream = TokenStream(pseudo_function.value);
  485. auto selector_type = metadata.parameter_type == PseudoClassMetadata::ParameterType::ForgivingSelectorList
  486. ? SelectorType::Standalone
  487. : SelectorType::Relative;
  488. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  489. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, selector_type, SelectorParsingMode::Forgiving));
  490. return Selector::SimpleSelector {
  491. .type = Selector::SimpleSelector::Type::PseudoClass,
  492. .value = Selector::SimpleSelector::PseudoClassSelector {
  493. .type = pseudo_class,
  494. .argument_selector_list = move(argument_selector_list) }
  495. };
  496. }
  497. case PseudoClassMetadata::ParameterType::Ident: {
  498. auto function_token_stream = TokenStream(pseudo_function.value);
  499. function_token_stream.discard_whitespace();
  500. auto const& maybe_keyword_token = function_token_stream.consume_a_token();
  501. function_token_stream.discard_whitespace();
  502. if (!maybe_keyword_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  503. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: not an ident", pseudo_function.name);
  504. return ParseError::SyntaxError;
  505. }
  506. auto maybe_keyword = keyword_from_string(maybe_keyword_token.token().ident());
  507. if (!maybe_keyword.has_value()) {
  508. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: unrecognized keyword", pseudo_function.name);
  509. return ParseError::SyntaxError;
  510. }
  511. return Selector::SimpleSelector {
  512. .type = Selector::SimpleSelector::Type::PseudoClass,
  513. .value = Selector::SimpleSelector::PseudoClassSelector {
  514. .type = pseudo_class,
  515. .keyword = maybe_keyword.value() }
  516. };
  517. }
  518. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  519. Vector<FlyString> languages;
  520. auto function_token_stream = TokenStream(pseudo_function.value);
  521. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  522. for (auto const& language_token_list : language_token_lists) {
  523. auto language_token_stream = TokenStream(language_token_list);
  524. language_token_stream.discard_whitespace();
  525. auto const& language_token = language_token_stream.consume_a_token();
  526. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  527. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name);
  528. return ParseError::SyntaxError;
  529. }
  530. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  531. languages.append(language_string);
  532. language_token_stream.discard_whitespace();
  533. if (language_token_stream.has_next_token()) {
  534. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name);
  535. return ParseError::SyntaxError;
  536. }
  537. }
  538. return Selector::SimpleSelector {
  539. .type = Selector::SimpleSelector::Type::PseudoClass,
  540. .value = Selector::SimpleSelector::PseudoClassSelector {
  541. .type = pseudo_class,
  542. .languages = move(languages) }
  543. };
  544. }
  545. case PseudoClassMetadata::ParameterType::SelectorList: {
  546. auto function_token_stream = TokenStream(pseudo_function.value);
  547. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  548. return Selector::SimpleSelector {
  549. .type = Selector::SimpleSelector::Type::PseudoClass,
  550. .value = Selector::SimpleSelector::PseudoClassSelector {
  551. .type = pseudo_class,
  552. .argument_selector_list = move(not_selector) }
  553. };
  554. }
  555. case PseudoClassMetadata::ParameterType::None:
  556. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  557. VERIFY_NOT_REACHED();
  558. }
  559. }
  560. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  561. return ParseError::SyntaxError;
  562. }
  563. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  564. {
  565. auto peek_token_ends_selector = [&]() -> bool {
  566. auto const& value = tokens.next_token();
  567. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  568. };
  569. if (peek_token_ends_selector())
  570. return Optional<Selector::SimpleSelector> {};
  571. // Handle universal and tag-name types together, since both can be namespaced
  572. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  573. if (qualified_name->name.name == "*"sv) {
  574. return Selector::SimpleSelector {
  575. .type = Selector::SimpleSelector::Type::Universal,
  576. .value = qualified_name.release_value(),
  577. };
  578. }
  579. return Selector::SimpleSelector {
  580. .type = Selector::SimpleSelector::Type::TagName,
  581. .value = qualified_name.release_value(),
  582. };
  583. }
  584. auto const& first_value = tokens.consume_a_token();
  585. if (first_value.is(Token::Type::Delim)) {
  586. u32 delim = first_value.token().delim();
  587. switch (delim) {
  588. case '*':
  589. // Handled already
  590. VERIFY_NOT_REACHED();
  591. case '&':
  592. return Selector::SimpleSelector {
  593. .type = Selector::SimpleSelector::Type::Nesting,
  594. };
  595. case '.': {
  596. if (peek_token_ends_selector())
  597. return ParseError::SyntaxError;
  598. auto const& class_name_value = tokens.consume_a_token();
  599. if (!class_name_value.is(Token::Type::Ident)) {
  600. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  601. return ParseError::SyntaxError;
  602. }
  603. return Selector::SimpleSelector {
  604. .type = Selector::SimpleSelector::Type::Class,
  605. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  606. };
  607. }
  608. case '>':
  609. case '+':
  610. case '~':
  611. case '|':
  612. // Whitespace is not required between the compound-selector and a combinator.
  613. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  614. tokens.reconsume_current_input_token();
  615. return Optional<Selector::SimpleSelector> {};
  616. default:
  617. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  618. return ParseError::SyntaxError;
  619. }
  620. }
  621. if (first_value.is(Token::Type::Hash)) {
  622. if (first_value.token().hash_type() != Token::HashType::Id) {
  623. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  624. return ParseError::SyntaxError;
  625. }
  626. return Selector::SimpleSelector {
  627. .type = Selector::SimpleSelector::Type::Id,
  628. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  629. };
  630. }
  631. if (first_value.is_block() && first_value.block().is_square())
  632. return TRY(parse_attribute_simple_selector(first_value));
  633. if (first_value.is(Token::Type::Colon))
  634. return TRY(parse_pseudo_simple_selector(tokens));
  635. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  636. return ParseError::SyntaxError;
  637. }
  638. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  639. {
  640. auto transaction = values.begin_transaction();
  641. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  642. if constexpr (CSS_PARSER_DEBUG) {
  643. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  644. values.dump_all_tokens();
  645. }
  646. return {};
  647. };
  648. auto is_sign = [](ComponentValue const& value) -> bool {
  649. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  650. };
  651. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  652. if (!value.is(Token::Type::Dimension))
  653. return false;
  654. if (!value.token().number().is_integer())
  655. return false;
  656. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  657. return false;
  658. return true;
  659. };
  660. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  661. if (!value.is(Token::Type::Dimension))
  662. return false;
  663. if (!value.token().number().is_integer())
  664. return false;
  665. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  666. return false;
  667. return true;
  668. };
  669. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  670. if (!value.is(Token::Type::Dimension))
  671. return false;
  672. if (!value.token().number().is_integer())
  673. return false;
  674. auto dimension_unit = value.token().dimension_unit();
  675. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  676. return false;
  677. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  678. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  679. return false;
  680. }
  681. return true;
  682. };
  683. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  684. if (!value.is(Token::Type::Ident))
  685. return false;
  686. auto ident = value.token().ident();
  687. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  688. return false;
  689. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  690. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  691. return false;
  692. }
  693. return true;
  694. };
  695. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  696. if (!value.is(Token::Type::Ident))
  697. return false;
  698. auto ident = value.token().ident();
  699. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  700. return false;
  701. if (ident.bytes_as_string_view().length() == 3)
  702. return false;
  703. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  704. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  705. return false;
  706. }
  707. return true;
  708. };
  709. auto is_integer = [](ComponentValue const& value) -> bool {
  710. return value.is(Token::Type::Number) && value.token().number().is_integer();
  711. };
  712. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  713. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  714. };
  715. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  716. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  717. };
  718. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  719. // Unfortunately these can't be in the same order as in the spec.
  720. values.discard_whitespace();
  721. auto const& first_value = values.consume_a_token();
  722. // odd | even
  723. if (first_value.is(Token::Type::Ident)) {
  724. auto ident = first_value.token().ident();
  725. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  726. transaction.commit();
  727. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  728. }
  729. if (ident.equals_ignoring_ascii_case("even"sv)) {
  730. transaction.commit();
  731. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  732. }
  733. }
  734. // <integer>
  735. if (is_integer(first_value)) {
  736. int b = first_value.token().to_integer();
  737. transaction.commit();
  738. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  739. }
  740. // <n-dimension>
  741. // <n-dimension> <signed-integer>
  742. // <n-dimension> ['+' | '-'] <signless-integer>
  743. if (is_n_dimension(first_value)) {
  744. int a = first_value.token().dimension_value_int();
  745. values.discard_whitespace();
  746. // <n-dimension> <signed-integer>
  747. if (is_signed_integer(values.next_token())) {
  748. int b = values.consume_a_token().token().to_integer();
  749. transaction.commit();
  750. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  751. }
  752. // <n-dimension> ['+' | '-'] <signless-integer>
  753. {
  754. auto child_transaction = transaction.create_child();
  755. auto const& second_value = values.consume_a_token();
  756. values.discard_whitespace();
  757. auto const& third_value = values.consume_a_token();
  758. if (is_sign(second_value) && is_signless_integer(third_value)) {
  759. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  760. child_transaction.commit();
  761. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  762. }
  763. }
  764. // <n-dimension>
  765. transaction.commit();
  766. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  767. }
  768. // <ndash-dimension> <signless-integer>
  769. if (is_ndash_dimension(first_value)) {
  770. values.discard_whitespace();
  771. auto const& second_value = values.consume_a_token();
  772. if (is_signless_integer(second_value)) {
  773. int a = first_value.token().dimension_value_int();
  774. int b = -second_value.token().to_integer();
  775. transaction.commit();
  776. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  777. }
  778. return syntax_error();
  779. }
  780. // <ndashdigit-dimension>
  781. if (is_ndashdigit_dimension(first_value)) {
  782. auto const& dimension = first_value.token();
  783. int a = dimension.dimension_value_int();
  784. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_number<int>();
  785. if (maybe_b.has_value()) {
  786. transaction.commit();
  787. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  788. }
  789. return syntax_error();
  790. }
  791. // <dashndashdigit-ident>
  792. if (is_dashndashdigit_ident(first_value)) {
  793. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_number<int>();
  794. if (maybe_b.has_value()) {
  795. transaction.commit();
  796. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  797. }
  798. return syntax_error();
  799. }
  800. // -n
  801. // -n <signed-integer>
  802. // -n ['+' | '-'] <signless-integer>
  803. if (first_value.is_ident("-n"sv)) {
  804. values.discard_whitespace();
  805. // -n <signed-integer>
  806. if (is_signed_integer(values.next_token())) {
  807. int b = values.consume_a_token().token().to_integer();
  808. transaction.commit();
  809. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  810. }
  811. // -n ['+' | '-'] <signless-integer>
  812. {
  813. auto child_transaction = transaction.create_child();
  814. auto const& second_value = values.consume_a_token();
  815. values.discard_whitespace();
  816. auto const& third_value = values.consume_a_token();
  817. if (is_sign(second_value) && is_signless_integer(third_value)) {
  818. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  819. child_transaction.commit();
  820. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  821. }
  822. }
  823. // -n
  824. transaction.commit();
  825. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  826. }
  827. // -n- <signless-integer>
  828. if (first_value.is_ident("-n-"sv)) {
  829. values.discard_whitespace();
  830. auto const& second_value = values.consume_a_token();
  831. if (is_signless_integer(second_value)) {
  832. int b = -second_value.token().to_integer();
  833. transaction.commit();
  834. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  835. }
  836. return syntax_error();
  837. }
  838. // All that's left now are these:
  839. // '+'?† n
  840. // '+'?† n <signed-integer>
  841. // '+'?† n ['+' | '-'] <signless-integer>
  842. // '+'?† n- <signless-integer>
  843. // '+'?† <ndashdigit-ident>
  844. // In all of these cases, the + is optional, and has no effect.
  845. // So, we just skip the +, and carry on.
  846. if (!first_value.is_delim('+')) {
  847. values.reconsume_current_input_token();
  848. // We do *not* skip whitespace here.
  849. }
  850. auto const& first_after_plus = values.consume_a_token();
  851. // '+'?† n
  852. // '+'?† n <signed-integer>
  853. // '+'?† n ['+' | '-'] <signless-integer>
  854. if (first_after_plus.is_ident("n"sv)) {
  855. values.discard_whitespace();
  856. // '+'?† n <signed-integer>
  857. if (is_signed_integer(values.next_token())) {
  858. int b = values.consume_a_token().token().to_integer();
  859. transaction.commit();
  860. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  861. }
  862. // '+'?† n ['+' | '-'] <signless-integer>
  863. {
  864. auto child_transaction = transaction.create_child();
  865. auto const& second_value = values.consume_a_token();
  866. values.discard_whitespace();
  867. auto const& third_value = values.consume_a_token();
  868. if (is_sign(second_value) && is_signless_integer(third_value)) {
  869. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  870. child_transaction.commit();
  871. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  872. }
  873. }
  874. // '+'?† n
  875. transaction.commit();
  876. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  877. }
  878. // '+'?† n- <signless-integer>
  879. if (first_after_plus.is_ident("n-"sv)) {
  880. values.discard_whitespace();
  881. auto const& second_value = values.consume_a_token();
  882. if (is_signless_integer(second_value)) {
  883. int b = -second_value.token().to_integer();
  884. transaction.commit();
  885. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  886. }
  887. return syntax_error();
  888. }
  889. // '+'?† <ndashdigit-ident>
  890. if (is_ndashdigit_ident(first_after_plus)) {
  891. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_number<int>();
  892. if (maybe_b.has_value()) {
  893. transaction.commit();
  894. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  895. }
  896. return syntax_error();
  897. }
  898. return syntax_error();
  899. }
  900. }