SelectorParsing.cpp 42 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::CSS::Parser {
  14. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  15. {
  16. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  17. if (!selector_list.is_error())
  18. return selector_list.release_value();
  19. return {};
  20. }
  21. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  22. {
  23. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  24. if (!selector_list.is_error())
  25. return selector_list.release_value();
  26. return {};
  27. }
  28. Optional<Selector::PseudoElement> Parser::parse_as_pseudo_element_selector()
  29. {
  30. // FIXME: This is quite janky. Selector parsing is not at all designed to allow parsing just a single part of a selector.
  31. // So, this code parses a whole selector, then rejects it if it's not a single pseudo-element simple selector.
  32. // Come back and fix this, future Sam!
  33. auto maybe_selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, SelectorParsingMode::Standard);
  34. if (maybe_selector_list.is_error())
  35. return {};
  36. auto& selector_list = maybe_selector_list.value();
  37. if (selector_list.size() != 1)
  38. return {};
  39. auto& selector = selector_list.first();
  40. if (selector->compound_selectors().size() != 1)
  41. return {};
  42. auto& first_compound_selector = selector->compound_selectors().first();
  43. if (first_compound_selector.simple_selectors.size() != 1)
  44. return {};
  45. auto& simple_selector = first_compound_selector.simple_selectors.first();
  46. if (simple_selector.type != Selector::SimpleSelector::Type::PseudoElement)
  47. return {};
  48. return simple_selector.pseudo_element();
  49. }
  50. template<typename T>
  51. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  52. {
  53. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  54. Vector<NonnullRefPtr<Selector>> selectors;
  55. for (auto& selector_parts : comma_separated_lists) {
  56. auto stream = TokenStream(selector_parts);
  57. auto selector = parse_complex_selector(stream, mode);
  58. if (selector.is_error()) {
  59. if (parsing_mode == SelectorParsingMode::Forgiving)
  60. continue;
  61. return selector.error();
  62. }
  63. selectors.append(selector.release_value());
  64. }
  65. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  66. return ParseError::SyntaxError;
  67. return selectors;
  68. }
  69. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  70. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  71. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  72. {
  73. Vector<Selector::CompoundSelector> compound_selectors;
  74. auto first_selector = TRY(parse_compound_selector(tokens));
  75. if (!first_selector.has_value())
  76. return ParseError::SyntaxError;
  77. if (mode == SelectorType::Standalone) {
  78. if (first_selector->combinator != Selector::Combinator::Descendant)
  79. return ParseError::SyntaxError;
  80. first_selector->combinator = Selector::Combinator::None;
  81. }
  82. compound_selectors.append(first_selector.release_value());
  83. while (tokens.has_next_token()) {
  84. auto compound_selector = TRY(parse_compound_selector(tokens));
  85. if (!compound_selector.has_value())
  86. break;
  87. compound_selectors.append(compound_selector.release_value());
  88. }
  89. if (compound_selectors.is_empty())
  90. return ParseError::SyntaxError;
  91. return Selector::create(move(compound_selectors));
  92. }
  93. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  94. {
  95. tokens.skip_whitespace();
  96. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  97. tokens.skip_whitespace();
  98. Vector<Selector::SimpleSelector> simple_selectors;
  99. while (tokens.has_next_token()) {
  100. auto component = TRY(parse_simple_selector(tokens));
  101. if (!component.has_value())
  102. break;
  103. simple_selectors.append(component.release_value());
  104. }
  105. if (simple_selectors.is_empty())
  106. return Optional<Selector::CompoundSelector> {};
  107. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  108. }
  109. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  110. {
  111. auto const& current_value = tokens.next_token();
  112. if (current_value.is(Token::Type::Delim)) {
  113. switch (current_value.token().delim()) {
  114. case '>':
  115. return Selector::Combinator::ImmediateChild;
  116. case '+':
  117. return Selector::Combinator::NextSibling;
  118. case '~':
  119. return Selector::Combinator::SubsequentSibling;
  120. case '|': {
  121. auto const& next = tokens.peek_token();
  122. if (next.is(Token::Type::EndOfFile))
  123. return {};
  124. if (next.is_delim('|')) {
  125. tokens.next_token();
  126. return Selector::Combinator::Column;
  127. }
  128. }
  129. }
  130. }
  131. tokens.reconsume_current_input_token();
  132. return {};
  133. }
  134. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  135. {
  136. auto is_name = [](ComponentValue const& token) {
  137. return token.is_delim('*') || token.is(Token::Type::Ident);
  138. };
  139. auto get_name = [](ComponentValue const& token) {
  140. if (token.is_delim('*'))
  141. return "*"_fly_string;
  142. return token.token().ident();
  143. };
  144. // There are 3 possibilities here:
  145. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  146. // 1) `|<name>`
  147. // 2) `<namespace>|<name>`
  148. // 3) `<name>`
  149. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  150. auto transaction = tokens.begin_transaction();
  151. auto first_token = tokens.next_token();
  152. if (first_token.is_delim('|')) {
  153. // Case 1: `|<name>`
  154. if (is_name(tokens.peek_token())) {
  155. auto name_token = tokens.next_token();
  156. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  157. return {};
  158. transaction.commit();
  159. return Selector::SimpleSelector::QualifiedName {
  160. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  161. .name = get_name(name_token),
  162. };
  163. }
  164. return {};
  165. }
  166. if (!is_name(first_token))
  167. return {};
  168. if (tokens.peek_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  169. // Case 2: `<namespace>|<name>`
  170. (void)tokens.next_token(); // `|`
  171. auto namespace_ = get_name(first_token);
  172. auto name = get_name(tokens.next_token());
  173. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  174. return {};
  175. auto namespace_type = namespace_ == "*"sv
  176. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  177. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  178. transaction.commit();
  179. return Selector::SimpleSelector::QualifiedName {
  180. .namespace_type = namespace_type,
  181. .namespace_ = namespace_,
  182. .name = name,
  183. };
  184. }
  185. // Case 3: `<name>`
  186. auto& name_token = first_token;
  187. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  188. return {};
  189. transaction.commit();
  190. return Selector::SimpleSelector::QualifiedName {
  191. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  192. .name = get_name(name_token),
  193. };
  194. }
  195. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  196. {
  197. auto attribute_tokens = TokenStream { first_value.block().values() };
  198. attribute_tokens.skip_whitespace();
  199. if (!attribute_tokens.has_next_token()) {
  200. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  201. return ParseError::SyntaxError;
  202. }
  203. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  204. if (!maybe_qualified_name.has_value()) {
  205. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.peek_token().to_debug_string());
  206. return ParseError::SyntaxError;
  207. }
  208. auto qualified_name = maybe_qualified_name.release_value();
  209. Selector::SimpleSelector simple_selector {
  210. .type = Selector::SimpleSelector::Type::Attribute,
  211. .value = Selector::SimpleSelector::Attribute {
  212. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  213. // FIXME: Case-sensitivity is defined by the document language.
  214. // HTML is insensitive with attribute names, and our code generally assumes
  215. // they are converted to lowercase, so we do that here too. If we want to be
  216. // correct with XML later, we'll need to keep the original case and then do
  217. // a case-insensitive compare later.
  218. .qualified_name = qualified_name,
  219. .case_type = case_insensitive_html_attributes.contains_slow(qualified_name.name.lowercase_name)
  220. ? Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch
  221. : Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  222. }
  223. };
  224. attribute_tokens.skip_whitespace();
  225. if (!attribute_tokens.has_next_token())
  226. return simple_selector;
  227. auto const& delim_part = attribute_tokens.next_token();
  228. if (!delim_part.is(Token::Type::Delim)) {
  229. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  230. return ParseError::SyntaxError;
  231. }
  232. if (delim_part.token().delim() == '=') {
  233. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  234. } else {
  235. if (!attribute_tokens.has_next_token()) {
  236. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  237. return ParseError::SyntaxError;
  238. }
  239. auto const& delim_second_part = attribute_tokens.next_token();
  240. if (!delim_second_part.is_delim('=')) {
  241. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  242. return ParseError::SyntaxError;
  243. }
  244. switch (delim_part.token().delim()) {
  245. case '~':
  246. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  247. break;
  248. case '*':
  249. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  250. break;
  251. case '|':
  252. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  253. break;
  254. case '^':
  255. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  256. break;
  257. case '$':
  258. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  259. break;
  260. default:
  261. attribute_tokens.reconsume_current_input_token();
  262. }
  263. }
  264. attribute_tokens.skip_whitespace();
  265. if (!attribute_tokens.has_next_token()) {
  266. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  267. return ParseError::SyntaxError;
  268. }
  269. auto const& value_part = attribute_tokens.next_token();
  270. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  271. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  272. return ParseError::SyntaxError;
  273. }
  274. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  275. simple_selector.attribute().value = value_string.to_string();
  276. attribute_tokens.skip_whitespace();
  277. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  278. if (attribute_tokens.has_next_token()) {
  279. auto const& case_sensitivity_part = attribute_tokens.next_token();
  280. if (case_sensitivity_part.is(Token::Type::Ident)) {
  281. auto case_sensitivity = case_sensitivity_part.token().ident();
  282. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  283. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  284. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  285. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  286. } else {
  287. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  288. return ParseError::SyntaxError;
  289. }
  290. } else {
  291. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  292. return ParseError::SyntaxError;
  293. }
  294. }
  295. if (attribute_tokens.has_next_token()) {
  296. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  297. return ParseError::SyntaxError;
  298. }
  299. return simple_selector;
  300. }
  301. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  302. {
  303. auto peek_token_ends_selector = [&]() -> bool {
  304. auto const& value = tokens.peek_token();
  305. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  306. };
  307. if (peek_token_ends_selector())
  308. return ParseError::SyntaxError;
  309. bool is_pseudo = false;
  310. if (tokens.peek_token().is(Token::Type::Colon)) {
  311. is_pseudo = true;
  312. tokens.next_token();
  313. if (peek_token_ends_selector())
  314. return ParseError::SyntaxError;
  315. }
  316. if (is_pseudo) {
  317. auto const& name_token = tokens.next_token();
  318. if (!name_token.is(Token::Type::Ident)) {
  319. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  320. return ParseError::SyntaxError;
  321. }
  322. auto pseudo_name = name_token.token().ident();
  323. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  324. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  325. return Selector::SimpleSelector {
  326. .type = Selector::SimpleSelector::Type::PseudoElement,
  327. .value = pseudo_element.release_value()
  328. };
  329. }
  330. // https://www.w3.org/TR/selectors-4/#compat
  331. // All other pseudo-elements whose names begin with the string “-webkit-” (matched ASCII case-insensitively)
  332. // and that are not functional notations must be treated as valid at parse time. (That is, ::-webkit-asdf is
  333. // valid at parse time, but ::-webkit-jkl() is not.) If they’re not otherwise recognized and supported, they
  334. // must be treated as matching nothing, and are unknown -webkit- pseudo-elements.
  335. if (pseudo_name.starts_with_bytes("-webkit-"sv, CaseSensitivity::CaseInsensitive)) {
  336. return Selector::SimpleSelector {
  337. .type = Selector::SimpleSelector::Type::PseudoElement,
  338. // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
  339. .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) },
  340. };
  341. }
  342. if (has_ignored_vendor_prefix(pseudo_name))
  343. return ParseError::IncludesIgnoredVendorPrefix;
  344. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  345. return ParseError::SyntaxError;
  346. }
  347. if (peek_token_ends_selector())
  348. return ParseError::SyntaxError;
  349. auto const& pseudo_class_token = tokens.next_token();
  350. if (pseudo_class_token.is(Token::Type::Ident)) {
  351. auto pseudo_name = pseudo_class_token.token().ident();
  352. if (has_ignored_vendor_prefix(pseudo_name))
  353. return ParseError::IncludesIgnoredVendorPrefix;
  354. auto make_pseudo_class_selector = [](auto pseudo_class) {
  355. return Selector::SimpleSelector {
  356. .type = Selector::SimpleSelector::Type::PseudoClass,
  357. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  358. };
  359. };
  360. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  361. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  362. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  363. return ParseError::SyntaxError;
  364. }
  365. return make_pseudo_class_selector(pseudo_class.value());
  366. }
  367. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  368. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  369. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  370. switch (pseudo_element.value().type()) {
  371. case Selector::PseudoElement::Type::After:
  372. case Selector::PseudoElement::Type::Before:
  373. case Selector::PseudoElement::Type::FirstLetter:
  374. case Selector::PseudoElement::Type::FirstLine:
  375. return Selector::SimpleSelector {
  376. .type = Selector::SimpleSelector::Type::PseudoElement,
  377. .value = pseudo_element.value()
  378. };
  379. default:
  380. break;
  381. }
  382. }
  383. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  384. return ParseError::SyntaxError;
  385. }
  386. if (pseudo_class_token.is_function()) {
  387. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  388. auto tokens = TokenStream<ComponentValue>(function_values);
  389. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  390. if (!nth_child_pattern.has_value()) {
  391. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  392. return ParseError::SyntaxError;
  393. }
  394. tokens.skip_whitespace();
  395. if (!tokens.has_next_token()) {
  396. return Selector::SimpleSelector {
  397. .type = Selector::SimpleSelector::Type::PseudoClass,
  398. .value = Selector::SimpleSelector::PseudoClassSelector {
  399. .type = pseudo_class,
  400. .nth_child_pattern = nth_child_pattern.release_value() }
  401. };
  402. }
  403. if (!allow_of)
  404. return ParseError::SyntaxError;
  405. // Parse the `of <selector-list>` syntax
  406. auto const& maybe_of = tokens.next_token();
  407. if (!maybe_of.is_ident("of"sv))
  408. return ParseError::SyntaxError;
  409. tokens.skip_whitespace();
  410. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  411. tokens.skip_whitespace();
  412. if (tokens.has_next_token())
  413. return ParseError::SyntaxError;
  414. return Selector::SimpleSelector {
  415. .type = Selector::SimpleSelector::Type::PseudoClass,
  416. .value = Selector::SimpleSelector::PseudoClassSelector {
  417. .type = pseudo_class,
  418. .nth_child_pattern = nth_child_pattern.release_value(),
  419. .argument_selector_list = move(selector_list) }
  420. };
  421. };
  422. auto const& pseudo_function = pseudo_class_token.function();
  423. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name());
  424. if (!maybe_pseudo_class.has_value()) {
  425. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name());
  426. return ParseError::SyntaxError;
  427. }
  428. auto pseudo_class = maybe_pseudo_class.value();
  429. auto metadata = pseudo_class_metadata(pseudo_class);
  430. if (!metadata.is_valid_as_function) {
  431. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name());
  432. return ParseError::SyntaxError;
  433. }
  434. if (pseudo_function.values().is_empty()) {
  435. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name());
  436. return ParseError::SyntaxError;
  437. }
  438. switch (metadata.parameter_type) {
  439. case PseudoClassMetadata::ParameterType::ANPlusB:
  440. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false);
  441. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  442. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true);
  443. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  444. auto function_token_stream = TokenStream(pseudo_function.values());
  445. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  446. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  447. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name());
  448. return ParseError::SyntaxError;
  449. }
  450. auto compound_selector = compound_selector_or_error.release_value().release_value();
  451. compound_selector.combinator = Selector::Combinator::None;
  452. Vector compound_selectors { move(compound_selector) };
  453. auto selector = Selector::create(move(compound_selectors));
  454. return Selector::SimpleSelector {
  455. .type = Selector::SimpleSelector::Type::PseudoClass,
  456. .value = Selector::SimpleSelector::PseudoClassSelector {
  457. .type = pseudo_class,
  458. .argument_selector_list = { move(selector) } }
  459. };
  460. }
  461. case PseudoClassMetadata::ParameterType::ForgivingRelativeSelectorList:
  462. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  463. auto function_token_stream = TokenStream(pseudo_function.values());
  464. auto selector_type = metadata.parameter_type == PseudoClassMetadata::ParameterType::ForgivingSelectorList
  465. ? SelectorType::Standalone
  466. : SelectorType::Relative;
  467. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  468. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, selector_type, SelectorParsingMode::Forgiving));
  469. return Selector::SimpleSelector {
  470. .type = Selector::SimpleSelector::Type::PseudoClass,
  471. .value = Selector::SimpleSelector::PseudoClassSelector {
  472. .type = pseudo_class,
  473. .argument_selector_list = move(argument_selector_list) }
  474. };
  475. }
  476. case PseudoClassMetadata::ParameterType::Ident: {
  477. auto function_token_stream = TokenStream(pseudo_function.values());
  478. function_token_stream.skip_whitespace();
  479. auto maybe_ident_token = function_token_stream.next_token();
  480. function_token_stream.skip_whitespace();
  481. if (!maybe_ident_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  482. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: not an ident", pseudo_function.name());
  483. return ParseError::SyntaxError;
  484. }
  485. auto maybe_ident = value_id_from_string(maybe_ident_token.token().ident());
  486. if (!maybe_ident.has_value()) {
  487. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: unrecognized ident", pseudo_function.name());
  488. return ParseError::SyntaxError;
  489. }
  490. return Selector::SimpleSelector {
  491. .type = Selector::SimpleSelector::Type::PseudoClass,
  492. .value = Selector::SimpleSelector::PseudoClassSelector {
  493. .type = pseudo_class,
  494. .identifier = maybe_ident.value() }
  495. };
  496. }
  497. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  498. Vector<FlyString> languages;
  499. auto function_token_stream = TokenStream(pseudo_function.values());
  500. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  501. for (auto language_token_list : language_token_lists) {
  502. auto language_token_stream = TokenStream(language_token_list);
  503. language_token_stream.skip_whitespace();
  504. auto language_token = language_token_stream.next_token();
  505. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  506. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name());
  507. return ParseError::SyntaxError;
  508. }
  509. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  510. languages.append(language_string);
  511. language_token_stream.skip_whitespace();
  512. if (language_token_stream.has_next_token()) {
  513. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name());
  514. return ParseError::SyntaxError;
  515. }
  516. }
  517. return Selector::SimpleSelector {
  518. .type = Selector::SimpleSelector::Type::PseudoClass,
  519. .value = Selector::SimpleSelector::PseudoClassSelector {
  520. .type = pseudo_class,
  521. .languages = move(languages) }
  522. };
  523. }
  524. case PseudoClassMetadata::ParameterType::SelectorList: {
  525. auto function_token_stream = TokenStream(pseudo_function.values());
  526. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  527. return Selector::SimpleSelector {
  528. .type = Selector::SimpleSelector::Type::PseudoClass,
  529. .value = Selector::SimpleSelector::PseudoClassSelector {
  530. .type = pseudo_class,
  531. .argument_selector_list = move(not_selector) }
  532. };
  533. }
  534. case PseudoClassMetadata::ParameterType::None:
  535. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  536. VERIFY_NOT_REACHED();
  537. }
  538. }
  539. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  540. return ParseError::SyntaxError;
  541. }
  542. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  543. {
  544. auto peek_token_ends_selector = [&]() -> bool {
  545. auto const& value = tokens.peek_token();
  546. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  547. };
  548. if (peek_token_ends_selector())
  549. return Optional<Selector::SimpleSelector> {};
  550. // Handle universal and tag-name types together, since both can be namespaced
  551. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  552. if (qualified_name->name.name == "*"sv) {
  553. return Selector::SimpleSelector {
  554. .type = Selector::SimpleSelector::Type::Universal,
  555. .value = qualified_name.release_value(),
  556. };
  557. }
  558. return Selector::SimpleSelector {
  559. .type = Selector::SimpleSelector::Type::TagName,
  560. .value = qualified_name.release_value(),
  561. };
  562. }
  563. auto const& first_value = tokens.next_token();
  564. if (first_value.is(Token::Type::Delim)) {
  565. u32 delim = first_value.token().delim();
  566. switch (delim) {
  567. case '*':
  568. // Handled already
  569. VERIFY_NOT_REACHED();
  570. case '.': {
  571. if (peek_token_ends_selector())
  572. return ParseError::SyntaxError;
  573. auto const& class_name_value = tokens.next_token();
  574. if (!class_name_value.is(Token::Type::Ident)) {
  575. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  576. return ParseError::SyntaxError;
  577. }
  578. return Selector::SimpleSelector {
  579. .type = Selector::SimpleSelector::Type::Class,
  580. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  581. };
  582. }
  583. case '>':
  584. case '+':
  585. case '~':
  586. case '|':
  587. // Whitespace is not required between the compound-selector and a combinator.
  588. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  589. tokens.reconsume_current_input_token();
  590. return Optional<Selector::SimpleSelector> {};
  591. default:
  592. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  593. return ParseError::SyntaxError;
  594. }
  595. }
  596. if (first_value.is(Token::Type::Hash)) {
  597. if (first_value.token().hash_type() != Token::HashType::Id) {
  598. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  599. return ParseError::SyntaxError;
  600. }
  601. return Selector::SimpleSelector {
  602. .type = Selector::SimpleSelector::Type::Id,
  603. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  604. };
  605. }
  606. if (first_value.is_block() && first_value.block().is_square())
  607. return TRY(parse_attribute_simple_selector(first_value));
  608. if (first_value.is(Token::Type::Colon))
  609. return TRY(parse_pseudo_simple_selector(tokens));
  610. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  611. return ParseError::SyntaxError;
  612. }
  613. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  614. {
  615. auto transaction = values.begin_transaction();
  616. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  617. if constexpr (CSS_PARSER_DEBUG) {
  618. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  619. values.dump_all_tokens();
  620. }
  621. return {};
  622. };
  623. auto is_sign = [](ComponentValue const& value) -> bool {
  624. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  625. };
  626. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  627. if (!value.is(Token::Type::Dimension))
  628. return false;
  629. if (!value.token().number().is_integer())
  630. return false;
  631. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  632. return false;
  633. return true;
  634. };
  635. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  636. if (!value.is(Token::Type::Dimension))
  637. return false;
  638. if (!value.token().number().is_integer())
  639. return false;
  640. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  641. return false;
  642. return true;
  643. };
  644. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  645. if (!value.is(Token::Type::Dimension))
  646. return false;
  647. if (!value.token().number().is_integer())
  648. return false;
  649. auto dimension_unit = value.token().dimension_unit();
  650. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  651. return false;
  652. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  653. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  654. return false;
  655. }
  656. return true;
  657. };
  658. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  659. if (!value.is(Token::Type::Ident))
  660. return false;
  661. auto ident = value.token().ident();
  662. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  663. return false;
  664. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  665. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  666. return false;
  667. }
  668. return true;
  669. };
  670. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  671. if (!value.is(Token::Type::Ident))
  672. return false;
  673. auto ident = value.token().ident();
  674. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  675. return false;
  676. if (ident.bytes_as_string_view().length() == 3)
  677. return false;
  678. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  679. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  680. return false;
  681. }
  682. return true;
  683. };
  684. auto is_integer = [](ComponentValue const& value) -> bool {
  685. return value.is(Token::Type::Number) && value.token().number().is_integer();
  686. };
  687. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  688. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  689. };
  690. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  691. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  692. };
  693. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  694. // Unfortunately these can't be in the same order as in the spec.
  695. values.skip_whitespace();
  696. auto const& first_value = values.next_token();
  697. // odd | even
  698. if (first_value.is(Token::Type::Ident)) {
  699. auto ident = first_value.token().ident();
  700. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  701. transaction.commit();
  702. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  703. }
  704. if (ident.equals_ignoring_ascii_case("even"sv)) {
  705. transaction.commit();
  706. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  707. }
  708. }
  709. // <integer>
  710. if (is_integer(first_value)) {
  711. int b = first_value.token().to_integer();
  712. transaction.commit();
  713. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  714. }
  715. // <n-dimension>
  716. // <n-dimension> <signed-integer>
  717. // <n-dimension> ['+' | '-'] <signless-integer>
  718. if (is_n_dimension(first_value)) {
  719. int a = first_value.token().dimension_value_int();
  720. values.skip_whitespace();
  721. // <n-dimension> <signed-integer>
  722. if (is_signed_integer(values.peek_token())) {
  723. int b = values.next_token().token().to_integer();
  724. transaction.commit();
  725. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  726. }
  727. // <n-dimension> ['+' | '-'] <signless-integer>
  728. {
  729. auto child_transaction = transaction.create_child();
  730. auto const& second_value = values.next_token();
  731. values.skip_whitespace();
  732. auto const& third_value = values.next_token();
  733. if (is_sign(second_value) && is_signless_integer(third_value)) {
  734. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  735. child_transaction.commit();
  736. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  737. }
  738. }
  739. // <n-dimension>
  740. transaction.commit();
  741. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  742. }
  743. // <ndash-dimension> <signless-integer>
  744. if (is_ndash_dimension(first_value)) {
  745. values.skip_whitespace();
  746. auto const& second_value = values.next_token();
  747. if (is_signless_integer(second_value)) {
  748. int a = first_value.token().dimension_value_int();
  749. int b = -second_value.token().to_integer();
  750. transaction.commit();
  751. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  752. }
  753. return syntax_error();
  754. }
  755. // <ndashdigit-dimension>
  756. if (is_ndashdigit_dimension(first_value)) {
  757. auto const& dimension = first_value.token();
  758. int a = dimension.dimension_value_int();
  759. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_number<int>();
  760. if (maybe_b.has_value()) {
  761. transaction.commit();
  762. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  763. }
  764. return syntax_error();
  765. }
  766. // <dashndashdigit-ident>
  767. if (is_dashndashdigit_ident(first_value)) {
  768. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_number<int>();
  769. if (maybe_b.has_value()) {
  770. transaction.commit();
  771. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  772. }
  773. return syntax_error();
  774. }
  775. // -n
  776. // -n <signed-integer>
  777. // -n ['+' | '-'] <signless-integer>
  778. if (first_value.is_ident("-n"sv)) {
  779. values.skip_whitespace();
  780. // -n <signed-integer>
  781. if (is_signed_integer(values.peek_token())) {
  782. int b = values.next_token().token().to_integer();
  783. transaction.commit();
  784. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  785. }
  786. // -n ['+' | '-'] <signless-integer>
  787. {
  788. auto child_transaction = transaction.create_child();
  789. auto const& second_value = values.next_token();
  790. values.skip_whitespace();
  791. auto const& third_value = values.next_token();
  792. if (is_sign(second_value) && is_signless_integer(third_value)) {
  793. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  794. child_transaction.commit();
  795. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  796. }
  797. }
  798. // -n
  799. transaction.commit();
  800. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  801. }
  802. // -n- <signless-integer>
  803. if (first_value.is_ident("-n-"sv)) {
  804. values.skip_whitespace();
  805. auto const& second_value = values.next_token();
  806. if (is_signless_integer(second_value)) {
  807. int b = -second_value.token().to_integer();
  808. transaction.commit();
  809. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  810. }
  811. return syntax_error();
  812. }
  813. // All that's left now are these:
  814. // '+'?† n
  815. // '+'?† n <signed-integer>
  816. // '+'?† n ['+' | '-'] <signless-integer>
  817. // '+'?† n- <signless-integer>
  818. // '+'?† <ndashdigit-ident>
  819. // In all of these cases, the + is optional, and has no effect.
  820. // So, we just skip the +, and carry on.
  821. if (!first_value.is_delim('+')) {
  822. values.reconsume_current_input_token();
  823. // We do *not* skip whitespace here.
  824. }
  825. auto const& first_after_plus = values.next_token();
  826. // '+'?† n
  827. // '+'?† n <signed-integer>
  828. // '+'?† n ['+' | '-'] <signless-integer>
  829. if (first_after_plus.is_ident("n"sv)) {
  830. values.skip_whitespace();
  831. // '+'?† n <signed-integer>
  832. if (is_signed_integer(values.peek_token())) {
  833. int b = values.next_token().token().to_integer();
  834. transaction.commit();
  835. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  836. }
  837. // '+'?† n ['+' | '-'] <signless-integer>
  838. {
  839. auto child_transaction = transaction.create_child();
  840. auto const& second_value = values.next_token();
  841. values.skip_whitespace();
  842. auto const& third_value = values.next_token();
  843. if (is_sign(second_value) && is_signless_integer(third_value)) {
  844. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  845. child_transaction.commit();
  846. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  847. }
  848. }
  849. // '+'?† n
  850. transaction.commit();
  851. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  852. }
  853. // '+'?† n- <signless-integer>
  854. if (first_after_plus.is_ident("n-"sv)) {
  855. values.skip_whitespace();
  856. auto const& second_value = values.next_token();
  857. if (is_signless_integer(second_value)) {
  858. int b = -second_value.token().to_integer();
  859. transaction.commit();
  860. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  861. }
  862. return syntax_error();
  863. }
  864. // '+'?† <ndashdigit-ident>
  865. if (is_ndashdigit_ident(first_after_plus)) {
  866. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_number<int>();
  867. if (maybe_b.has_value()) {
  868. transaction.commit();
  869. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  870. }
  871. return syntax_error();
  872. }
  873. return syntax_error();
  874. }
  875. }