SelectorParsing.cpp 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2024, Sam Atkins <sam@ladybird.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::CSS::Parser {
  14. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  15. {
  16. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  17. if (!selector_list.is_error())
  18. return selector_list.release_value();
  19. return {};
  20. }
  21. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  22. {
  23. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  24. if (!selector_list.is_error())
  25. return selector_list.release_value();
  26. return {};
  27. }
  28. Optional<Selector::PseudoElement> Parser::parse_as_pseudo_element_selector()
  29. {
  30. // FIXME: This is quite janky. Selector parsing is not at all designed to allow parsing just a single part of a selector.
  31. // So, this code parses a whole selector, then rejects it if it's not a single pseudo-element simple selector.
  32. // Come back and fix this, future Sam!
  33. auto maybe_selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, SelectorParsingMode::Standard);
  34. if (maybe_selector_list.is_error())
  35. return {};
  36. auto& selector_list = maybe_selector_list.value();
  37. if (selector_list.size() != 1)
  38. return {};
  39. auto& selector = selector_list.first();
  40. if (selector->compound_selectors().size() != 1)
  41. return {};
  42. auto& first_compound_selector = selector->compound_selectors().first();
  43. if (first_compound_selector.simple_selectors.size() != 1)
  44. return {};
  45. auto& simple_selector = first_compound_selector.simple_selectors.first();
  46. if (simple_selector.type != Selector::SimpleSelector::Type::PseudoElement)
  47. return {};
  48. return simple_selector.pseudo_element();
  49. }
  50. static NonnullRefPtr<Selector> create_invalid_selector(Selector::Combinator combinator, Vector<ComponentValue> component_values)
  51. {
  52. // Trim leading and trailing whitespace
  53. while (!component_values.is_empty() && component_values.first().is(Token::Type::Whitespace)) {
  54. component_values.take_first();
  55. }
  56. while (!component_values.is_empty() && component_values.last().is(Token::Type::Whitespace)) {
  57. component_values.take_last();
  58. }
  59. Selector::SimpleSelector simple {
  60. .type = Selector::SimpleSelector::Type::Invalid,
  61. .value = Selector::SimpleSelector::Invalid {
  62. .component_values = move(component_values),
  63. }
  64. };
  65. Selector::CompoundSelector compound {
  66. .combinator = combinator,
  67. .simple_selectors = { move(simple) }
  68. };
  69. return Selector::create({ move(compound) });
  70. }
  71. template<typename T>
  72. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  73. {
  74. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  75. SelectorList selectors;
  76. for (auto& selector_parts : comma_separated_lists) {
  77. auto stream = TokenStream(selector_parts);
  78. auto selector = parse_complex_selector(stream, mode);
  79. if (selector.is_error()) {
  80. if (parsing_mode == SelectorParsingMode::Forgiving) {
  81. // Keep the invalid selector around for serialization and nesting
  82. auto combinator = mode == SelectorType::Standalone ? Selector::Combinator::None : Selector::Combinator::Descendant;
  83. selectors.append(create_invalid_selector(combinator, move(selector_parts)));
  84. continue;
  85. }
  86. return selector.error();
  87. }
  88. selectors.append(selector.release_value());
  89. }
  90. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  91. return ParseError::SyntaxError;
  92. return selectors;
  93. }
  94. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  95. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  96. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  97. {
  98. Vector<Selector::CompoundSelector> compound_selectors;
  99. auto first_selector = TRY(parse_compound_selector(tokens));
  100. if (!first_selector.has_value())
  101. return ParseError::SyntaxError;
  102. if (mode == SelectorType::Standalone) {
  103. if (first_selector->combinator != Selector::Combinator::Descendant)
  104. return ParseError::SyntaxError;
  105. first_selector->combinator = Selector::Combinator::None;
  106. }
  107. compound_selectors.append(first_selector.release_value());
  108. while (tokens.has_next_token()) {
  109. auto compound_selector = TRY(parse_compound_selector(tokens));
  110. if (!compound_selector.has_value())
  111. break;
  112. compound_selectors.append(compound_selector.release_value());
  113. }
  114. if (compound_selectors.is_empty())
  115. return ParseError::SyntaxError;
  116. return Selector::create(move(compound_selectors));
  117. }
  118. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  119. {
  120. tokens.discard_whitespace();
  121. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  122. tokens.discard_whitespace();
  123. Vector<Selector::SimpleSelector> simple_selectors;
  124. while (tokens.has_next_token()) {
  125. auto component = TRY(parse_simple_selector(tokens));
  126. if (!component.has_value())
  127. break;
  128. if (component->type == Selector::SimpleSelector::Type::TagName && !simple_selectors.is_empty()) {
  129. // Tag-name selectors can only go at the beginning of a compound selector.
  130. return ParseError::SyntaxError;
  131. }
  132. simple_selectors.append(component.release_value());
  133. }
  134. if (simple_selectors.is_empty())
  135. return Optional<Selector::CompoundSelector> {};
  136. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  137. }
  138. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  139. {
  140. auto const& current_value = tokens.consume_a_token();
  141. if (current_value.is(Token::Type::Delim)) {
  142. switch (current_value.token().delim()) {
  143. case '>':
  144. return Selector::Combinator::ImmediateChild;
  145. case '+':
  146. return Selector::Combinator::NextSibling;
  147. case '~':
  148. return Selector::Combinator::SubsequentSibling;
  149. case '|': {
  150. auto const& next = tokens.next_token();
  151. if (next.is(Token::Type::EndOfFile))
  152. return {};
  153. if (next.is_delim('|')) {
  154. tokens.discard_a_token();
  155. return Selector::Combinator::Column;
  156. }
  157. }
  158. }
  159. }
  160. tokens.reconsume_current_input_token();
  161. return {};
  162. }
  163. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  164. {
  165. auto is_name = [](ComponentValue const& token) {
  166. return token.is_delim('*') || token.is(Token::Type::Ident);
  167. };
  168. auto get_name = [](ComponentValue const& token) {
  169. if (token.is_delim('*'))
  170. return "*"_fly_string;
  171. return token.token().ident();
  172. };
  173. // There are 3 possibilities here:
  174. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  175. // 1) `|<name>`
  176. // 2) `<namespace>|<name>`
  177. // 3) `<name>`
  178. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  179. auto transaction = tokens.begin_transaction();
  180. auto const& first_token = tokens.consume_a_token();
  181. if (first_token.is_delim('|')) {
  182. // Case 1: `|<name>`
  183. if (is_name(tokens.next_token())) {
  184. auto const& name_token = tokens.consume_a_token();
  185. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  186. return {};
  187. transaction.commit();
  188. return Selector::SimpleSelector::QualifiedName {
  189. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  190. .name = get_name(name_token),
  191. };
  192. }
  193. return {};
  194. }
  195. if (!is_name(first_token))
  196. return {};
  197. if (tokens.next_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  198. // Case 2: `<namespace>|<name>`
  199. tokens.discard_a_token(); // `|`
  200. auto namespace_ = get_name(first_token);
  201. auto name = get_name(tokens.consume_a_token());
  202. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  203. return {};
  204. auto namespace_type = namespace_ == "*"sv
  205. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  206. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  207. transaction.commit();
  208. return Selector::SimpleSelector::QualifiedName {
  209. .namespace_type = namespace_type,
  210. .namespace_ = namespace_,
  211. .name = name,
  212. };
  213. }
  214. // Case 3: `<name>`
  215. auto& name_token = first_token;
  216. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  217. return {};
  218. transaction.commit();
  219. return Selector::SimpleSelector::QualifiedName {
  220. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  221. .name = get_name(name_token),
  222. };
  223. }
  224. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  225. {
  226. auto attribute_tokens = TokenStream { first_value.block().value };
  227. attribute_tokens.discard_whitespace();
  228. if (!attribute_tokens.has_next_token()) {
  229. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  230. return ParseError::SyntaxError;
  231. }
  232. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  233. if (!maybe_qualified_name.has_value()) {
  234. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.next_token().to_debug_string());
  235. return ParseError::SyntaxError;
  236. }
  237. auto qualified_name = maybe_qualified_name.release_value();
  238. Selector::SimpleSelector simple_selector {
  239. .type = Selector::SimpleSelector::Type::Attribute,
  240. .value = Selector::SimpleSelector::Attribute {
  241. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  242. .qualified_name = qualified_name,
  243. .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  244. }
  245. };
  246. attribute_tokens.discard_whitespace();
  247. if (!attribute_tokens.has_next_token())
  248. return simple_selector;
  249. auto const& delim_part = attribute_tokens.consume_a_token();
  250. if (!delim_part.is(Token::Type::Delim)) {
  251. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  252. return ParseError::SyntaxError;
  253. }
  254. if (delim_part.token().delim() == '=') {
  255. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  256. } else {
  257. if (!attribute_tokens.has_next_token()) {
  258. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  259. return ParseError::SyntaxError;
  260. }
  261. auto const& delim_second_part = attribute_tokens.consume_a_token();
  262. if (!delim_second_part.is_delim('=')) {
  263. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  264. return ParseError::SyntaxError;
  265. }
  266. switch (delim_part.token().delim()) {
  267. case '~':
  268. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  269. break;
  270. case '*':
  271. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  272. break;
  273. case '|':
  274. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  275. break;
  276. case '^':
  277. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  278. break;
  279. case '$':
  280. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  281. break;
  282. default:
  283. attribute_tokens.reconsume_current_input_token();
  284. }
  285. }
  286. attribute_tokens.discard_whitespace();
  287. if (!attribute_tokens.has_next_token()) {
  288. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  289. return ParseError::SyntaxError;
  290. }
  291. auto const& value_part = attribute_tokens.consume_a_token();
  292. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  293. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  294. return ParseError::SyntaxError;
  295. }
  296. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  297. simple_selector.attribute().value = value_string.to_string();
  298. attribute_tokens.discard_whitespace();
  299. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  300. if (attribute_tokens.has_next_token()) {
  301. auto const& case_sensitivity_part = attribute_tokens.consume_a_token();
  302. if (case_sensitivity_part.is(Token::Type::Ident)) {
  303. auto case_sensitivity = case_sensitivity_part.token().ident();
  304. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  305. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  306. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  307. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  308. } else {
  309. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  310. return ParseError::SyntaxError;
  311. }
  312. } else {
  313. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  314. return ParseError::SyntaxError;
  315. }
  316. }
  317. if (attribute_tokens.has_next_token()) {
  318. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  319. return ParseError::SyntaxError;
  320. }
  321. return simple_selector;
  322. }
  323. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  324. {
  325. auto peek_token_ends_selector = [&]() -> bool {
  326. auto const& value = tokens.next_token();
  327. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  328. };
  329. if (peek_token_ends_selector())
  330. return ParseError::SyntaxError;
  331. bool is_pseudo = false;
  332. if (tokens.next_token().is(Token::Type::Colon)) {
  333. is_pseudo = true;
  334. tokens.discard_a_token();
  335. if (peek_token_ends_selector())
  336. return ParseError::SyntaxError;
  337. }
  338. if (is_pseudo) {
  339. auto const& name_token = tokens.consume_a_token();
  340. if (!name_token.is(Token::Type::Ident)) {
  341. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  342. return ParseError::SyntaxError;
  343. }
  344. auto pseudo_name = name_token.token().ident();
  345. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  346. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  347. return Selector::SimpleSelector {
  348. .type = Selector::SimpleSelector::Type::PseudoElement,
  349. .value = pseudo_element.release_value()
  350. };
  351. }
  352. // https://www.w3.org/TR/selectors-4/#compat
  353. // All other pseudo-elements whose names begin with the string “-webkit-” (matched ASCII case-insensitively)
  354. // and that are not functional notations must be treated as valid at parse time. (That is, ::-webkit-asdf is
  355. // valid at parse time, but ::-webkit-jkl() is not.) If they’re not otherwise recognized and supported, they
  356. // must be treated as matching nothing, and are unknown -webkit- pseudo-elements.
  357. if (pseudo_name.starts_with_bytes("-webkit-"sv, CaseSensitivity::CaseInsensitive)) {
  358. return Selector::SimpleSelector {
  359. .type = Selector::SimpleSelector::Type::PseudoElement,
  360. // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
  361. .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, pseudo_name.to_string().to_ascii_lowercase() },
  362. };
  363. }
  364. if (has_ignored_vendor_prefix(pseudo_name))
  365. return ParseError::IncludesIgnoredVendorPrefix;
  366. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  367. return ParseError::SyntaxError;
  368. }
  369. if (peek_token_ends_selector())
  370. return ParseError::SyntaxError;
  371. auto const& pseudo_class_token = tokens.consume_a_token();
  372. if (pseudo_class_token.is(Token::Type::Ident)) {
  373. auto pseudo_name = pseudo_class_token.token().ident();
  374. if (has_ignored_vendor_prefix(pseudo_name))
  375. return ParseError::IncludesIgnoredVendorPrefix;
  376. auto make_pseudo_class_selector = [](auto pseudo_class) {
  377. return Selector::SimpleSelector {
  378. .type = Selector::SimpleSelector::Type::PseudoClass,
  379. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  380. };
  381. };
  382. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  383. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  384. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  385. return ParseError::SyntaxError;
  386. }
  387. return make_pseudo_class_selector(pseudo_class.value());
  388. }
  389. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  390. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  391. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  392. switch (pseudo_element.value().type()) {
  393. case Selector::PseudoElement::Type::After:
  394. case Selector::PseudoElement::Type::Before:
  395. case Selector::PseudoElement::Type::FirstLetter:
  396. case Selector::PseudoElement::Type::FirstLine:
  397. return Selector::SimpleSelector {
  398. .type = Selector::SimpleSelector::Type::PseudoElement,
  399. .value = pseudo_element.value()
  400. };
  401. default:
  402. break;
  403. }
  404. }
  405. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  406. return ParseError::SyntaxError;
  407. }
  408. if (pseudo_class_token.is_function()) {
  409. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  410. auto tokens = TokenStream<ComponentValue>(function_values);
  411. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  412. if (!nth_child_pattern.has_value()) {
  413. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  414. return ParseError::SyntaxError;
  415. }
  416. tokens.discard_whitespace();
  417. if (!tokens.has_next_token()) {
  418. return Selector::SimpleSelector {
  419. .type = Selector::SimpleSelector::Type::PseudoClass,
  420. .value = Selector::SimpleSelector::PseudoClassSelector {
  421. .type = pseudo_class,
  422. .nth_child_pattern = nth_child_pattern.release_value() }
  423. };
  424. }
  425. if (!allow_of)
  426. return ParseError::SyntaxError;
  427. // Parse the `of <selector-list>` syntax
  428. auto const& maybe_of = tokens.consume_a_token();
  429. if (!maybe_of.is_ident("of"sv))
  430. return ParseError::SyntaxError;
  431. tokens.discard_whitespace();
  432. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  433. tokens.discard_whitespace();
  434. if (tokens.has_next_token())
  435. return ParseError::SyntaxError;
  436. return Selector::SimpleSelector {
  437. .type = Selector::SimpleSelector::Type::PseudoClass,
  438. .value = Selector::SimpleSelector::PseudoClassSelector {
  439. .type = pseudo_class,
  440. .nth_child_pattern = nth_child_pattern.release_value(),
  441. .argument_selector_list = move(selector_list) }
  442. };
  443. };
  444. auto const& pseudo_function = pseudo_class_token.function();
  445. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name);
  446. if (!maybe_pseudo_class.has_value()) {
  447. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name);
  448. return ParseError::SyntaxError;
  449. }
  450. auto pseudo_class = maybe_pseudo_class.value();
  451. auto metadata = pseudo_class_metadata(pseudo_class);
  452. if (!metadata.is_valid_as_function) {
  453. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name);
  454. return ParseError::SyntaxError;
  455. }
  456. if (pseudo_function.value.is_empty()) {
  457. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name);
  458. return ParseError::SyntaxError;
  459. }
  460. switch (metadata.parameter_type) {
  461. case PseudoClassMetadata::ParameterType::ANPlusB:
  462. return parse_nth_child_selector(pseudo_class, pseudo_function.value, false);
  463. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  464. return parse_nth_child_selector(pseudo_class, pseudo_function.value, true);
  465. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  466. auto function_token_stream = TokenStream(pseudo_function.value);
  467. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  468. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  469. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name);
  470. return ParseError::SyntaxError;
  471. }
  472. auto compound_selector = compound_selector_or_error.release_value().release_value();
  473. compound_selector.combinator = Selector::Combinator::None;
  474. Vector compound_selectors { move(compound_selector) };
  475. auto selector = Selector::create(move(compound_selectors));
  476. return Selector::SimpleSelector {
  477. .type = Selector::SimpleSelector::Type::PseudoClass,
  478. .value = Selector::SimpleSelector::PseudoClassSelector {
  479. .type = pseudo_class,
  480. .argument_selector_list = { move(selector) } }
  481. };
  482. }
  483. case PseudoClassMetadata::ParameterType::ForgivingRelativeSelectorList:
  484. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  485. auto function_token_stream = TokenStream(pseudo_function.value);
  486. auto selector_type = metadata.parameter_type == PseudoClassMetadata::ParameterType::ForgivingSelectorList
  487. ? SelectorType::Standalone
  488. : SelectorType::Relative;
  489. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  490. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, selector_type, SelectorParsingMode::Forgiving));
  491. return Selector::SimpleSelector {
  492. .type = Selector::SimpleSelector::Type::PseudoClass,
  493. .value = Selector::SimpleSelector::PseudoClassSelector {
  494. .type = pseudo_class,
  495. .argument_selector_list = move(argument_selector_list) }
  496. };
  497. }
  498. case PseudoClassMetadata::ParameterType::Ident: {
  499. auto function_token_stream = TokenStream(pseudo_function.value);
  500. function_token_stream.discard_whitespace();
  501. auto const& maybe_keyword_token = function_token_stream.consume_a_token();
  502. function_token_stream.discard_whitespace();
  503. if (!maybe_keyword_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  504. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: not an ident", pseudo_function.name);
  505. return ParseError::SyntaxError;
  506. }
  507. auto maybe_keyword = keyword_from_string(maybe_keyword_token.token().ident());
  508. if (!maybe_keyword.has_value()) {
  509. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: unrecognized keyword", pseudo_function.name);
  510. return ParseError::SyntaxError;
  511. }
  512. return Selector::SimpleSelector {
  513. .type = Selector::SimpleSelector::Type::PseudoClass,
  514. .value = Selector::SimpleSelector::PseudoClassSelector {
  515. .type = pseudo_class,
  516. .keyword = maybe_keyword.value() }
  517. };
  518. }
  519. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  520. Vector<FlyString> languages;
  521. auto function_token_stream = TokenStream(pseudo_function.value);
  522. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  523. for (auto const& language_token_list : language_token_lists) {
  524. auto language_token_stream = TokenStream(language_token_list);
  525. language_token_stream.discard_whitespace();
  526. auto const& language_token = language_token_stream.consume_a_token();
  527. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  528. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name);
  529. return ParseError::SyntaxError;
  530. }
  531. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  532. languages.append(language_string);
  533. language_token_stream.discard_whitespace();
  534. if (language_token_stream.has_next_token()) {
  535. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name);
  536. return ParseError::SyntaxError;
  537. }
  538. }
  539. return Selector::SimpleSelector {
  540. .type = Selector::SimpleSelector::Type::PseudoClass,
  541. .value = Selector::SimpleSelector::PseudoClassSelector {
  542. .type = pseudo_class,
  543. .languages = move(languages) }
  544. };
  545. }
  546. case PseudoClassMetadata::ParameterType::SelectorList: {
  547. auto function_token_stream = TokenStream(pseudo_function.value);
  548. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  549. return Selector::SimpleSelector {
  550. .type = Selector::SimpleSelector::Type::PseudoClass,
  551. .value = Selector::SimpleSelector::PseudoClassSelector {
  552. .type = pseudo_class,
  553. .argument_selector_list = move(not_selector) }
  554. };
  555. }
  556. case PseudoClassMetadata::ParameterType::None:
  557. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  558. VERIFY_NOT_REACHED();
  559. }
  560. }
  561. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  562. return ParseError::SyntaxError;
  563. }
  564. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  565. {
  566. auto peek_token_ends_selector = [&]() -> bool {
  567. auto const& value = tokens.next_token();
  568. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  569. };
  570. if (peek_token_ends_selector())
  571. return Optional<Selector::SimpleSelector> {};
  572. // Handle universal and tag-name types together, since both can be namespaced
  573. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  574. if (qualified_name->name.name == "*"sv) {
  575. return Selector::SimpleSelector {
  576. .type = Selector::SimpleSelector::Type::Universal,
  577. .value = qualified_name.release_value(),
  578. };
  579. }
  580. return Selector::SimpleSelector {
  581. .type = Selector::SimpleSelector::Type::TagName,
  582. .value = qualified_name.release_value(),
  583. };
  584. }
  585. auto const& first_value = tokens.consume_a_token();
  586. if (first_value.is(Token::Type::Delim)) {
  587. u32 delim = first_value.token().delim();
  588. switch (delim) {
  589. case '*':
  590. // Handled already
  591. VERIFY_NOT_REACHED();
  592. case '&':
  593. return Selector::SimpleSelector {
  594. .type = Selector::SimpleSelector::Type::Nesting,
  595. };
  596. case '.': {
  597. if (peek_token_ends_selector())
  598. return ParseError::SyntaxError;
  599. auto const& class_name_value = tokens.consume_a_token();
  600. if (!class_name_value.is(Token::Type::Ident)) {
  601. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  602. return ParseError::SyntaxError;
  603. }
  604. return Selector::SimpleSelector {
  605. .type = Selector::SimpleSelector::Type::Class,
  606. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  607. };
  608. }
  609. case '>':
  610. case '+':
  611. case '~':
  612. case '|':
  613. // Whitespace is not required between the compound-selector and a combinator.
  614. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  615. tokens.reconsume_current_input_token();
  616. return Optional<Selector::SimpleSelector> {};
  617. default:
  618. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  619. return ParseError::SyntaxError;
  620. }
  621. }
  622. if (first_value.is(Token::Type::Hash)) {
  623. if (first_value.token().hash_type() != Token::HashType::Id) {
  624. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  625. return ParseError::SyntaxError;
  626. }
  627. return Selector::SimpleSelector {
  628. .type = Selector::SimpleSelector::Type::Id,
  629. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  630. };
  631. }
  632. if (first_value.is_block() && first_value.block().is_square())
  633. return TRY(parse_attribute_simple_selector(first_value));
  634. if (first_value.is(Token::Type::Colon))
  635. return TRY(parse_pseudo_simple_selector(tokens));
  636. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  637. return ParseError::SyntaxError;
  638. }
  639. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  640. {
  641. auto transaction = values.begin_transaction();
  642. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  643. if constexpr (CSS_PARSER_DEBUG) {
  644. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  645. values.dump_all_tokens();
  646. }
  647. return {};
  648. };
  649. auto is_sign = [](ComponentValue const& value) -> bool {
  650. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  651. };
  652. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  653. if (!value.is(Token::Type::Dimension))
  654. return false;
  655. if (!value.token().number().is_integer())
  656. return false;
  657. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  658. return false;
  659. return true;
  660. };
  661. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  662. if (!value.is(Token::Type::Dimension))
  663. return false;
  664. if (!value.token().number().is_integer())
  665. return false;
  666. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  667. return false;
  668. return true;
  669. };
  670. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  671. if (!value.is(Token::Type::Dimension))
  672. return false;
  673. if (!value.token().number().is_integer())
  674. return false;
  675. auto dimension_unit = value.token().dimension_unit();
  676. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  677. return false;
  678. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  679. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  680. return false;
  681. }
  682. return true;
  683. };
  684. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  685. if (!value.is(Token::Type::Ident))
  686. return false;
  687. auto ident = value.token().ident();
  688. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  689. return false;
  690. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  691. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  692. return false;
  693. }
  694. return true;
  695. };
  696. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  697. if (!value.is(Token::Type::Ident))
  698. return false;
  699. auto ident = value.token().ident();
  700. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  701. return false;
  702. if (ident.bytes_as_string_view().length() == 3)
  703. return false;
  704. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  705. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  706. return false;
  707. }
  708. return true;
  709. };
  710. auto is_integer = [](ComponentValue const& value) -> bool {
  711. return value.is(Token::Type::Number) && value.token().number().is_integer();
  712. };
  713. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  714. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  715. };
  716. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  717. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  718. };
  719. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  720. // Unfortunately these can't be in the same order as in the spec.
  721. values.discard_whitespace();
  722. auto const& first_value = values.consume_a_token();
  723. // odd | even
  724. if (first_value.is(Token::Type::Ident)) {
  725. auto ident = first_value.token().ident();
  726. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  727. transaction.commit();
  728. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  729. }
  730. if (ident.equals_ignoring_ascii_case("even"sv)) {
  731. transaction.commit();
  732. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  733. }
  734. }
  735. // <integer>
  736. if (is_integer(first_value)) {
  737. int b = first_value.token().to_integer();
  738. transaction.commit();
  739. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  740. }
  741. // <n-dimension>
  742. // <n-dimension> <signed-integer>
  743. // <n-dimension> ['+' | '-'] <signless-integer>
  744. if (is_n_dimension(first_value)) {
  745. int a = first_value.token().dimension_value_int();
  746. values.discard_whitespace();
  747. // <n-dimension> <signed-integer>
  748. if (is_signed_integer(values.next_token())) {
  749. int b = values.consume_a_token().token().to_integer();
  750. transaction.commit();
  751. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  752. }
  753. // <n-dimension> ['+' | '-'] <signless-integer>
  754. {
  755. auto child_transaction = transaction.create_child();
  756. auto const& second_value = values.consume_a_token();
  757. values.discard_whitespace();
  758. auto const& third_value = values.consume_a_token();
  759. if (is_sign(second_value) && is_signless_integer(third_value)) {
  760. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  761. child_transaction.commit();
  762. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  763. }
  764. }
  765. // <n-dimension>
  766. transaction.commit();
  767. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  768. }
  769. // <ndash-dimension> <signless-integer>
  770. if (is_ndash_dimension(first_value)) {
  771. values.discard_whitespace();
  772. auto const& second_value = values.consume_a_token();
  773. if (is_signless_integer(second_value)) {
  774. int a = first_value.token().dimension_value_int();
  775. int b = -second_value.token().to_integer();
  776. transaction.commit();
  777. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  778. }
  779. return syntax_error();
  780. }
  781. // <ndashdigit-dimension>
  782. if (is_ndashdigit_dimension(first_value)) {
  783. auto const& dimension = first_value.token();
  784. int a = dimension.dimension_value_int();
  785. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_number<int>();
  786. if (maybe_b.has_value()) {
  787. transaction.commit();
  788. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  789. }
  790. return syntax_error();
  791. }
  792. // <dashndashdigit-ident>
  793. if (is_dashndashdigit_ident(first_value)) {
  794. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_number<int>();
  795. if (maybe_b.has_value()) {
  796. transaction.commit();
  797. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  798. }
  799. return syntax_error();
  800. }
  801. // -n
  802. // -n <signed-integer>
  803. // -n ['+' | '-'] <signless-integer>
  804. if (first_value.is_ident("-n"sv)) {
  805. values.discard_whitespace();
  806. // -n <signed-integer>
  807. if (is_signed_integer(values.next_token())) {
  808. int b = values.consume_a_token().token().to_integer();
  809. transaction.commit();
  810. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  811. }
  812. // -n ['+' | '-'] <signless-integer>
  813. {
  814. auto child_transaction = transaction.create_child();
  815. auto const& second_value = values.consume_a_token();
  816. values.discard_whitespace();
  817. auto const& third_value = values.consume_a_token();
  818. if (is_sign(second_value) && is_signless_integer(third_value)) {
  819. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  820. child_transaction.commit();
  821. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  822. }
  823. }
  824. // -n
  825. transaction.commit();
  826. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  827. }
  828. // -n- <signless-integer>
  829. if (first_value.is_ident("-n-"sv)) {
  830. values.discard_whitespace();
  831. auto const& second_value = values.consume_a_token();
  832. if (is_signless_integer(second_value)) {
  833. int b = -second_value.token().to_integer();
  834. transaction.commit();
  835. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  836. }
  837. return syntax_error();
  838. }
  839. // All that's left now are these:
  840. // '+'?† n
  841. // '+'?† n <signed-integer>
  842. // '+'?† n ['+' | '-'] <signless-integer>
  843. // '+'?† n- <signless-integer>
  844. // '+'?† <ndashdigit-ident>
  845. // In all of these cases, the + is optional, and has no effect.
  846. // So, we just skip the +, and carry on.
  847. if (!first_value.is_delim('+')) {
  848. values.reconsume_current_input_token();
  849. // We do *not* skip whitespace here.
  850. }
  851. auto const& first_after_plus = values.consume_a_token();
  852. // '+'?† n
  853. // '+'?† n <signed-integer>
  854. // '+'?† n ['+' | '-'] <signless-integer>
  855. if (first_after_plus.is_ident("n"sv)) {
  856. values.discard_whitespace();
  857. // '+'?† n <signed-integer>
  858. if (is_signed_integer(values.next_token())) {
  859. int b = values.consume_a_token().token().to_integer();
  860. transaction.commit();
  861. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  862. }
  863. // '+'?† n ['+' | '-'] <signless-integer>
  864. {
  865. auto child_transaction = transaction.create_child();
  866. auto const& second_value = values.consume_a_token();
  867. values.discard_whitespace();
  868. auto const& third_value = values.consume_a_token();
  869. if (is_sign(second_value) && is_signless_integer(third_value)) {
  870. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  871. child_transaction.commit();
  872. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  873. }
  874. }
  875. // '+'?† n
  876. transaction.commit();
  877. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  878. }
  879. // '+'?† n- <signless-integer>
  880. if (first_after_plus.is_ident("n-"sv)) {
  881. values.discard_whitespace();
  882. auto const& second_value = values.consume_a_token();
  883. if (is_signless_integer(second_value)) {
  884. int b = -second_value.token().to_integer();
  885. transaction.commit();
  886. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  887. }
  888. return syntax_error();
  889. }
  890. // '+'?† <ndashdigit-ident>
  891. if (is_ndashdigit_ident(first_after_plus)) {
  892. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_number<int>();
  893. if (maybe_b.has_value()) {
  894. transaction.commit();
  895. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  896. }
  897. return syntax_error();
  898. }
  899. return syntax_error();
  900. }
  901. }