// SelectorParsing.cpp
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. namespace Web::CSS::Parser {
  13. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  14. {
  15. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  16. if (!selector_list.is_error())
  17. return selector_list.release_value();
  18. return {};
  19. }
  20. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  21. {
  22. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  23. if (!selector_list.is_error())
  24. return selector_list.release_value();
  25. return {};
  26. }
  27. template<typename T>
  28. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  29. {
  30. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  31. Vector<NonnullRefPtr<Selector>> selectors;
  32. for (auto& selector_parts : comma_separated_lists) {
  33. auto stream = TokenStream(selector_parts);
  34. auto selector = parse_complex_selector(stream, mode);
  35. if (selector.is_error()) {
  36. if (parsing_mode == SelectorParsingMode::Forgiving)
  37. continue;
  38. return selector.error();
  39. }
  40. selectors.append(selector.release_value());
  41. }
  42. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  43. return ParseError::SyntaxError;
  44. return selectors;
  45. }
  46. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  47. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  48. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  49. {
  50. Vector<Selector::CompoundSelector> compound_selectors;
  51. auto first_selector = TRY(parse_compound_selector(tokens));
  52. if (!first_selector.has_value())
  53. return ParseError::SyntaxError;
  54. if (mode == SelectorType::Standalone) {
  55. if (first_selector->combinator != Selector::Combinator::Descendant)
  56. return ParseError::SyntaxError;
  57. first_selector->combinator = Selector::Combinator::None;
  58. }
  59. compound_selectors.append(first_selector.release_value());
  60. while (tokens.has_next_token()) {
  61. auto compound_selector = TRY(parse_compound_selector(tokens));
  62. if (!compound_selector.has_value())
  63. break;
  64. compound_selectors.append(compound_selector.release_value());
  65. }
  66. if (compound_selectors.is_empty())
  67. return ParseError::SyntaxError;
  68. return Selector::create(move(compound_selectors));
  69. }
  70. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  71. {
  72. tokens.skip_whitespace();
  73. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  74. tokens.skip_whitespace();
  75. Vector<Selector::SimpleSelector> simple_selectors;
  76. while (tokens.has_next_token()) {
  77. auto component = TRY(parse_simple_selector(tokens));
  78. if (!component.has_value())
  79. break;
  80. simple_selectors.append(component.release_value());
  81. }
  82. if (simple_selectors.is_empty())
  83. return Optional<Selector::CompoundSelector> {};
  84. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  85. }
  86. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  87. {
  88. auto const& current_value = tokens.next_token();
  89. if (current_value.is(Token::Type::Delim)) {
  90. switch (current_value.token().delim()) {
  91. case '>':
  92. return Selector::Combinator::ImmediateChild;
  93. case '+':
  94. return Selector::Combinator::NextSibling;
  95. case '~':
  96. return Selector::Combinator::SubsequentSibling;
  97. case '|': {
  98. auto const& next = tokens.peek_token();
  99. if (next.is(Token::Type::EndOfFile))
  100. return {};
  101. if (next.is_delim('|')) {
  102. tokens.next_token();
  103. return Selector::Combinator::Column;
  104. }
  105. }
  106. }
  107. }
  108. tokens.reconsume_current_input_token();
  109. return {};
  110. }
  111. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  112. {
  113. auto is_name = [](ComponentValue const& token) {
  114. return token.is_delim('*') || token.is(Token::Type::Ident);
  115. };
  116. auto get_name = [](ComponentValue const& token) {
  117. if (token.is_delim('*'))
  118. return "*"_fly_string;
  119. return token.token().ident();
  120. };
  121. // There are 3 possibilities here:
  122. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  123. // 1) `|<name>`
  124. // 2) `<namespace>|<name>`
  125. // 3) `<name>`
  126. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  127. auto transaction = tokens.begin_transaction();
  128. auto first_token = tokens.next_token();
  129. if (first_token.is_delim('|')) {
  130. // Case 1: `|<name>`
  131. if (is_name(tokens.peek_token())) {
  132. auto name_token = tokens.next_token();
  133. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  134. return {};
  135. transaction.commit();
  136. return Selector::SimpleSelector::QualifiedName {
  137. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  138. .name = get_name(name_token),
  139. };
  140. }
  141. return {};
  142. }
  143. if (!is_name(first_token))
  144. return {};
  145. if (tokens.peek_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  146. // Case 2: `<namespace>|<name>`
  147. (void)tokens.next_token(); // `|`
  148. auto namespace_ = get_name(first_token);
  149. auto name = get_name(tokens.next_token());
  150. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  151. return {};
  152. auto namespace_type = namespace_ == "*"sv
  153. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  154. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  155. transaction.commit();
  156. return Selector::SimpleSelector::QualifiedName {
  157. .namespace_type = namespace_type,
  158. .namespace_ = namespace_,
  159. .name = name,
  160. };
  161. }
  162. // Case 3: `<name>`
  163. auto& name_token = first_token;
  164. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  165. return {};
  166. transaction.commit();
  167. return Selector::SimpleSelector::QualifiedName {
  168. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  169. .name = get_name(name_token),
  170. };
  171. }
  172. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  173. {
  174. auto attribute_tokens = TokenStream { first_value.block().values() };
  175. attribute_tokens.skip_whitespace();
  176. if (!attribute_tokens.has_next_token()) {
  177. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  178. return ParseError::SyntaxError;
  179. }
  180. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  181. if (!maybe_qualified_name.has_value()) {
  182. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.peek_token().to_debug_string());
  183. return ParseError::SyntaxError;
  184. }
  185. Selector::SimpleSelector simple_selector {
  186. .type = Selector::SimpleSelector::Type::Attribute,
  187. .value = Selector::SimpleSelector::Attribute {
  188. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  189. // FIXME: Case-sensitivity is defined by the document language.
  190. // HTML is insensitive with attribute names, and our code generally assumes
  191. // they are converted to lowercase, so we do that here too. If we want to be
  192. // correct with XML later, we'll need to keep the original case and then do
  193. // a case-insensitive compare later.
  194. .qualified_name = maybe_qualified_name.release_value(),
  195. .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  196. }
  197. };
  198. attribute_tokens.skip_whitespace();
  199. if (!attribute_tokens.has_next_token())
  200. return simple_selector;
  201. auto const& delim_part = attribute_tokens.next_token();
  202. if (!delim_part.is(Token::Type::Delim)) {
  203. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  204. return ParseError::SyntaxError;
  205. }
  206. if (delim_part.token().delim() == '=') {
  207. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  208. } else {
  209. if (!attribute_tokens.has_next_token()) {
  210. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  211. return ParseError::SyntaxError;
  212. }
  213. auto const& delim_second_part = attribute_tokens.next_token();
  214. if (!delim_second_part.is_delim('=')) {
  215. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  216. return ParseError::SyntaxError;
  217. }
  218. switch (delim_part.token().delim()) {
  219. case '~':
  220. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  221. break;
  222. case '*':
  223. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  224. break;
  225. case '|':
  226. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  227. break;
  228. case '^':
  229. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  230. break;
  231. case '$':
  232. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  233. break;
  234. default:
  235. attribute_tokens.reconsume_current_input_token();
  236. }
  237. }
  238. attribute_tokens.skip_whitespace();
  239. if (!attribute_tokens.has_next_token()) {
  240. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  241. return ParseError::SyntaxError;
  242. }
  243. auto const& value_part = attribute_tokens.next_token();
  244. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  245. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  246. return ParseError::SyntaxError;
  247. }
  248. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  249. simple_selector.attribute().value = value_string.to_string();
  250. attribute_tokens.skip_whitespace();
  251. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  252. if (attribute_tokens.has_next_token()) {
  253. auto const& case_sensitivity_part = attribute_tokens.next_token();
  254. if (case_sensitivity_part.is(Token::Type::Ident)) {
  255. auto case_sensitivity = case_sensitivity_part.token().ident();
  256. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  257. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  258. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  259. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  260. } else {
  261. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  262. return ParseError::SyntaxError;
  263. }
  264. } else {
  265. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  266. return ParseError::SyntaxError;
  267. }
  268. }
  269. if (attribute_tokens.has_next_token()) {
  270. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  271. return ParseError::SyntaxError;
  272. }
  273. return simple_selector;
  274. }
  275. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  276. {
  277. auto peek_token_ends_selector = [&]() -> bool {
  278. auto const& value = tokens.peek_token();
  279. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  280. };
  281. if (peek_token_ends_selector())
  282. return ParseError::SyntaxError;
  283. bool is_pseudo = false;
  284. if (tokens.peek_token().is(Token::Type::Colon)) {
  285. is_pseudo = true;
  286. tokens.next_token();
  287. if (peek_token_ends_selector())
  288. return ParseError::SyntaxError;
  289. }
  290. if (is_pseudo) {
  291. auto const& name_token = tokens.next_token();
  292. if (!name_token.is(Token::Type::Ident)) {
  293. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  294. return ParseError::SyntaxError;
  295. }
  296. auto pseudo_name = name_token.token().ident();
  297. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  298. if (auto pseudo_element = pseudo_element_from_string(pseudo_name); pseudo_element.has_value()) {
  299. return Selector::SimpleSelector {
  300. .type = Selector::SimpleSelector::Type::PseudoElement,
  301. .value = pseudo_element.release_value()
  302. };
  303. }
  304. if (has_ignored_vendor_prefix(pseudo_name))
  305. return ParseError::IncludesIgnoredVendorPrefix;
  306. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  307. return ParseError::SyntaxError;
  308. }
  309. if (peek_token_ends_selector())
  310. return ParseError::SyntaxError;
  311. auto const& pseudo_class_token = tokens.next_token();
  312. if (pseudo_class_token.is(Token::Type::Ident)) {
  313. auto pseudo_name = pseudo_class_token.token().ident();
  314. if (has_ignored_vendor_prefix(pseudo_name))
  315. return ParseError::IncludesIgnoredVendorPrefix;
  316. auto make_pseudo_class_selector = [](auto pseudo_class) {
  317. return Selector::SimpleSelector {
  318. .type = Selector::SimpleSelector::Type::PseudoClass,
  319. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  320. };
  321. };
  322. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  323. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  324. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  325. return ParseError::SyntaxError;
  326. }
  327. return make_pseudo_class_selector(pseudo_class.value());
  328. }
  329. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  330. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  331. if (auto pseudo_element = pseudo_element_from_string(pseudo_name); pseudo_element.has_value()) {
  332. switch (pseudo_element.value()) {
  333. case Selector::PseudoElement::After:
  334. case Selector::PseudoElement::Before:
  335. case Selector::PseudoElement::FirstLetter:
  336. case Selector::PseudoElement::FirstLine:
  337. return Selector::SimpleSelector {
  338. .type = Selector::SimpleSelector::Type::PseudoElement,
  339. .value = pseudo_element.value()
  340. };
  341. default:
  342. break;
  343. }
  344. }
  345. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  346. return ParseError::SyntaxError;
  347. }
  348. if (pseudo_class_token.is_function()) {
  349. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  350. auto tokens = TokenStream<ComponentValue>(function_values);
  351. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  352. if (!nth_child_pattern.has_value()) {
  353. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  354. return ParseError::SyntaxError;
  355. }
  356. tokens.skip_whitespace();
  357. if (!tokens.has_next_token()) {
  358. return Selector::SimpleSelector {
  359. .type = Selector::SimpleSelector::Type::PseudoClass,
  360. .value = Selector::SimpleSelector::PseudoClassSelector {
  361. .type = pseudo_class,
  362. .nth_child_pattern = nth_child_pattern.release_value() }
  363. };
  364. }
  365. if (!allow_of)
  366. return ParseError::SyntaxError;
  367. // Parse the `of <selector-list>` syntax
  368. auto const& maybe_of = tokens.next_token();
  369. if (!maybe_of.is_ident("of"sv))
  370. return ParseError::SyntaxError;
  371. tokens.skip_whitespace();
  372. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  373. tokens.skip_whitespace();
  374. if (tokens.has_next_token())
  375. return ParseError::SyntaxError;
  376. return Selector::SimpleSelector {
  377. .type = Selector::SimpleSelector::Type::PseudoClass,
  378. .value = Selector::SimpleSelector::PseudoClassSelector {
  379. .type = pseudo_class,
  380. .nth_child_pattern = nth_child_pattern.release_value(),
  381. .argument_selector_list = move(selector_list) }
  382. };
  383. };
  384. auto const& pseudo_function = pseudo_class_token.function();
  385. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name());
  386. if (!maybe_pseudo_class.has_value()) {
  387. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name());
  388. return ParseError::SyntaxError;
  389. }
  390. auto pseudo_class = maybe_pseudo_class.value();
  391. auto metadata = pseudo_class_metadata(pseudo_class);
  392. if (!metadata.is_valid_as_function) {
  393. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name());
  394. return ParseError::SyntaxError;
  395. }
  396. if (pseudo_function.values().is_empty()) {
  397. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name());
  398. return ParseError::SyntaxError;
  399. }
  400. switch (metadata.parameter_type) {
  401. case PseudoClassMetadata::ParameterType::ANPlusB:
  402. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false);
  403. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  404. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true);
  405. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  406. auto function_token_stream = TokenStream(pseudo_function.values());
  407. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  408. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  409. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name());
  410. return ParseError::SyntaxError;
  411. }
  412. Vector compound_selectors { compound_selector_or_error.release_value().release_value() };
  413. auto selector = Selector::create(move(compound_selectors));
  414. return Selector::SimpleSelector {
  415. .type = Selector::SimpleSelector::Type::PseudoClass,
  416. .value = Selector::SimpleSelector::PseudoClassSelector {
  417. .type = pseudo_class,
  418. .argument_selector_list = { move(selector) } }
  419. };
  420. }
  421. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  422. auto function_token_stream = TokenStream(pseudo_function.values());
  423. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  424. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, SelectorType::Standalone, SelectorParsingMode::Forgiving));
  425. return Selector::SimpleSelector {
  426. .type = Selector::SimpleSelector::Type::PseudoClass,
  427. .value = Selector::SimpleSelector::PseudoClassSelector {
  428. .type = pseudo_class,
  429. .argument_selector_list = move(argument_selector_list) }
  430. };
  431. }
  432. case PseudoClassMetadata::ParameterType::Ident: {
  433. auto function_token_stream = TokenStream(pseudo_function.values());
  434. function_token_stream.skip_whitespace();
  435. auto maybe_ident_token = function_token_stream.next_token();
  436. function_token_stream.skip_whitespace();
  437. if (!maybe_ident_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  438. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: not an ident", pseudo_function.name());
  439. return ParseError::SyntaxError;
  440. }
  441. auto maybe_ident = value_id_from_string(maybe_ident_token.token().ident());
  442. if (!maybe_ident.has_value()) {
  443. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: unrecognized ident", pseudo_function.name());
  444. return ParseError::SyntaxError;
  445. }
  446. return Selector::SimpleSelector {
  447. .type = Selector::SimpleSelector::Type::PseudoClass,
  448. .value = Selector::SimpleSelector::PseudoClassSelector {
  449. .type = pseudo_class,
  450. .identifier = maybe_ident.value() }
  451. };
  452. }
  453. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  454. Vector<FlyString> languages;
  455. auto function_token_stream = TokenStream(pseudo_function.values());
  456. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  457. for (auto language_token_list : language_token_lists) {
  458. auto language_token_stream = TokenStream(language_token_list);
  459. language_token_stream.skip_whitespace();
  460. auto language_token = language_token_stream.next_token();
  461. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  462. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name());
  463. return ParseError::SyntaxError;
  464. }
  465. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  466. languages.append(language_string);
  467. language_token_stream.skip_whitespace();
  468. if (language_token_stream.has_next_token()) {
  469. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name());
  470. return ParseError::SyntaxError;
  471. }
  472. }
  473. return Selector::SimpleSelector {
  474. .type = Selector::SimpleSelector::Type::PseudoClass,
  475. .value = Selector::SimpleSelector::PseudoClassSelector {
  476. .type = pseudo_class,
  477. .languages = move(languages) }
  478. };
  479. }
  480. case PseudoClassMetadata::ParameterType::SelectorList: {
  481. auto function_token_stream = TokenStream(pseudo_function.values());
  482. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  483. return Selector::SimpleSelector {
  484. .type = Selector::SimpleSelector::Type::PseudoClass,
  485. .value = Selector::SimpleSelector::PseudoClassSelector {
  486. .type = pseudo_class,
  487. .argument_selector_list = move(not_selector) }
  488. };
  489. }
  490. case PseudoClassMetadata::ParameterType::None:
  491. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  492. VERIFY_NOT_REACHED();
  493. }
  494. }
  495. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  496. return ParseError::SyntaxError;
  497. }
  498. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  499. {
  500. auto peek_token_ends_selector = [&]() -> bool {
  501. auto const& value = tokens.peek_token();
  502. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  503. };
  504. if (peek_token_ends_selector())
  505. return Optional<Selector::SimpleSelector> {};
  506. // Handle universal and tag-name types together, since both can be namespaced
  507. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  508. if (qualified_name->name.name == "*"sv) {
  509. return Selector::SimpleSelector {
  510. .type = Selector::SimpleSelector::Type::Universal,
  511. .value = qualified_name.release_value(),
  512. };
  513. }
  514. return Selector::SimpleSelector {
  515. .type = Selector::SimpleSelector::Type::TagName,
  516. .value = qualified_name.release_value(),
  517. };
  518. }
  519. auto const& first_value = tokens.next_token();
  520. if (first_value.is(Token::Type::Delim)) {
  521. u32 delim = first_value.token().delim();
  522. switch (delim) {
  523. case '*':
  524. // Handled already
  525. VERIFY_NOT_REACHED();
  526. case '.': {
  527. if (peek_token_ends_selector())
  528. return ParseError::SyntaxError;
  529. auto const& class_name_value = tokens.next_token();
  530. if (!class_name_value.is(Token::Type::Ident)) {
  531. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  532. return ParseError::SyntaxError;
  533. }
  534. return Selector::SimpleSelector {
  535. .type = Selector::SimpleSelector::Type::Class,
  536. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  537. };
  538. }
  539. case '>':
  540. case '+':
  541. case '~':
  542. case '|':
  543. // Whitespace is not required between the compound-selector and a combinator.
  544. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  545. tokens.reconsume_current_input_token();
  546. return Optional<Selector::SimpleSelector> {};
  547. default:
  548. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  549. return ParseError::SyntaxError;
  550. }
  551. }
  552. if (first_value.is(Token::Type::Hash)) {
  553. if (first_value.token().hash_type() != Token::HashType::Id) {
  554. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  555. return ParseError::SyntaxError;
  556. }
  557. return Selector::SimpleSelector {
  558. .type = Selector::SimpleSelector::Type::Id,
  559. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  560. };
  561. }
  562. if (first_value.is_block() && first_value.block().is_square())
  563. return TRY(parse_attribute_simple_selector(first_value));
  564. if (first_value.is(Token::Type::Colon))
  565. return TRY(parse_pseudo_simple_selector(tokens));
  566. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  567. return ParseError::SyntaxError;
  568. }
  569. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  570. {
  571. auto transaction = values.begin_transaction();
  572. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  573. if constexpr (CSS_PARSER_DEBUG) {
  574. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  575. values.dump_all_tokens();
  576. }
  577. return {};
  578. };
  579. auto is_sign = [](ComponentValue const& value) -> bool {
  580. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  581. };
  582. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  583. if (!value.is(Token::Type::Dimension))
  584. return false;
  585. if (!value.token().number().is_integer())
  586. return false;
  587. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  588. return false;
  589. return true;
  590. };
  591. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  592. if (!value.is(Token::Type::Dimension))
  593. return false;
  594. if (!value.token().number().is_integer())
  595. return false;
  596. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  597. return false;
  598. return true;
  599. };
  600. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  601. if (!value.is(Token::Type::Dimension))
  602. return false;
  603. if (!value.token().number().is_integer())
  604. return false;
  605. auto dimension_unit = value.token().dimension_unit();
  606. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  607. return false;
  608. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  609. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  610. return false;
  611. }
  612. return true;
  613. };
  614. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  615. if (!value.is(Token::Type::Ident))
  616. return false;
  617. auto ident = value.token().ident();
  618. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  619. return false;
  620. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  621. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  622. return false;
  623. }
  624. return true;
  625. };
  626. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  627. if (!value.is(Token::Type::Ident))
  628. return false;
  629. auto ident = value.token().ident();
  630. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  631. return false;
  632. if (ident.bytes_as_string_view().length() == 3)
  633. return false;
  634. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  635. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  636. return false;
  637. }
  638. return true;
  639. };
  640. auto is_integer = [](ComponentValue const& value) -> bool {
  641. return value.is(Token::Type::Number) && value.token().number().is_integer();
  642. };
  643. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  644. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  645. };
  646. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  647. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  648. };
  649. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  650. // Unfortunately these can't be in the same order as in the spec.
  651. values.skip_whitespace();
  652. auto const& first_value = values.next_token();
  653. // odd | even
  654. if (first_value.is(Token::Type::Ident)) {
  655. auto ident = first_value.token().ident();
  656. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  657. transaction.commit();
  658. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  659. }
  660. if (ident.equals_ignoring_ascii_case("even"sv)) {
  661. transaction.commit();
  662. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  663. }
  664. }
  665. // <integer>
  666. if (is_integer(first_value)) {
  667. int b = first_value.token().to_integer();
  668. transaction.commit();
  669. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  670. }
  671. // <n-dimension>
  672. // <n-dimension> <signed-integer>
  673. // <n-dimension> ['+' | '-'] <signless-integer>
  674. if (is_n_dimension(first_value)) {
  675. int a = first_value.token().dimension_value_int();
  676. values.skip_whitespace();
  677. // <n-dimension> <signed-integer>
  678. if (is_signed_integer(values.peek_token())) {
  679. int b = values.next_token().token().to_integer();
  680. transaction.commit();
  681. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  682. }
  683. // <n-dimension> ['+' | '-'] <signless-integer>
  684. {
  685. auto child_transaction = transaction.create_child();
  686. auto const& second_value = values.next_token();
  687. values.skip_whitespace();
  688. auto const& third_value = values.next_token();
  689. if (is_sign(second_value) && is_signless_integer(third_value)) {
  690. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  691. child_transaction.commit();
  692. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  693. }
  694. }
  695. // <n-dimension>
  696. transaction.commit();
  697. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  698. }
  699. // <ndash-dimension> <signless-integer>
  700. if (is_ndash_dimension(first_value)) {
  701. values.skip_whitespace();
  702. auto const& second_value = values.next_token();
  703. if (is_signless_integer(second_value)) {
  704. int a = first_value.token().dimension_value_int();
  705. int b = -second_value.token().to_integer();
  706. transaction.commit();
  707. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  708. }
  709. return syntax_error();
  710. }
  711. // <ndashdigit-dimension>
  712. if (is_ndashdigit_dimension(first_value)) {
  713. auto const& dimension = first_value.token();
  714. int a = dimension.dimension_value_int();
  715. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_int();
  716. if (maybe_b.has_value()) {
  717. transaction.commit();
  718. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  719. }
  720. return syntax_error();
  721. }
  722. // <dashndashdigit-ident>
  723. if (is_dashndashdigit_ident(first_value)) {
  724. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_int();
  725. if (maybe_b.has_value()) {
  726. transaction.commit();
  727. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  728. }
  729. return syntax_error();
  730. }
  731. // -n
  732. // -n <signed-integer>
  733. // -n ['+' | '-'] <signless-integer>
  734. if (first_value.is_ident("-n"sv)) {
  735. values.skip_whitespace();
  736. // -n <signed-integer>
  737. if (is_signed_integer(values.peek_token())) {
  738. int b = values.next_token().token().to_integer();
  739. transaction.commit();
  740. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  741. }
  742. // -n ['+' | '-'] <signless-integer>
  743. {
  744. auto child_transaction = transaction.create_child();
  745. auto const& second_value = values.next_token();
  746. values.skip_whitespace();
  747. auto const& third_value = values.next_token();
  748. if (is_sign(second_value) && is_signless_integer(third_value)) {
  749. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  750. child_transaction.commit();
  751. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  752. }
  753. }
  754. // -n
  755. transaction.commit();
  756. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  757. }
  758. // -n- <signless-integer>
  759. if (first_value.is_ident("-n-"sv)) {
  760. values.skip_whitespace();
  761. auto const& second_value = values.next_token();
  762. if (is_signless_integer(second_value)) {
  763. int b = -second_value.token().to_integer();
  764. transaction.commit();
  765. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  766. }
  767. return syntax_error();
  768. }
  769. // All that's left now are these:
  770. // '+'?† n
  771. // '+'?† n <signed-integer>
  772. // '+'?† n ['+' | '-'] <signless-integer>
  773. // '+'?† n- <signless-integer>
  774. // '+'?† <ndashdigit-ident>
  775. // In all of these cases, the + is optional, and has no effect.
  776. // So, we just skip the +, and carry on.
  777. if (!first_value.is_delim('+')) {
  778. values.reconsume_current_input_token();
  779. // We do *not* skip whitespace here.
  780. }
  781. auto const& first_after_plus = values.next_token();
  782. // '+'?† n
  783. // '+'?† n <signed-integer>
  784. // '+'?† n ['+' | '-'] <signless-integer>
  785. if (first_after_plus.is_ident("n"sv)) {
  786. values.skip_whitespace();
  787. // '+'?† n <signed-integer>
  788. if (is_signed_integer(values.peek_token())) {
  789. int b = values.next_token().token().to_integer();
  790. transaction.commit();
  791. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  792. }
  793. // '+'?† n ['+' | '-'] <signless-integer>
  794. {
  795. auto child_transaction = transaction.create_child();
  796. auto const& second_value = values.next_token();
  797. values.skip_whitespace();
  798. auto const& third_value = values.next_token();
  799. if (is_sign(second_value) && is_signless_integer(third_value)) {
  800. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  801. child_transaction.commit();
  802. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  803. }
  804. }
  805. // '+'?† n
  806. transaction.commit();
  807. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  808. }
  809. // '+'?† n- <signless-integer>
  810. if (first_after_plus.is_ident("n-"sv)) {
  811. values.skip_whitespace();
  812. auto const& second_value = values.next_token();
  813. if (is_signless_integer(second_value)) {
  814. int b = -second_value.token().to_integer();
  815. transaction.commit();
  816. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  817. }
  818. return syntax_error();
  819. }
  820. // '+'?† <ndashdigit-ident>
  821. if (is_ndashdigit_ident(first_after_plus)) {
  822. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_int();
  823. if (maybe_b.has_value()) {
  824. transaction.commit();
  825. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  826. }
  827. return syntax_error();
  828. }
  829. return syntax_error();
  830. }
  831. }