// SelectorParsing.cpp
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::CSS::Parser {
  14. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  15. {
  16. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  17. if (!selector_list.is_error())
  18. return selector_list.release_value();
  19. return {};
  20. }
  21. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  22. {
  23. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  24. if (!selector_list.is_error())
  25. return selector_list.release_value();
  26. return {};
  27. }
  28. template<typename T>
  29. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  30. {
  31. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  32. Vector<NonnullRefPtr<Selector>> selectors;
  33. for (auto& selector_parts : comma_separated_lists) {
  34. auto stream = TokenStream(selector_parts);
  35. auto selector = parse_complex_selector(stream, mode);
  36. if (selector.is_error()) {
  37. if (parsing_mode == SelectorParsingMode::Forgiving)
  38. continue;
  39. return selector.error();
  40. }
  41. selectors.append(selector.release_value());
  42. }
  43. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  44. return ParseError::SyntaxError;
  45. return selectors;
  46. }
  47. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  48. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  49. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  50. {
  51. Vector<Selector::CompoundSelector> compound_selectors;
  52. auto first_selector = TRY(parse_compound_selector(tokens));
  53. if (!first_selector.has_value())
  54. return ParseError::SyntaxError;
  55. if (mode == SelectorType::Standalone) {
  56. if (first_selector->combinator != Selector::Combinator::Descendant)
  57. return ParseError::SyntaxError;
  58. first_selector->combinator = Selector::Combinator::None;
  59. }
  60. compound_selectors.append(first_selector.release_value());
  61. while (tokens.has_next_token()) {
  62. auto compound_selector = TRY(parse_compound_selector(tokens));
  63. if (!compound_selector.has_value())
  64. break;
  65. compound_selectors.append(compound_selector.release_value());
  66. }
  67. if (compound_selectors.is_empty())
  68. return ParseError::SyntaxError;
  69. return Selector::create(move(compound_selectors));
  70. }
  71. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  72. {
  73. tokens.skip_whitespace();
  74. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  75. tokens.skip_whitespace();
  76. Vector<Selector::SimpleSelector> simple_selectors;
  77. while (tokens.has_next_token()) {
  78. auto component = TRY(parse_simple_selector(tokens));
  79. if (!component.has_value())
  80. break;
  81. simple_selectors.append(component.release_value());
  82. }
  83. if (simple_selectors.is_empty())
  84. return Optional<Selector::CompoundSelector> {};
  85. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  86. }
  87. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  88. {
  89. auto const& current_value = tokens.next_token();
  90. if (current_value.is(Token::Type::Delim)) {
  91. switch (current_value.token().delim()) {
  92. case '>':
  93. return Selector::Combinator::ImmediateChild;
  94. case '+':
  95. return Selector::Combinator::NextSibling;
  96. case '~':
  97. return Selector::Combinator::SubsequentSibling;
  98. case '|': {
  99. auto const& next = tokens.peek_token();
  100. if (next.is(Token::Type::EndOfFile))
  101. return {};
  102. if (next.is_delim('|')) {
  103. tokens.next_token();
  104. return Selector::Combinator::Column;
  105. }
  106. }
  107. }
  108. }
  109. tokens.reconsume_current_input_token();
  110. return {};
  111. }
  112. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  113. {
  114. auto is_name = [](ComponentValue const& token) {
  115. return token.is_delim('*') || token.is(Token::Type::Ident);
  116. };
  117. auto get_name = [](ComponentValue const& token) {
  118. if (token.is_delim('*'))
  119. return "*"_fly_string;
  120. return token.token().ident();
  121. };
  122. // There are 3 possibilities here:
  123. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  124. // 1) `|<name>`
  125. // 2) `<namespace>|<name>`
  126. // 3) `<name>`
  127. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  128. auto transaction = tokens.begin_transaction();
  129. auto first_token = tokens.next_token();
  130. if (first_token.is_delim('|')) {
  131. // Case 1: `|<name>`
  132. if (is_name(tokens.peek_token())) {
  133. auto name_token = tokens.next_token();
  134. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  135. return {};
  136. transaction.commit();
  137. return Selector::SimpleSelector::QualifiedName {
  138. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  139. .name = get_name(name_token),
  140. };
  141. }
  142. return {};
  143. }
  144. if (!is_name(first_token))
  145. return {};
  146. if (tokens.peek_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  147. // Case 2: `<namespace>|<name>`
  148. (void)tokens.next_token(); // `|`
  149. auto namespace_ = get_name(first_token);
  150. auto name = get_name(tokens.next_token());
  151. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  152. return {};
  153. auto namespace_type = namespace_ == "*"sv
  154. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  155. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  156. transaction.commit();
  157. return Selector::SimpleSelector::QualifiedName {
  158. .namespace_type = namespace_type,
  159. .namespace_ = namespace_,
  160. .name = name,
  161. };
  162. }
  163. // Case 3: `<name>`
  164. auto& name_token = first_token;
  165. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  166. return {};
  167. transaction.commit();
  168. return Selector::SimpleSelector::QualifiedName {
  169. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  170. .name = get_name(name_token),
  171. };
  172. }
  173. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  174. {
  175. auto attribute_tokens = TokenStream { first_value.block().values() };
  176. attribute_tokens.skip_whitespace();
  177. if (!attribute_tokens.has_next_token()) {
  178. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  179. return ParseError::SyntaxError;
  180. }
  181. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  182. if (!maybe_qualified_name.has_value()) {
  183. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.peek_token().to_debug_string());
  184. return ParseError::SyntaxError;
  185. }
  186. Selector::SimpleSelector simple_selector {
  187. .type = Selector::SimpleSelector::Type::Attribute,
  188. .value = Selector::SimpleSelector::Attribute {
  189. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  190. // FIXME: Case-sensitivity is defined by the document language.
  191. // HTML is insensitive with attribute names, and our code generally assumes
  192. // they are converted to lowercase, so we do that here too. If we want to be
  193. // correct with XML later, we'll need to keep the original case and then do
  194. // a case-insensitive compare later.
  195. .qualified_name = maybe_qualified_name.release_value(),
  196. .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  197. }
  198. };
  199. attribute_tokens.skip_whitespace();
  200. if (!attribute_tokens.has_next_token())
  201. return simple_selector;
  202. auto const& delim_part = attribute_tokens.next_token();
  203. if (!delim_part.is(Token::Type::Delim)) {
  204. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  205. return ParseError::SyntaxError;
  206. }
  207. if (delim_part.token().delim() == '=') {
  208. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  209. } else {
  210. if (!attribute_tokens.has_next_token()) {
  211. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  212. return ParseError::SyntaxError;
  213. }
  214. auto const& delim_second_part = attribute_tokens.next_token();
  215. if (!delim_second_part.is_delim('=')) {
  216. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  217. return ParseError::SyntaxError;
  218. }
  219. switch (delim_part.token().delim()) {
  220. case '~':
  221. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  222. break;
  223. case '*':
  224. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  225. break;
  226. case '|':
  227. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  228. break;
  229. case '^':
  230. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  231. break;
  232. case '$':
  233. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  234. break;
  235. default:
  236. attribute_tokens.reconsume_current_input_token();
  237. }
  238. }
  239. attribute_tokens.skip_whitespace();
  240. if (!attribute_tokens.has_next_token()) {
  241. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  242. return ParseError::SyntaxError;
  243. }
  244. auto const& value_part = attribute_tokens.next_token();
  245. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  246. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  247. return ParseError::SyntaxError;
  248. }
  249. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  250. simple_selector.attribute().value = value_string.to_string();
  251. attribute_tokens.skip_whitespace();
  252. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  253. if (attribute_tokens.has_next_token()) {
  254. auto const& case_sensitivity_part = attribute_tokens.next_token();
  255. if (case_sensitivity_part.is(Token::Type::Ident)) {
  256. auto case_sensitivity = case_sensitivity_part.token().ident();
  257. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  258. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  259. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  260. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  261. } else {
  262. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  263. return ParseError::SyntaxError;
  264. }
  265. } else {
  266. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  267. return ParseError::SyntaxError;
  268. }
  269. }
  270. if (attribute_tokens.has_next_token()) {
  271. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  272. return ParseError::SyntaxError;
  273. }
  274. return simple_selector;
  275. }
  276. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  277. {
  278. auto peek_token_ends_selector = [&]() -> bool {
  279. auto const& value = tokens.peek_token();
  280. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  281. };
  282. if (peek_token_ends_selector())
  283. return ParseError::SyntaxError;
  284. bool is_pseudo = false;
  285. if (tokens.peek_token().is(Token::Type::Colon)) {
  286. is_pseudo = true;
  287. tokens.next_token();
  288. if (peek_token_ends_selector())
  289. return ParseError::SyntaxError;
  290. }
  291. if (is_pseudo) {
  292. auto const& name_token = tokens.next_token();
  293. if (!name_token.is(Token::Type::Ident)) {
  294. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  295. return ParseError::SyntaxError;
  296. }
  297. auto pseudo_name = name_token.token().ident();
  298. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  299. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  300. return Selector::SimpleSelector {
  301. .type = Selector::SimpleSelector::Type::PseudoElement,
  302. .value = pseudo_element.release_value()
  303. };
  304. }
  305. // https://www.w3.org/TR/selectors-4/#compat
  306. // All other pseudo-elements whose names begin with the string “-webkit-” (matched ASCII case-insensitively)
  307. // and that are not functional notations must be treated as valid at parse time. (That is, ::-webkit-asdf is
  308. // valid at parse time, but ::-webkit-jkl() is not.) If they’re not otherwise recognized and supported, they
  309. // must be treated as matching nothing, and are unknown -webkit- pseudo-elements.
  310. if (pseudo_name.starts_with_bytes("-webkit-"sv, CaseSensitivity::CaseInsensitive)) {
  311. return Selector::SimpleSelector {
  312. .type = Selector::SimpleSelector::Type::PseudoElement,
  313. // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
  314. .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) },
  315. };
  316. }
  317. if (has_ignored_vendor_prefix(pseudo_name))
  318. return ParseError::IncludesIgnoredVendorPrefix;
  319. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  320. return ParseError::SyntaxError;
  321. }
  322. if (peek_token_ends_selector())
  323. return ParseError::SyntaxError;
  324. auto const& pseudo_class_token = tokens.next_token();
  325. if (pseudo_class_token.is(Token::Type::Ident)) {
  326. auto pseudo_name = pseudo_class_token.token().ident();
  327. if (has_ignored_vendor_prefix(pseudo_name))
  328. return ParseError::IncludesIgnoredVendorPrefix;
  329. auto make_pseudo_class_selector = [](auto pseudo_class) {
  330. return Selector::SimpleSelector {
  331. .type = Selector::SimpleSelector::Type::PseudoClass,
  332. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  333. };
  334. };
  335. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  336. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  337. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  338. return ParseError::SyntaxError;
  339. }
  340. return make_pseudo_class_selector(pseudo_class.value());
  341. }
  342. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  343. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  344. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  345. switch (pseudo_element.value().type()) {
  346. case Selector::PseudoElement::Type::After:
  347. case Selector::PseudoElement::Type::Before:
  348. case Selector::PseudoElement::Type::FirstLetter:
  349. case Selector::PseudoElement::Type::FirstLine:
  350. return Selector::SimpleSelector {
  351. .type = Selector::SimpleSelector::Type::PseudoElement,
  352. .value = pseudo_element.value()
  353. };
  354. default:
  355. break;
  356. }
  357. }
  358. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  359. return ParseError::SyntaxError;
  360. }
  361. if (pseudo_class_token.is_function()) {
  362. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  363. auto tokens = TokenStream<ComponentValue>(function_values);
  364. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  365. if (!nth_child_pattern.has_value()) {
  366. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  367. return ParseError::SyntaxError;
  368. }
  369. tokens.skip_whitespace();
  370. if (!tokens.has_next_token()) {
  371. return Selector::SimpleSelector {
  372. .type = Selector::SimpleSelector::Type::PseudoClass,
  373. .value = Selector::SimpleSelector::PseudoClassSelector {
  374. .type = pseudo_class,
  375. .nth_child_pattern = nth_child_pattern.release_value() }
  376. };
  377. }
  378. if (!allow_of)
  379. return ParseError::SyntaxError;
  380. // Parse the `of <selector-list>` syntax
  381. auto const& maybe_of = tokens.next_token();
  382. if (!maybe_of.is_ident("of"sv))
  383. return ParseError::SyntaxError;
  384. tokens.skip_whitespace();
  385. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  386. tokens.skip_whitespace();
  387. if (tokens.has_next_token())
  388. return ParseError::SyntaxError;
  389. return Selector::SimpleSelector {
  390. .type = Selector::SimpleSelector::Type::PseudoClass,
  391. .value = Selector::SimpleSelector::PseudoClassSelector {
  392. .type = pseudo_class,
  393. .nth_child_pattern = nth_child_pattern.release_value(),
  394. .argument_selector_list = move(selector_list) }
  395. };
  396. };
  397. auto const& pseudo_function = pseudo_class_token.function();
  398. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name());
  399. if (!maybe_pseudo_class.has_value()) {
  400. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name());
  401. return ParseError::SyntaxError;
  402. }
  403. auto pseudo_class = maybe_pseudo_class.value();
  404. auto metadata = pseudo_class_metadata(pseudo_class);
  405. if (!metadata.is_valid_as_function) {
  406. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name());
  407. return ParseError::SyntaxError;
  408. }
  409. if (pseudo_function.values().is_empty()) {
  410. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name());
  411. return ParseError::SyntaxError;
  412. }
  413. switch (metadata.parameter_type) {
  414. case PseudoClassMetadata::ParameterType::ANPlusB:
  415. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false);
  416. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  417. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true);
  418. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  419. auto function_token_stream = TokenStream(pseudo_function.values());
  420. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  421. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  422. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name());
  423. return ParseError::SyntaxError;
  424. }
  425. Vector compound_selectors { compound_selector_or_error.release_value().release_value() };
  426. auto selector = Selector::create(move(compound_selectors));
  427. return Selector::SimpleSelector {
  428. .type = Selector::SimpleSelector::Type::PseudoClass,
  429. .value = Selector::SimpleSelector::PseudoClassSelector {
  430. .type = pseudo_class,
  431. .argument_selector_list = { move(selector) } }
  432. };
  433. }
  434. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  435. auto function_token_stream = TokenStream(pseudo_function.values());
  436. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  437. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, SelectorType::Standalone, SelectorParsingMode::Forgiving));
  438. return Selector::SimpleSelector {
  439. .type = Selector::SimpleSelector::Type::PseudoClass,
  440. .value = Selector::SimpleSelector::PseudoClassSelector {
  441. .type = pseudo_class,
  442. .argument_selector_list = move(argument_selector_list) }
  443. };
  444. }
  445. case PseudoClassMetadata::ParameterType::Ident: {
  446. auto function_token_stream = TokenStream(pseudo_function.values());
  447. function_token_stream.skip_whitespace();
  448. auto maybe_ident_token = function_token_stream.next_token();
  449. function_token_stream.skip_whitespace();
  450. if (!maybe_ident_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  451. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: not an ident", pseudo_function.name());
  452. return ParseError::SyntaxError;
  453. }
  454. auto maybe_ident = value_id_from_string(maybe_ident_token.token().ident());
  455. if (!maybe_ident.has_value()) {
  456. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as an ident: unrecognized ident", pseudo_function.name());
  457. return ParseError::SyntaxError;
  458. }
  459. return Selector::SimpleSelector {
  460. .type = Selector::SimpleSelector::Type::PseudoClass,
  461. .value = Selector::SimpleSelector::PseudoClassSelector {
  462. .type = pseudo_class,
  463. .identifier = maybe_ident.value() }
  464. };
  465. }
  466. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  467. Vector<FlyString> languages;
  468. auto function_token_stream = TokenStream(pseudo_function.values());
  469. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  470. for (auto language_token_list : language_token_lists) {
  471. auto language_token_stream = TokenStream(language_token_list);
  472. language_token_stream.skip_whitespace();
  473. auto language_token = language_token_stream.next_token();
  474. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  475. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name());
  476. return ParseError::SyntaxError;
  477. }
  478. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  479. languages.append(language_string);
  480. language_token_stream.skip_whitespace();
  481. if (language_token_stream.has_next_token()) {
  482. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name());
  483. return ParseError::SyntaxError;
  484. }
  485. }
  486. return Selector::SimpleSelector {
  487. .type = Selector::SimpleSelector::Type::PseudoClass,
  488. .value = Selector::SimpleSelector::PseudoClassSelector {
  489. .type = pseudo_class,
  490. .languages = move(languages) }
  491. };
  492. }
  493. case PseudoClassMetadata::ParameterType::SelectorList: {
  494. auto function_token_stream = TokenStream(pseudo_function.values());
  495. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  496. return Selector::SimpleSelector {
  497. .type = Selector::SimpleSelector::Type::PseudoClass,
  498. .value = Selector::SimpleSelector::PseudoClassSelector {
  499. .type = pseudo_class,
  500. .argument_selector_list = move(not_selector) }
  501. };
  502. }
  503. case PseudoClassMetadata::ParameterType::None:
  504. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  505. VERIFY_NOT_REACHED();
  506. }
  507. }
  508. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  509. return ParseError::SyntaxError;
  510. }
  511. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  512. {
  513. auto peek_token_ends_selector = [&]() -> bool {
  514. auto const& value = tokens.peek_token();
  515. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  516. };
  517. if (peek_token_ends_selector())
  518. return Optional<Selector::SimpleSelector> {};
  519. // Handle universal and tag-name types together, since both can be namespaced
  520. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  521. if (qualified_name->name.name == "*"sv) {
  522. return Selector::SimpleSelector {
  523. .type = Selector::SimpleSelector::Type::Universal,
  524. .value = qualified_name.release_value(),
  525. };
  526. }
  527. return Selector::SimpleSelector {
  528. .type = Selector::SimpleSelector::Type::TagName,
  529. .value = qualified_name.release_value(),
  530. };
  531. }
  532. auto const& first_value = tokens.next_token();
  533. if (first_value.is(Token::Type::Delim)) {
  534. u32 delim = first_value.token().delim();
  535. switch (delim) {
  536. case '*':
  537. // Handled already
  538. VERIFY_NOT_REACHED();
  539. case '.': {
  540. if (peek_token_ends_selector())
  541. return ParseError::SyntaxError;
  542. auto const& class_name_value = tokens.next_token();
  543. if (!class_name_value.is(Token::Type::Ident)) {
  544. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  545. return ParseError::SyntaxError;
  546. }
  547. return Selector::SimpleSelector {
  548. .type = Selector::SimpleSelector::Type::Class,
  549. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  550. };
  551. }
  552. case '>':
  553. case '+':
  554. case '~':
  555. case '|':
  556. // Whitespace is not required between the compound-selector and a combinator.
  557. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  558. tokens.reconsume_current_input_token();
  559. return Optional<Selector::SimpleSelector> {};
  560. default:
  561. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  562. return ParseError::SyntaxError;
  563. }
  564. }
  565. if (first_value.is(Token::Type::Hash)) {
  566. if (first_value.token().hash_type() != Token::HashType::Id) {
  567. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  568. return ParseError::SyntaxError;
  569. }
  570. return Selector::SimpleSelector {
  571. .type = Selector::SimpleSelector::Type::Id,
  572. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  573. };
  574. }
  575. if (first_value.is_block() && first_value.block().is_square())
  576. return TRY(parse_attribute_simple_selector(first_value));
  577. if (first_value.is(Token::Type::Colon))
  578. return TRY(parse_pseudo_simple_selector(tokens));
  579. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  580. return ParseError::SyntaxError;
  581. }
  582. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  583. {
  584. auto transaction = values.begin_transaction();
  585. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  586. if constexpr (CSS_PARSER_DEBUG) {
  587. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  588. values.dump_all_tokens();
  589. }
  590. return {};
  591. };
  592. auto is_sign = [](ComponentValue const& value) -> bool {
  593. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  594. };
  595. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  596. if (!value.is(Token::Type::Dimension))
  597. return false;
  598. if (!value.token().number().is_integer())
  599. return false;
  600. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  601. return false;
  602. return true;
  603. };
  604. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  605. if (!value.is(Token::Type::Dimension))
  606. return false;
  607. if (!value.token().number().is_integer())
  608. return false;
  609. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  610. return false;
  611. return true;
  612. };
  613. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  614. if (!value.is(Token::Type::Dimension))
  615. return false;
  616. if (!value.token().number().is_integer())
  617. return false;
  618. auto dimension_unit = value.token().dimension_unit();
  619. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  620. return false;
  621. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  622. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  623. return false;
  624. }
  625. return true;
  626. };
  627. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  628. if (!value.is(Token::Type::Ident))
  629. return false;
  630. auto ident = value.token().ident();
  631. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  632. return false;
  633. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  634. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  635. return false;
  636. }
  637. return true;
  638. };
  639. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  640. if (!value.is(Token::Type::Ident))
  641. return false;
  642. auto ident = value.token().ident();
  643. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  644. return false;
  645. if (ident.bytes_as_string_view().length() == 3)
  646. return false;
  647. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  648. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  649. return false;
  650. }
  651. return true;
  652. };
  653. auto is_integer = [](ComponentValue const& value) -> bool {
  654. return value.is(Token::Type::Number) && value.token().number().is_integer();
  655. };
  656. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  657. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  658. };
  659. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  660. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  661. };
  662. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  663. // Unfortunately these can't be in the same order as in the spec.
  664. values.skip_whitespace();
  665. auto const& first_value = values.next_token();
  666. // odd | even
  667. if (first_value.is(Token::Type::Ident)) {
  668. auto ident = first_value.token().ident();
  669. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  670. transaction.commit();
  671. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  672. }
  673. if (ident.equals_ignoring_ascii_case("even"sv)) {
  674. transaction.commit();
  675. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  676. }
  677. }
  678. // <integer>
  679. if (is_integer(first_value)) {
  680. int b = first_value.token().to_integer();
  681. transaction.commit();
  682. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  683. }
  684. // <n-dimension>
  685. // <n-dimension> <signed-integer>
  686. // <n-dimension> ['+' | '-'] <signless-integer>
  687. if (is_n_dimension(first_value)) {
  688. int a = first_value.token().dimension_value_int();
  689. values.skip_whitespace();
  690. // <n-dimension> <signed-integer>
  691. if (is_signed_integer(values.peek_token())) {
  692. int b = values.next_token().token().to_integer();
  693. transaction.commit();
  694. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  695. }
  696. // <n-dimension> ['+' | '-'] <signless-integer>
  697. {
  698. auto child_transaction = transaction.create_child();
  699. auto const& second_value = values.next_token();
  700. values.skip_whitespace();
  701. auto const& third_value = values.next_token();
  702. if (is_sign(second_value) && is_signless_integer(third_value)) {
  703. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  704. child_transaction.commit();
  705. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  706. }
  707. }
  708. // <n-dimension>
  709. transaction.commit();
  710. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  711. }
  712. // <ndash-dimension> <signless-integer>
  713. if (is_ndash_dimension(first_value)) {
  714. values.skip_whitespace();
  715. auto const& second_value = values.next_token();
  716. if (is_signless_integer(second_value)) {
  717. int a = first_value.token().dimension_value_int();
  718. int b = -second_value.token().to_integer();
  719. transaction.commit();
  720. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  721. }
  722. return syntax_error();
  723. }
  724. // <ndashdigit-dimension>
  725. if (is_ndashdigit_dimension(first_value)) {
  726. auto const& dimension = first_value.token();
  727. int a = dimension.dimension_value_int();
  728. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_number<int>();
  729. if (maybe_b.has_value()) {
  730. transaction.commit();
  731. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  732. }
  733. return syntax_error();
  734. }
  735. // <dashndashdigit-ident>
  736. if (is_dashndashdigit_ident(first_value)) {
  737. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_number<int>();
  738. if (maybe_b.has_value()) {
  739. transaction.commit();
  740. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  741. }
  742. return syntax_error();
  743. }
  744. // -n
  745. // -n <signed-integer>
  746. // -n ['+' | '-'] <signless-integer>
  747. if (first_value.is_ident("-n"sv)) {
  748. values.skip_whitespace();
  749. // -n <signed-integer>
  750. if (is_signed_integer(values.peek_token())) {
  751. int b = values.next_token().token().to_integer();
  752. transaction.commit();
  753. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  754. }
  755. // -n ['+' | '-'] <signless-integer>
  756. {
  757. auto child_transaction = transaction.create_child();
  758. auto const& second_value = values.next_token();
  759. values.skip_whitespace();
  760. auto const& third_value = values.next_token();
  761. if (is_sign(second_value) && is_signless_integer(third_value)) {
  762. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  763. child_transaction.commit();
  764. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  765. }
  766. }
  767. // -n
  768. transaction.commit();
  769. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  770. }
  771. // -n- <signless-integer>
  772. if (first_value.is_ident("-n-"sv)) {
  773. values.skip_whitespace();
  774. auto const& second_value = values.next_token();
  775. if (is_signless_integer(second_value)) {
  776. int b = -second_value.token().to_integer();
  777. transaction.commit();
  778. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  779. }
  780. return syntax_error();
  781. }
  782. // All that's left now are these:
  783. // '+'?† n
  784. // '+'?† n <signed-integer>
  785. // '+'?† n ['+' | '-'] <signless-integer>
  786. // '+'?† n- <signless-integer>
  787. // '+'?† <ndashdigit-ident>
  788. // In all of these cases, the + is optional, and has no effect.
  789. // So, we just skip the +, and carry on.
  790. if (!first_value.is_delim('+')) {
  791. values.reconsume_current_input_token();
  792. // We do *not* skip whitespace here.
  793. }
  794. auto const& first_after_plus = values.next_token();
  795. // '+'?† n
  796. // '+'?† n <signed-integer>
  797. // '+'?† n ['+' | '-'] <signless-integer>
  798. if (first_after_plus.is_ident("n"sv)) {
  799. values.skip_whitespace();
  800. // '+'?† n <signed-integer>
  801. if (is_signed_integer(values.peek_token())) {
  802. int b = values.next_token().token().to_integer();
  803. transaction.commit();
  804. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  805. }
  806. // '+'?† n ['+' | '-'] <signless-integer>
  807. {
  808. auto child_transaction = transaction.create_child();
  809. auto const& second_value = values.next_token();
  810. values.skip_whitespace();
  811. auto const& third_value = values.next_token();
  812. if (is_sign(second_value) && is_signless_integer(third_value)) {
  813. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  814. child_transaction.commit();
  815. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  816. }
  817. }
  818. // '+'?† n
  819. transaction.commit();
  820. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  821. }
  822. // '+'?† n- <signless-integer>
  823. if (first_after_plus.is_ident("n-"sv)) {
  824. values.skip_whitespace();
  825. auto const& second_value = values.next_token();
  826. if (is_signless_integer(second_value)) {
  827. int b = -second_value.token().to_integer();
  828. transaction.commit();
  829. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  830. }
  831. return syntax_error();
  832. }
  833. // '+'?† <ndashdigit-ident>
  834. if (is_ndashdigit_ident(first_after_plus)) {
  835. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_number<int>();
  836. if (maybe_b.has_value()) {
  837. transaction.commit();
  838. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  839. }
  840. return syntax_error();
  841. }
  842. return syntax_error();
  843. }
  844. }