SelectorParsing.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <andreas@ladybird.org>
  3. * Copyright (c) 2020-2021, the SerenityOS developers.
  4. * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
  5. * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
  6. * Copyright (c) 2022, MacDue <macdue@dueutil.tech>
  7. *
  8. * SPDX-License-Identifier: BSD-2-Clause
  9. */
  10. #include <AK/Debug.h>
  11. #include <LibWeb/CSS/Parser/Parser.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::CSS::Parser {
  14. Optional<SelectorList> Parser::parse_as_selector(SelectorParsingMode parsing_mode)
  15. {
  16. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, parsing_mode);
  17. if (!selector_list.is_error())
  18. return selector_list.release_value();
  19. return {};
  20. }
  21. Optional<SelectorList> Parser::parse_as_relative_selector(SelectorParsingMode parsing_mode)
  22. {
  23. auto selector_list = parse_a_selector_list(m_token_stream, SelectorType::Relative, parsing_mode);
  24. if (!selector_list.is_error())
  25. return selector_list.release_value();
  26. return {};
  27. }
  28. Optional<Selector::PseudoElement> Parser::parse_as_pseudo_element_selector()
  29. {
  30. // FIXME: This is quite janky. Selector parsing is not at all designed to allow parsing just a single part of a selector.
  31. // So, this code parses a whole selector, then rejects it if it's not a single pseudo-element simple selector.
  32. // Come back and fix this, future Sam!
  33. auto maybe_selector_list = parse_a_selector_list(m_token_stream, SelectorType::Standalone, SelectorParsingMode::Standard);
  34. if (maybe_selector_list.is_error())
  35. return {};
  36. auto& selector_list = maybe_selector_list.value();
  37. if (selector_list.size() != 1)
  38. return {};
  39. auto& selector = selector_list.first();
  40. if (selector->compound_selectors().size() != 1)
  41. return {};
  42. auto& first_compound_selector = selector->compound_selectors().first();
  43. if (first_compound_selector.simple_selectors.size() != 1)
  44. return {};
  45. auto& simple_selector = first_compound_selector.simple_selectors.first();
  46. if (simple_selector.type != Selector::SimpleSelector::Type::PseudoElement)
  47. return {};
  48. return simple_selector.pseudo_element();
  49. }
  50. template<typename T>
  51. Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<T>& tokens, SelectorType mode, SelectorParsingMode parsing_mode)
  52. {
  53. auto comma_separated_lists = parse_a_comma_separated_list_of_component_values(tokens);
  54. Vector<NonnullRefPtr<Selector>> selectors;
  55. for (auto& selector_parts : comma_separated_lists) {
  56. auto stream = TokenStream(selector_parts);
  57. auto selector = parse_complex_selector(stream, mode);
  58. if (selector.is_error()) {
  59. if (parsing_mode == SelectorParsingMode::Forgiving)
  60. continue;
  61. return selector.error();
  62. }
  63. selectors.append(selector.release_value());
  64. }
  65. if (selectors.is_empty() && parsing_mode != SelectorParsingMode::Forgiving)
  66. return ParseError::SyntaxError;
  67. return selectors;
  68. }
  69. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<ComponentValue>&, SelectorType, SelectorParsingMode);
  70. template Parser::ParseErrorOr<SelectorList> Parser::parse_a_selector_list(TokenStream<Token>&, SelectorType, SelectorParsingMode);
  71. Parser::ParseErrorOr<NonnullRefPtr<Selector>> Parser::parse_complex_selector(TokenStream<ComponentValue>& tokens, SelectorType mode)
  72. {
  73. Vector<Selector::CompoundSelector> compound_selectors;
  74. auto first_selector = TRY(parse_compound_selector(tokens));
  75. if (!first_selector.has_value())
  76. return ParseError::SyntaxError;
  77. if (mode == SelectorType::Standalone) {
  78. if (first_selector->combinator != Selector::Combinator::Descendant)
  79. return ParseError::SyntaxError;
  80. first_selector->combinator = Selector::Combinator::None;
  81. }
  82. compound_selectors.append(first_selector.release_value());
  83. while (tokens.has_next_token()) {
  84. auto compound_selector = TRY(parse_compound_selector(tokens));
  85. if (!compound_selector.has_value())
  86. break;
  87. compound_selectors.append(compound_selector.release_value());
  88. }
  89. if (compound_selectors.is_empty())
  90. return ParseError::SyntaxError;
  91. return Selector::create(move(compound_selectors));
  92. }
  93. Parser::ParseErrorOr<Optional<Selector::CompoundSelector>> Parser::parse_compound_selector(TokenStream<ComponentValue>& tokens)
  94. {
  95. tokens.discard_whitespace();
  96. auto combinator = parse_selector_combinator(tokens).value_or(Selector::Combinator::Descendant);
  97. tokens.discard_whitespace();
  98. Vector<Selector::SimpleSelector> simple_selectors;
  99. while (tokens.has_next_token()) {
  100. auto component = TRY(parse_simple_selector(tokens));
  101. if (!component.has_value())
  102. break;
  103. simple_selectors.append(component.release_value());
  104. }
  105. if (simple_selectors.is_empty())
  106. return Optional<Selector::CompoundSelector> {};
  107. return Selector::CompoundSelector { combinator, move(simple_selectors) };
  108. }
  109. Optional<Selector::Combinator> Parser::parse_selector_combinator(TokenStream<ComponentValue>& tokens)
  110. {
  111. auto const& current_value = tokens.consume_a_token();
  112. if (current_value.is(Token::Type::Delim)) {
  113. switch (current_value.token().delim()) {
  114. case '>':
  115. return Selector::Combinator::ImmediateChild;
  116. case '+':
  117. return Selector::Combinator::NextSibling;
  118. case '~':
  119. return Selector::Combinator::SubsequentSibling;
  120. case '|': {
  121. auto const& next = tokens.next_token();
  122. if (next.is(Token::Type::EndOfFile))
  123. return {};
  124. if (next.is_delim('|')) {
  125. tokens.discard_a_token();
  126. return Selector::Combinator::Column;
  127. }
  128. }
  129. }
  130. }
  131. tokens.reconsume_current_input_token();
  132. return {};
  133. }
  134. Optional<Selector::SimpleSelector::QualifiedName> Parser::parse_selector_qualified_name(TokenStream<ComponentValue>& tokens, AllowWildcardName allow_wildcard_name)
  135. {
  136. auto is_name = [](ComponentValue const& token) {
  137. return token.is_delim('*') || token.is(Token::Type::Ident);
  138. };
  139. auto get_name = [](ComponentValue const& token) {
  140. if (token.is_delim('*'))
  141. return "*"_fly_string;
  142. return token.token().ident();
  143. };
  144. // There are 3 possibilities here:
  145. // (Where <name> and <namespace> are either an <ident> or a `*` delim)
  146. // 1) `|<name>`
  147. // 2) `<namespace>|<name>`
  148. // 3) `<name>`
  149. // Whitespace is forbidden between any of these parts. https://www.w3.org/TR/selectors-4/#white-space
  150. auto transaction = tokens.begin_transaction();
  151. auto first_token = tokens.consume_a_token();
  152. if (first_token.is_delim('|')) {
  153. // Case 1: `|<name>`
  154. if (is_name(tokens.next_token())) {
  155. auto name_token = tokens.consume_a_token();
  156. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  157. return {};
  158. transaction.commit();
  159. return Selector::SimpleSelector::QualifiedName {
  160. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::None,
  161. .name = get_name(name_token),
  162. };
  163. }
  164. return {};
  165. }
  166. if (!is_name(first_token))
  167. return {};
  168. if (tokens.next_token().is_delim('|') && is_name(tokens.peek_token(1))) {
  169. // Case 2: `<namespace>|<name>`
  170. tokens.discard_a_token(); // `|`
  171. auto namespace_ = get_name(first_token);
  172. auto name = get_name(tokens.consume_a_token());
  173. if (allow_wildcard_name == AllowWildcardName::No && name == "*"sv)
  174. return {};
  175. auto namespace_type = namespace_ == "*"sv
  176. ? Selector::SimpleSelector::QualifiedName::NamespaceType::Any
  177. : Selector::SimpleSelector::QualifiedName::NamespaceType::Named;
  178. transaction.commit();
  179. return Selector::SimpleSelector::QualifiedName {
  180. .namespace_type = namespace_type,
  181. .namespace_ = namespace_,
  182. .name = name,
  183. };
  184. }
  185. // Case 3: `<name>`
  186. auto& name_token = first_token;
  187. if (allow_wildcard_name == AllowWildcardName::No && name_token.is_delim('*'))
  188. return {};
  189. transaction.commit();
  190. return Selector::SimpleSelector::QualifiedName {
  191. .namespace_type = Selector::SimpleSelector::QualifiedName::NamespaceType::Default,
  192. .name = get_name(name_token),
  193. };
  194. }
  195. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_attribute_simple_selector(ComponentValue const& first_value)
  196. {
  197. auto attribute_tokens = TokenStream { first_value.block().values() };
  198. attribute_tokens.discard_whitespace();
  199. if (!attribute_tokens.has_next_token()) {
  200. dbgln_if(CSS_PARSER_DEBUG, "CSS attribute selector is empty!");
  201. return ParseError::SyntaxError;
  202. }
  203. auto maybe_qualified_name = parse_selector_qualified_name(attribute_tokens, AllowWildcardName::No);
  204. if (!maybe_qualified_name.has_value()) {
  205. dbgln_if(CSS_PARSER_DEBUG, "Expected qualified-name for attribute name, got: '{}'", attribute_tokens.next_token().to_debug_string());
  206. return ParseError::SyntaxError;
  207. }
  208. auto qualified_name = maybe_qualified_name.release_value();
  209. Selector::SimpleSelector simple_selector {
  210. .type = Selector::SimpleSelector::Type::Attribute,
  211. .value = Selector::SimpleSelector::Attribute {
  212. .match_type = Selector::SimpleSelector::Attribute::MatchType::HasAttribute,
  213. .qualified_name = qualified_name,
  214. .case_type = Selector::SimpleSelector::Attribute::CaseType::DefaultMatch,
  215. }
  216. };
  217. attribute_tokens.discard_whitespace();
  218. if (!attribute_tokens.has_next_token())
  219. return simple_selector;
  220. auto const& delim_part = attribute_tokens.consume_a_token();
  221. if (!delim_part.is(Token::Type::Delim)) {
  222. dbgln_if(CSS_PARSER_DEBUG, "Expected a delim for attribute comparison, got: '{}'", delim_part.to_debug_string());
  223. return ParseError::SyntaxError;
  224. }
  225. if (delim_part.token().delim() == '=') {
  226. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch;
  227. } else {
  228. if (!attribute_tokens.has_next_token()) {
  229. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended part way through a match type.");
  230. return ParseError::SyntaxError;
  231. }
  232. auto const& delim_second_part = attribute_tokens.consume_a_token();
  233. if (!delim_second_part.is_delim('=')) {
  234. dbgln_if(CSS_PARSER_DEBUG, "Expected a double delim for attribute comparison, got: '{}{}'", delim_part.to_debug_string(), delim_second_part.to_debug_string());
  235. return ParseError::SyntaxError;
  236. }
  237. switch (delim_part.token().delim()) {
  238. case '~':
  239. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsWord;
  240. break;
  241. case '*':
  242. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::ContainsString;
  243. break;
  244. case '|':
  245. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment;
  246. break;
  247. case '^':
  248. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::StartsWithString;
  249. break;
  250. case '$':
  251. simple_selector.attribute().match_type = Selector::SimpleSelector::Attribute::MatchType::EndsWithString;
  252. break;
  253. default:
  254. attribute_tokens.reconsume_current_input_token();
  255. }
  256. }
  257. attribute_tokens.discard_whitespace();
  258. if (!attribute_tokens.has_next_token()) {
  259. dbgln_if(CSS_PARSER_DEBUG, "Attribute selector ended without a value to match.");
  260. return ParseError::SyntaxError;
  261. }
  262. auto const& value_part = attribute_tokens.consume_a_token();
  263. if (!value_part.is(Token::Type::Ident) && !value_part.is(Token::Type::String)) {
  264. dbgln_if(CSS_PARSER_DEBUG, "Expected a string or ident for the value to match attribute against, got: '{}'", value_part.to_debug_string());
  265. return ParseError::SyntaxError;
  266. }
  267. auto const& value_string = value_part.token().is(Token::Type::Ident) ? value_part.token().ident() : value_part.token().string();
  268. simple_selector.attribute().value = value_string.to_string();
  269. attribute_tokens.discard_whitespace();
  270. // Handle case-sensitivity suffixes. https://www.w3.org/TR/selectors-4/#attribute-case
  271. if (attribute_tokens.has_next_token()) {
  272. auto const& case_sensitivity_part = attribute_tokens.consume_a_token();
  273. if (case_sensitivity_part.is(Token::Type::Ident)) {
  274. auto case_sensitivity = case_sensitivity_part.token().ident();
  275. if (case_sensitivity.equals_ignoring_ascii_case("i"sv)) {
  276. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch;
  277. } else if (case_sensitivity.equals_ignoring_ascii_case("s"sv)) {
  278. simple_selector.attribute().case_type = Selector::SimpleSelector::Attribute::CaseType::CaseSensitiveMatch;
  279. } else {
  280. dbgln_if(CSS_PARSER_DEBUG, "Expected a \"i\" or \"s\" attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  281. return ParseError::SyntaxError;
  282. }
  283. } else {
  284. dbgln_if(CSS_PARSER_DEBUG, "Expected an attribute selector case sensitivity identifier, got: '{}'", case_sensitivity_part.to_debug_string());
  285. return ParseError::SyntaxError;
  286. }
  287. }
  288. if (attribute_tokens.has_next_token()) {
  289. dbgln_if(CSS_PARSER_DEBUG, "Was not expecting anything else inside attribute selector.");
  290. return ParseError::SyntaxError;
  291. }
  292. return simple_selector;
  293. }
  294. Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selector(TokenStream<ComponentValue>& tokens)
  295. {
  296. auto peek_token_ends_selector = [&]() -> bool {
  297. auto const& value = tokens.next_token();
  298. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  299. };
  300. if (peek_token_ends_selector())
  301. return ParseError::SyntaxError;
  302. bool is_pseudo = false;
  303. if (tokens.next_token().is(Token::Type::Colon)) {
  304. is_pseudo = true;
  305. tokens.discard_a_token();
  306. if (peek_token_ends_selector())
  307. return ParseError::SyntaxError;
  308. }
  309. if (is_pseudo) {
  310. auto const& name_token = tokens.consume_a_token();
  311. if (!name_token.is(Token::Type::Ident)) {
  312. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident for pseudo-element, got: '{}'", name_token.to_debug_string());
  313. return ParseError::SyntaxError;
  314. }
  315. auto pseudo_name = name_token.token().ident();
  316. // Note: We allow the "ignored" -webkit prefix here for -webkit-progress-bar/-webkit-progress-bar
  317. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  318. return Selector::SimpleSelector {
  319. .type = Selector::SimpleSelector::Type::PseudoElement,
  320. .value = pseudo_element.release_value()
  321. };
  322. }
  323. // https://www.w3.org/TR/selectors-4/#compat
  324. // All other pseudo-elements whose names begin with the string “-webkit-” (matched ASCII case-insensitively)
  325. // and that are not functional notations must be treated as valid at parse time. (That is, ::-webkit-asdf is
  326. // valid at parse time, but ::-webkit-jkl() is not.) If they’re not otherwise recognized and supported, they
  327. // must be treated as matching nothing, and are unknown -webkit- pseudo-elements.
  328. if (pseudo_name.starts_with_bytes("-webkit-"sv, CaseSensitivity::CaseInsensitive)) {
  329. return Selector::SimpleSelector {
  330. .type = Selector::SimpleSelector::Type::PseudoElement,
  331. // Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
  332. .value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) },
  333. };
  334. }
  335. if (has_ignored_vendor_prefix(pseudo_name))
  336. return ParseError::IncludesIgnoredVendorPrefix;
  337. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-element: '::{}'", pseudo_name);
  338. return ParseError::SyntaxError;
  339. }
  340. if (peek_token_ends_selector())
  341. return ParseError::SyntaxError;
  342. auto const& pseudo_class_token = tokens.consume_a_token();
  343. if (pseudo_class_token.is(Token::Type::Ident)) {
  344. auto pseudo_name = pseudo_class_token.token().ident();
  345. if (has_ignored_vendor_prefix(pseudo_name))
  346. return ParseError::IncludesIgnoredVendorPrefix;
  347. auto make_pseudo_class_selector = [](auto pseudo_class) {
  348. return Selector::SimpleSelector {
  349. .type = Selector::SimpleSelector::Type::PseudoClass,
  350. .value = Selector::SimpleSelector::PseudoClassSelector { .type = pseudo_class }
  351. };
  352. };
  353. if (auto pseudo_class = pseudo_class_from_string(pseudo_name); pseudo_class.has_value()) {
  354. if (!pseudo_class_metadata(pseudo_class.value()).is_valid_as_identifier) {
  355. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is only valid as a function", pseudo_name);
  356. return ParseError::SyntaxError;
  357. }
  358. return make_pseudo_class_selector(pseudo_class.value());
  359. }
  360. // Single-colon syntax allowed for ::after, ::before, ::first-letter and ::first-line for compatibility.
  361. // https://www.w3.org/TR/selectors/#pseudo-element-syntax
  362. if (auto pseudo_element = Selector::PseudoElement::from_string(pseudo_name); pseudo_element.has_value()) {
  363. switch (pseudo_element.value().type()) {
  364. case Selector::PseudoElement::Type::After:
  365. case Selector::PseudoElement::Type::Before:
  366. case Selector::PseudoElement::Type::FirstLetter:
  367. case Selector::PseudoElement::Type::FirstLine:
  368. return Selector::SimpleSelector {
  369. .type = Selector::SimpleSelector::Type::PseudoElement,
  370. .value = pseudo_element.value()
  371. };
  372. default:
  373. break;
  374. }
  375. }
  376. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class: ':{}'", pseudo_name);
  377. return ParseError::SyntaxError;
  378. }
  379. if (pseudo_class_token.is_function()) {
  380. auto parse_nth_child_selector = [this](auto pseudo_class, Vector<ComponentValue> const& function_values, bool allow_of = false) -> ParseErrorOr<Selector::SimpleSelector> {
  381. auto tokens = TokenStream<ComponentValue>(function_values);
  382. auto nth_child_pattern = parse_a_n_plus_b_pattern(tokens);
  383. if (!nth_child_pattern.has_value()) {
  384. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid An+B format for {}", pseudo_class_name(pseudo_class));
  385. return ParseError::SyntaxError;
  386. }
  387. tokens.discard_whitespace();
  388. if (!tokens.has_next_token()) {
  389. return Selector::SimpleSelector {
  390. .type = Selector::SimpleSelector::Type::PseudoClass,
  391. .value = Selector::SimpleSelector::PseudoClassSelector {
  392. .type = pseudo_class,
  393. .nth_child_pattern = nth_child_pattern.release_value() }
  394. };
  395. }
  396. if (!allow_of)
  397. return ParseError::SyntaxError;
  398. // Parse the `of <selector-list>` syntax
  399. auto const& maybe_of = tokens.consume_a_token();
  400. if (!maybe_of.is_ident("of"sv))
  401. return ParseError::SyntaxError;
  402. tokens.discard_whitespace();
  403. auto selector_list = TRY(parse_a_selector_list(tokens, SelectorType::Standalone));
  404. tokens.discard_whitespace();
  405. if (tokens.has_next_token())
  406. return ParseError::SyntaxError;
  407. return Selector::SimpleSelector {
  408. .type = Selector::SimpleSelector::Type::PseudoClass,
  409. .value = Selector::SimpleSelector::PseudoClassSelector {
  410. .type = pseudo_class,
  411. .nth_child_pattern = nth_child_pattern.release_value(),
  412. .argument_selector_list = move(selector_list) }
  413. };
  414. };
  415. auto const& pseudo_function = pseudo_class_token.function();
  416. auto maybe_pseudo_class = pseudo_class_from_string(pseudo_function.name());
  417. if (!maybe_pseudo_class.has_value()) {
  418. dbgln_if(CSS_PARSER_DEBUG, "Unrecognized pseudo-class function: ':{}'()", pseudo_function.name());
  419. return ParseError::SyntaxError;
  420. }
  421. auto pseudo_class = maybe_pseudo_class.value();
  422. auto metadata = pseudo_class_metadata(pseudo_class);
  423. if (!metadata.is_valid_as_function) {
  424. dbgln_if(CSS_PARSER_DEBUG, "Pseudo-class ':{}' is not valid as a function", pseudo_function.name());
  425. return ParseError::SyntaxError;
  426. }
  427. if (pseudo_function.values().is_empty()) {
  428. dbgln_if(CSS_PARSER_DEBUG, "Empty :{}() selector", pseudo_function.name());
  429. return ParseError::SyntaxError;
  430. }
  431. switch (metadata.parameter_type) {
  432. case PseudoClassMetadata::ParameterType::ANPlusB:
  433. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), false);
  434. case PseudoClassMetadata::ParameterType::ANPlusBOf:
  435. return parse_nth_child_selector(pseudo_class, pseudo_function.values(), true);
  436. case PseudoClassMetadata::ParameterType::CompoundSelector: {
  437. auto function_token_stream = TokenStream(pseudo_function.values());
  438. auto compound_selector_or_error = parse_compound_selector(function_token_stream);
  439. if (compound_selector_or_error.is_error() || !compound_selector_or_error.value().has_value()) {
  440. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a compound selector", pseudo_function.name());
  441. return ParseError::SyntaxError;
  442. }
  443. auto compound_selector = compound_selector_or_error.release_value().release_value();
  444. compound_selector.combinator = Selector::Combinator::None;
  445. Vector compound_selectors { move(compound_selector) };
  446. auto selector = Selector::create(move(compound_selectors));
  447. return Selector::SimpleSelector {
  448. .type = Selector::SimpleSelector::Type::PseudoClass,
  449. .value = Selector::SimpleSelector::PseudoClassSelector {
  450. .type = pseudo_class,
  451. .argument_selector_list = { move(selector) } }
  452. };
  453. }
  454. case PseudoClassMetadata::ParameterType::ForgivingRelativeSelectorList:
  455. case PseudoClassMetadata::ParameterType::ForgivingSelectorList: {
  456. auto function_token_stream = TokenStream(pseudo_function.values());
  457. auto selector_type = metadata.parameter_type == PseudoClassMetadata::ParameterType::ForgivingSelectorList
  458. ? SelectorType::Standalone
  459. : SelectorType::Relative;
  460. // NOTE: Because it's forgiving, even complete garbage will parse OK as an empty selector-list.
  461. auto argument_selector_list = MUST(parse_a_selector_list(function_token_stream, selector_type, SelectorParsingMode::Forgiving));
  462. return Selector::SimpleSelector {
  463. .type = Selector::SimpleSelector::Type::PseudoClass,
  464. .value = Selector::SimpleSelector::PseudoClassSelector {
  465. .type = pseudo_class,
  466. .argument_selector_list = move(argument_selector_list) }
  467. };
  468. }
  469. case PseudoClassMetadata::ParameterType::Ident: {
  470. auto function_token_stream = TokenStream(pseudo_function.values());
  471. function_token_stream.discard_whitespace();
  472. auto maybe_keyword_token = function_token_stream.consume_a_token();
  473. function_token_stream.discard_whitespace();
  474. if (!maybe_keyword_token.is(Token::Type::Ident) || function_token_stream.has_next_token()) {
  475. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: not an ident", pseudo_function.name());
  476. return ParseError::SyntaxError;
  477. }
  478. auto maybe_keyword = keyword_from_string(maybe_keyword_token.token().ident());
  479. if (!maybe_keyword.has_value()) {
  480. dbgln_if(CSS_PARSER_DEBUG, "Failed to parse :{}() parameter as a keyword: unrecognized keyword", pseudo_function.name());
  481. return ParseError::SyntaxError;
  482. }
  483. return Selector::SimpleSelector {
  484. .type = Selector::SimpleSelector::Type::PseudoClass,
  485. .value = Selector::SimpleSelector::PseudoClassSelector {
  486. .type = pseudo_class,
  487. .keyword = maybe_keyword.value() }
  488. };
  489. }
  490. case PseudoClassMetadata::ParameterType::LanguageRanges: {
  491. Vector<FlyString> languages;
  492. auto function_token_stream = TokenStream(pseudo_function.values());
  493. auto language_token_lists = parse_a_comma_separated_list_of_component_values(function_token_stream);
  494. for (auto language_token_list : language_token_lists) {
  495. auto language_token_stream = TokenStream(language_token_list);
  496. language_token_stream.discard_whitespace();
  497. auto language_token = language_token_stream.consume_a_token();
  498. if (!(language_token.is(Token::Type::Ident) || language_token.is(Token::Type::String))) {
  499. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - not a string/ident", pseudo_function.name());
  500. return ParseError::SyntaxError;
  501. }
  502. auto language_string = language_token.is(Token::Type::String) ? language_token.token().string() : language_token.token().ident();
  503. languages.append(language_string);
  504. language_token_stream.discard_whitespace();
  505. if (language_token_stream.has_next_token()) {
  506. dbgln_if(CSS_PARSER_DEBUG, "Invalid language range in :{}() - trailing tokens", pseudo_function.name());
  507. return ParseError::SyntaxError;
  508. }
  509. }
  510. return Selector::SimpleSelector {
  511. .type = Selector::SimpleSelector::Type::PseudoClass,
  512. .value = Selector::SimpleSelector::PseudoClassSelector {
  513. .type = pseudo_class,
  514. .languages = move(languages) }
  515. };
  516. }
  517. case PseudoClassMetadata::ParameterType::SelectorList: {
  518. auto function_token_stream = TokenStream(pseudo_function.values());
  519. auto not_selector = TRY(parse_a_selector_list(function_token_stream, SelectorType::Standalone));
  520. return Selector::SimpleSelector {
  521. .type = Selector::SimpleSelector::Type::PseudoClass,
  522. .value = Selector::SimpleSelector::PseudoClassSelector {
  523. .type = pseudo_class,
  524. .argument_selector_list = move(not_selector) }
  525. };
  526. }
  527. case PseudoClassMetadata::ParameterType::None:
  528. // `None` means this is not a function-type pseudo-class, so this state should be impossible.
  529. VERIFY_NOT_REACHED();
  530. }
  531. }
  532. dbgln_if(CSS_PARSER_DEBUG, "Unexpected Block in pseudo-class name, expected a function or identifier. '{}'", pseudo_class_token.to_debug_string());
  533. return ParseError::SyntaxError;
  534. }
  535. Parser::ParseErrorOr<Optional<Selector::SimpleSelector>> Parser::parse_simple_selector(TokenStream<ComponentValue>& tokens)
  536. {
  537. auto peek_token_ends_selector = [&]() -> bool {
  538. auto const& value = tokens.next_token();
  539. return (value.is(Token::Type::EndOfFile) || value.is(Token::Type::Whitespace) || value.is(Token::Type::Comma));
  540. };
  541. if (peek_token_ends_selector())
  542. return Optional<Selector::SimpleSelector> {};
  543. // Handle universal and tag-name types together, since both can be namespaced
  544. if (auto qualified_name = parse_selector_qualified_name(tokens, AllowWildcardName::Yes); qualified_name.has_value()) {
  545. if (qualified_name->name.name == "*"sv) {
  546. return Selector::SimpleSelector {
  547. .type = Selector::SimpleSelector::Type::Universal,
  548. .value = qualified_name.release_value(),
  549. };
  550. }
  551. return Selector::SimpleSelector {
  552. .type = Selector::SimpleSelector::Type::TagName,
  553. .value = qualified_name.release_value(),
  554. };
  555. }
  556. auto const& first_value = tokens.consume_a_token();
  557. if (first_value.is(Token::Type::Delim)) {
  558. u32 delim = first_value.token().delim();
  559. switch (delim) {
  560. case '*':
  561. // Handled already
  562. VERIFY_NOT_REACHED();
  563. case '.': {
  564. if (peek_token_ends_selector())
  565. return ParseError::SyntaxError;
  566. auto const& class_name_value = tokens.consume_a_token();
  567. if (!class_name_value.is(Token::Type::Ident)) {
  568. dbgln_if(CSS_PARSER_DEBUG, "Expected an ident after '.', got: {}", class_name_value.to_debug_string());
  569. return ParseError::SyntaxError;
  570. }
  571. return Selector::SimpleSelector {
  572. .type = Selector::SimpleSelector::Type::Class,
  573. .value = Selector::SimpleSelector::Name { class_name_value.token().ident() }
  574. };
  575. }
  576. case '>':
  577. case '+':
  578. case '~':
  579. case '|':
  580. // Whitespace is not required between the compound-selector and a combinator.
  581. // So, if we see a combinator, return that this compound-selector is done, instead of a syntax error.
  582. tokens.reconsume_current_input_token();
  583. return Optional<Selector::SimpleSelector> {};
  584. default:
  585. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  586. return ParseError::SyntaxError;
  587. }
  588. }
  589. if (first_value.is(Token::Type::Hash)) {
  590. if (first_value.token().hash_type() != Token::HashType::Id) {
  591. dbgln_if(CSS_PARSER_DEBUG, "Selector contains hash token that is not an id: {}", first_value.to_debug_string());
  592. return ParseError::SyntaxError;
  593. }
  594. return Selector::SimpleSelector {
  595. .type = Selector::SimpleSelector::Type::Id,
  596. .value = Selector::SimpleSelector::Name { first_value.token().hash_value() }
  597. };
  598. }
  599. if (first_value.is_block() && first_value.block().is_square())
  600. return TRY(parse_attribute_simple_selector(first_value));
  601. if (first_value.is(Token::Type::Colon))
  602. return TRY(parse_pseudo_simple_selector(tokens));
  603. dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid simple selector!");
  604. return ParseError::SyntaxError;
  605. }
  606. Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<ComponentValue>& values)
  607. {
  608. auto transaction = values.begin_transaction();
  609. auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> {
  610. if constexpr (CSS_PARSER_DEBUG) {
  611. dbgln_if(CSS_PARSER_DEBUG, "Invalid An+B value:");
  612. values.dump_all_tokens();
  613. }
  614. return {};
  615. };
  616. auto is_sign = [](ComponentValue const& value) -> bool {
  617. return value.is(Token::Type::Delim) && (value.token().delim() == '+' || value.token().delim() == '-');
  618. };
  619. auto is_n_dimension = [](ComponentValue const& value) -> bool {
  620. if (!value.is(Token::Type::Dimension))
  621. return false;
  622. if (!value.token().number().is_integer())
  623. return false;
  624. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n"sv))
  625. return false;
  626. return true;
  627. };
  628. auto is_ndash_dimension = [](ComponentValue const& value) -> bool {
  629. if (!value.is(Token::Type::Dimension))
  630. return false;
  631. if (!value.token().number().is_integer())
  632. return false;
  633. if (!value.token().dimension_unit().equals_ignoring_ascii_case("n-"sv))
  634. return false;
  635. return true;
  636. };
  637. auto is_ndashdigit_dimension = [](ComponentValue const& value) -> bool {
  638. if (!value.is(Token::Type::Dimension))
  639. return false;
  640. if (!value.token().number().is_integer())
  641. return false;
  642. auto dimension_unit = value.token().dimension_unit();
  643. if (!dimension_unit.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  644. return false;
  645. for (size_t i = 2; i < dimension_unit.bytes_as_string_view().length(); ++i) {
  646. if (!is_ascii_digit(dimension_unit.bytes_as_string_view()[i]))
  647. return false;
  648. }
  649. return true;
  650. };
  651. auto is_ndashdigit_ident = [](ComponentValue const& value) -> bool {
  652. if (!value.is(Token::Type::Ident))
  653. return false;
  654. auto ident = value.token().ident();
  655. if (!ident.starts_with_bytes("n-"sv, CaseSensitivity::CaseInsensitive))
  656. return false;
  657. for (size_t i = 2; i < ident.bytes_as_string_view().length(); ++i) {
  658. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  659. return false;
  660. }
  661. return true;
  662. };
  663. auto is_dashndashdigit_ident = [](ComponentValue const& value) -> bool {
  664. if (!value.is(Token::Type::Ident))
  665. return false;
  666. auto ident = value.token().ident();
  667. if (!ident.starts_with_bytes("-n-"sv, CaseSensitivity::CaseInsensitive))
  668. return false;
  669. if (ident.bytes_as_string_view().length() == 3)
  670. return false;
  671. for (size_t i = 3; i < ident.bytes_as_string_view().length(); ++i) {
  672. if (!is_ascii_digit(ident.bytes_as_string_view()[i]))
  673. return false;
  674. }
  675. return true;
  676. };
  677. auto is_integer = [](ComponentValue const& value) -> bool {
  678. return value.is(Token::Type::Number) && value.token().number().is_integer();
  679. };
  680. auto is_signed_integer = [](ComponentValue const& value) -> bool {
  681. return value.is(Token::Type::Number) && value.token().number().is_integer_with_explicit_sign();
  682. };
  683. auto is_signless_integer = [](ComponentValue const& value) -> bool {
  684. return value.is(Token::Type::Number) && !value.token().number().is_integer_with_explicit_sign();
  685. };
  686. // https://www.w3.org/TR/css-syntax-3/#the-anb-type
  687. // Unfortunately these can't be in the same order as in the spec.
  688. values.discard_whitespace();
  689. auto const& first_value = values.consume_a_token();
  690. // odd | even
  691. if (first_value.is(Token::Type::Ident)) {
  692. auto ident = first_value.token().ident();
  693. if (ident.equals_ignoring_ascii_case("odd"sv)) {
  694. transaction.commit();
  695. return Selector::SimpleSelector::ANPlusBPattern { 2, 1 };
  696. }
  697. if (ident.equals_ignoring_ascii_case("even"sv)) {
  698. transaction.commit();
  699. return Selector::SimpleSelector::ANPlusBPattern { 2, 0 };
  700. }
  701. }
  702. // <integer>
  703. if (is_integer(first_value)) {
  704. int b = first_value.token().to_integer();
  705. transaction.commit();
  706. return Selector::SimpleSelector::ANPlusBPattern { 0, b };
  707. }
  708. // <n-dimension>
  709. // <n-dimension> <signed-integer>
  710. // <n-dimension> ['+' | '-'] <signless-integer>
  711. if (is_n_dimension(first_value)) {
  712. int a = first_value.token().dimension_value_int();
  713. values.discard_whitespace();
  714. // <n-dimension> <signed-integer>
  715. if (is_signed_integer(values.next_token())) {
  716. int b = values.consume_a_token().token().to_integer();
  717. transaction.commit();
  718. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  719. }
  720. // <n-dimension> ['+' | '-'] <signless-integer>
  721. {
  722. auto child_transaction = transaction.create_child();
  723. auto const& second_value = values.consume_a_token();
  724. values.discard_whitespace();
  725. auto const& third_value = values.consume_a_token();
  726. if (is_sign(second_value) && is_signless_integer(third_value)) {
  727. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  728. child_transaction.commit();
  729. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  730. }
  731. }
  732. // <n-dimension>
  733. transaction.commit();
  734. return Selector::SimpleSelector::ANPlusBPattern { a, 0 };
  735. }
  736. // <ndash-dimension> <signless-integer>
  737. if (is_ndash_dimension(first_value)) {
  738. values.discard_whitespace();
  739. auto const& second_value = values.consume_a_token();
  740. if (is_signless_integer(second_value)) {
  741. int a = first_value.token().dimension_value_int();
  742. int b = -second_value.token().to_integer();
  743. transaction.commit();
  744. return Selector::SimpleSelector::ANPlusBPattern { a, b };
  745. }
  746. return syntax_error();
  747. }
  748. // <ndashdigit-dimension>
  749. if (is_ndashdigit_dimension(first_value)) {
  750. auto const& dimension = first_value.token();
  751. int a = dimension.dimension_value_int();
  752. auto maybe_b = dimension.dimension_unit().bytes_as_string_view().substring_view(1).to_number<int>();
  753. if (maybe_b.has_value()) {
  754. transaction.commit();
  755. return Selector::SimpleSelector::ANPlusBPattern { a, maybe_b.value() };
  756. }
  757. return syntax_error();
  758. }
  759. // <dashndashdigit-ident>
  760. if (is_dashndashdigit_ident(first_value)) {
  761. auto maybe_b = first_value.token().ident().bytes_as_string_view().substring_view(2).to_number<int>();
  762. if (maybe_b.has_value()) {
  763. transaction.commit();
  764. return Selector::SimpleSelector::ANPlusBPattern { -1, maybe_b.value() };
  765. }
  766. return syntax_error();
  767. }
  768. // -n
  769. // -n <signed-integer>
  770. // -n ['+' | '-'] <signless-integer>
  771. if (first_value.is_ident("-n"sv)) {
  772. values.discard_whitespace();
  773. // -n <signed-integer>
  774. if (is_signed_integer(values.next_token())) {
  775. int b = values.consume_a_token().token().to_integer();
  776. transaction.commit();
  777. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  778. }
  779. // -n ['+' | '-'] <signless-integer>
  780. {
  781. auto child_transaction = transaction.create_child();
  782. auto const& second_value = values.consume_a_token();
  783. values.discard_whitespace();
  784. auto const& third_value = values.consume_a_token();
  785. if (is_sign(second_value) && is_signless_integer(third_value)) {
  786. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  787. child_transaction.commit();
  788. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  789. }
  790. }
  791. // -n
  792. transaction.commit();
  793. return Selector::SimpleSelector::ANPlusBPattern { -1, 0 };
  794. }
  795. // -n- <signless-integer>
  796. if (first_value.is_ident("-n-"sv)) {
  797. values.discard_whitespace();
  798. auto const& second_value = values.consume_a_token();
  799. if (is_signless_integer(second_value)) {
  800. int b = -second_value.token().to_integer();
  801. transaction.commit();
  802. return Selector::SimpleSelector::ANPlusBPattern { -1, b };
  803. }
  804. return syntax_error();
  805. }
  806. // All that's left now are these:
  807. // '+'?† n
  808. // '+'?† n <signed-integer>
  809. // '+'?† n ['+' | '-'] <signless-integer>
  810. // '+'?† n- <signless-integer>
  811. // '+'?† <ndashdigit-ident>
  812. // In all of these cases, the + is optional, and has no effect.
  813. // So, we just skip the +, and carry on.
  814. if (!first_value.is_delim('+')) {
  815. values.reconsume_current_input_token();
  816. // We do *not* skip whitespace here.
  817. }
  818. auto const& first_after_plus = values.consume_a_token();
  819. // '+'?† n
  820. // '+'?† n <signed-integer>
  821. // '+'?† n ['+' | '-'] <signless-integer>
  822. if (first_after_plus.is_ident("n"sv)) {
  823. values.discard_whitespace();
  824. // '+'?† n <signed-integer>
  825. if (is_signed_integer(values.next_token())) {
  826. int b = values.consume_a_token().token().to_integer();
  827. transaction.commit();
  828. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  829. }
  830. // '+'?† n ['+' | '-'] <signless-integer>
  831. {
  832. auto child_transaction = transaction.create_child();
  833. auto const& second_value = values.consume_a_token();
  834. values.discard_whitespace();
  835. auto const& third_value = values.consume_a_token();
  836. if (is_sign(second_value) && is_signless_integer(third_value)) {
  837. int b = third_value.token().to_integer() * (second_value.is_delim('+') ? 1 : -1);
  838. child_transaction.commit();
  839. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  840. }
  841. }
  842. // '+'?† n
  843. transaction.commit();
  844. return Selector::SimpleSelector::ANPlusBPattern { 1, 0 };
  845. }
  846. // '+'?† n- <signless-integer>
  847. if (first_after_plus.is_ident("n-"sv)) {
  848. values.discard_whitespace();
  849. auto const& second_value = values.consume_a_token();
  850. if (is_signless_integer(second_value)) {
  851. int b = -second_value.token().to_integer();
  852. transaction.commit();
  853. return Selector::SimpleSelector::ANPlusBPattern { 1, b };
  854. }
  855. return syntax_error();
  856. }
  857. // '+'?† <ndashdigit-ident>
  858. if (is_ndashdigit_ident(first_after_plus)) {
  859. auto maybe_b = first_after_plus.token().ident().bytes_as_string_view().substring_view(1).to_number<int>();
  860. if (maybe_b.has_value()) {
  861. transaction.commit();
  862. return Selector::SimpleSelector::ANPlusBPattern { 1, maybe_b.value() };
  863. }
  864. return syntax_error();
  865. }
  866. return syntax_error();
  867. }
  868. }