Parser.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
  1. /*
  2. * Copyright (c) 2020-2021, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/SourceLocation.h>
  7. #include <LibWeb/CSS/Parser/AtStyleRule.h>
  8. #include <LibWeb/CSS/Parser/DeclarationOrAtRule.h>
  9. #include <LibWeb/CSS/Parser/Parser.h>
  10. #include <LibWeb/CSS/Parser/QualifiedStyleRule.h>
  11. #include <LibWeb/CSS/Parser/StyleBlockRule.h>
  12. #include <LibWeb/CSS/Parser/StyleComponentValueRule.h>
  13. #include <LibWeb/CSS/Parser/StyleFunctionRule.h>
  14. #include <LibWeb/CSS/Selector.h>
  15. #include <LibWeb/Dump.h>
  16. #define CSS_PARSER_TRACE 1
  17. static void log_parse_error(const SourceLocation& location = SourceLocation::current())
  18. {
  19. dbgln_if(CSS_PARSER_TRACE, "Parse error (CSS) {}", location);
  20. }
  21. namespace Web::CSS {
  22. Parser::Parser(const StringView& input, const String& encoding)
  23. : m_tokenizer(input, encoding)
  24. {
  25. m_tokens = m_tokenizer.parse();
  26. }
  27. Parser::~Parser()
  28. {
  29. }
  30. Token Parser::peek_token()
  31. {
  32. size_t next_offset = m_iterator_offset + 1;
  33. if (next_offset < m_tokens.size()) {
  34. return m_tokens.at(next_offset);
  35. }
  36. return m_tokens.at(m_iterator_offset);
  37. }
  38. Token Parser::next_token()
  39. {
  40. if (m_iterator_offset < (int)m_tokens.size()) {
  41. ++m_iterator_offset;
  42. }
  43. auto token = m_tokens.at(m_iterator_offset);
  44. return token;
  45. }
  46. Token Parser::current_token()
  47. {
  48. return m_tokens.at(m_iterator_offset);
  49. }
  50. Vector<QualifiedStyleRule> Parser::parse_as_stylesheet()
  51. {
  52. auto rules = consume_a_list_of_rules(true);
  53. dbgln("Printing rules:");
  54. for (auto& rule : rules) {
  55. dbgln("PRE:");
  56. for (auto& pre : rule.m_prelude) {
  57. dbgln("{}", pre);
  58. }
  59. dbgln("BLOCK:");
  60. dbgln("{}", rule.m_block.to_string());
  61. dbgln("");
  62. auto selectors = parse_selectors(rule.m_prelude);
  63. CSS::Selector selector = Selector(move(selectors));
  64. dump_selector(selector);
  65. }
  66. return rules;
  67. }
  68. Vector<CSS::Selector::ComplexSelector> Parser::parse_selectors(Vector<String> parts)
  69. {
  70. // TODO:
  71. // This is a mess because the prelude is parsed as a string.
  72. // It should really be parsed as its class, but the cpp gods have forsaken me
  73. // and I can't make it work due to cyclic includes.
  74. Vector<CSS::Selector::ComplexSelector> selectors;
  75. size_t index = 0;
  76. auto parse_simple_selector = [&]() -> Optional<CSS::Selector::SimpleSelector> {
  77. if (index >= parts.size()) {
  78. return {};
  79. }
  80. auto currentToken = parts.at(index);
  81. CSS::Selector::SimpleSelector::Type type;
  82. if (currentToken == "*") {
  83. type = CSS::Selector::SimpleSelector::Type::Universal;
  84. index++;
  85. CSS::Selector::SimpleSelector result;
  86. result.type = type;
  87. return result;
  88. }
  89. if (currentToken == ".") {
  90. type = CSS::Selector::SimpleSelector::Type::Class;
  91. } else if (currentToken == "#") {
  92. type = CSS::Selector::SimpleSelector::Type::Id;
  93. } else if (currentToken == "*") {
  94. type = CSS::Selector::SimpleSelector::Type::Universal;
  95. } else {
  96. type = CSS::Selector::SimpleSelector::Type::TagName;
  97. }
  98. index++;
  99. auto value = currentToken;
  100. if (type == CSS::Selector::SimpleSelector::Type::TagName) {
  101. value = value.to_lowercase();
  102. }
  103. CSS::Selector::SimpleSelector simple_selector;
  104. simple_selector.type = type;
  105. simple_selector.value = value;
  106. if (index >= parts.size()) {
  107. return simple_selector;
  108. }
  109. currentToken = parts.at(index);
  110. if (currentToken.starts_with('[')) {
  111. auto adjusted = currentToken.substring(1, currentToken.length() - 2);
  112. // TODO: split on String :^)
  113. Vector<String> attribute_parts = adjusted.split(',');
  114. simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::HasAttribute;
  115. simple_selector.attribute_name = attribute_parts.first();
  116. size_t attribute_index = 1;
  117. if (attribute_index >= attribute_parts.size()) {
  118. return simple_selector;
  119. }
  120. if (attribute_parts.at(attribute_index) == " =") {
  121. simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
  122. attribute_index++;
  123. }
  124. if (attribute_parts.at(attribute_index) == " ~") {
  125. simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::Contains;
  126. attribute_index += 2;
  127. }
  128. if (attribute_parts.at(attribute_index) == " |") {
  129. simple_selector.attribute_match_type = CSS::Selector::SimpleSelector::AttributeMatchType::StartsWith;
  130. attribute_index += 2;
  131. }
  132. simple_selector.attribute_value = attribute_parts.at(attribute_index);
  133. return simple_selector;
  134. }
  135. if (currentToken == ":") {
  136. bool is_pseudo = false;
  137. index++;
  138. if (index >= parts.size()) {
  139. return {};
  140. }
  141. currentToken = parts.at(index);
  142. if (currentToken == ":") {
  143. is_pseudo = true;
  144. index++;
  145. }
  146. if (index >= parts.size()) {
  147. return {};
  148. }
  149. currentToken = parts.at(index);
  150. auto pseudo_name = currentToken;
  151. index++;
  152. // Ignore for now, otherwise we produce a "false positive" selector
  153. // and apply styles to the element itself, not its pseudo element
  154. if (is_pseudo) {
  155. return {};
  156. }
  157. if (pseudo_name.equals_ignoring_case("link")) {
  158. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Link;
  159. } else if (pseudo_name.equals_ignoring_case("visited")) {
  160. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Visited;
  161. } else if (pseudo_name.equals_ignoring_case("hover")) {
  162. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Hover;
  163. } else if (pseudo_name.equals_ignoring_case("focus")) {
  164. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Focus;
  165. } else if (pseudo_name.equals_ignoring_case("first-child")) {
  166. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::FirstChild;
  167. } else if (pseudo_name.equals_ignoring_case("last-child")) {
  168. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::LastChild;
  169. } else if (pseudo_name.equals_ignoring_case("only-child")) {
  170. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::OnlyChild;
  171. } else if (pseudo_name.equals_ignoring_case("empty")) {
  172. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Empty;
  173. } else if (pseudo_name.equals_ignoring_case("root")) {
  174. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::Root;
  175. } else if (pseudo_name.equals_ignoring_case("first-of-type")) {
  176. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::FirstOfType;
  177. } else if (pseudo_name.equals_ignoring_case("last-of-type")) {
  178. simple_selector.pseudo_class = CSS::Selector::SimpleSelector::PseudoClass::LastOfType;
  179. } else if (pseudo_name.equals_ignoring_case("before")) {
  180. simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::Before;
  181. } else if (pseudo_name.equals_ignoring_case("after")) {
  182. simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::After;
  183. } else {
  184. dbgln("Unknown pseudo class: '{}'", pseudo_name);
  185. return simple_selector;
  186. }
  187. }
  188. return simple_selector;
  189. };
  190. auto parse_complex_selector = [&]() -> Optional<CSS::Selector::ComplexSelector> {
  191. auto relation = CSS::Selector::ComplexSelector::Relation::Descendant;
  192. auto currentToken = parts.at(index);
  193. if (is_combinator(currentToken)) {
  194. if (currentToken == ">") {
  195. relation = CSS::Selector::ComplexSelector::Relation::ImmediateChild;
  196. }
  197. if (currentToken == "+") {
  198. relation = CSS::Selector::ComplexSelector::Relation::AdjacentSibling;
  199. }
  200. if (currentToken == "~") {
  201. relation = CSS::Selector::ComplexSelector::Relation::GeneralSibling;
  202. }
  203. if (currentToken == "||") {
  204. relation = CSS::Selector::ComplexSelector::Relation::Column;
  205. }
  206. index++;
  207. }
  208. Vector<CSS::Selector::SimpleSelector> simple_selectors;
  209. for (;;) {
  210. auto component = parse_simple_selector();
  211. if (!component.has_value()) {
  212. break;
  213. }
  214. simple_selectors.append(component.value());
  215. }
  216. if (simple_selectors.is_empty())
  217. return {};
  218. return CSS::Selector::ComplexSelector { relation, move(simple_selectors) };
  219. };
  220. for (;;) {
  221. auto complex = parse_complex_selector();
  222. if (complex.has_value()) {
  223. selectors.append(complex.value());
  224. }
  225. if (index >= parts.size()) {
  226. break;
  227. }
  228. auto currentToken = parts.at(index);
  229. if (currentToken != ",") {
  230. break;
  231. }
  232. index++;
  233. }
  234. if (selectors.is_empty()) {
  235. return {};
  236. }
  237. selectors.first().relation = CSS::Selector::ComplexSelector::Relation::None;
  238. return selectors;
  239. }
  240. void Parser::dump_all_tokens()
  241. {
  242. dbgln("Dumping all tokens:");
  243. for (auto& token : m_tokens)
  244. dbgln("{}", token.to_string());
  245. }
  246. void Parser::reconsume_current_input_token()
  247. {
  248. --m_iterator_offset;
  249. }
  250. bool Parser::is_combinator(String input)
  251. {
  252. return input == ">" || input == "+" || input == "~" || input == "||";
  253. }
  254. Vector<QualifiedStyleRule> Parser::consume_a_list_of_rules(bool top_level)
  255. {
  256. Vector<QualifiedStyleRule> rules;
  257. for (;;) {
  258. auto token = next_token();
  259. if (token.is_whitespace()) {
  260. continue;
  261. }
  262. if (token.is_eof()) {
  263. break;
  264. }
  265. if (token.is_cdo() || token.is_cdc()) {
  266. if (top_level) {
  267. continue;
  268. }
  269. reconsume_current_input_token();
  270. auto maybe_qualified = consume_a_qualified_rule();
  271. if (maybe_qualified.has_value()) {
  272. rules.append(maybe_qualified.value());
  273. }
  274. continue;
  275. }
  276. if (token.is_at()) {
  277. reconsume_current_input_token();
  278. rules.append(consume_an_at_rule());
  279. continue;
  280. }
  281. reconsume_current_input_token();
  282. auto maybe_qualified = consume_a_qualified_rule();
  283. if (maybe_qualified.has_value()) {
  284. rules.append(maybe_qualified.value());
  285. }
  286. }
  287. return rules;
  288. }
  289. AtStyleRule Parser::consume_an_at_rule()
  290. {
  291. auto initial = next_token();
  292. AtStyleRule rule;
  293. rule.m_name = initial.m_value.to_string();
  294. for (;;) {
  295. auto token = next_token();
  296. if (token.is_semicolon()) {
  297. return rule;
  298. }
  299. if (token.is_eof()) {
  300. log_parse_error();
  301. return rule;
  302. }
  303. if (token.is_open_curly()) {
  304. rule.m_block = consume_a_simple_block();
  305. return rule;
  306. }
  307. // how is "simple block with an associated token of <{-token>" a valid token?
  308. reconsume_current_input_token();
  309. auto value = consume_a_component_value();
  310. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  311. if (value.m_token.is_whitespace()) {
  312. continue;
  313. }
  314. }
  315. rule.m_prelude.append(value.to_string());
  316. }
  317. }
  318. Optional<QualifiedStyleRule> Parser::consume_a_qualified_rule()
  319. {
  320. QualifiedStyleRule rule;
  321. for (;;) {
  322. auto token = next_token();
  323. if (token.is_eof()) {
  324. log_parse_error();
  325. return {};
  326. }
  327. if (token.is_open_curly()) {
  328. rule.m_block = consume_a_simple_block();
  329. return rule;
  330. }
  331. // how is "simple block with an associated token of <{-token>" a valid token?
  332. reconsume_current_input_token();
  333. auto value = consume_a_component_value();
  334. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  335. if (value.m_token.is_whitespace()) {
  336. continue;
  337. }
  338. }
  339. rule.m_prelude.append(value.to_string());
  340. }
  341. return rule;
  342. }
  343. StyleComponentValueRule Parser::consume_a_component_value()
  344. {
  345. auto token = next_token();
  346. if (token.is_open_curly() || token.is_open_square() || token.is_open_paren()) {
  347. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Block);
  348. component.m_block = consume_a_simple_block();
  349. return component;
  350. }
  351. if (token.is_function()) {
  352. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Function);
  353. component.m_function = consume_a_function();
  354. return component;
  355. }
  356. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  357. component.m_token = token;
  358. return component;
  359. }
  360. StyleBlockRule Parser::consume_a_simple_block()
  361. {
  362. auto ending_token = current_token().mirror_variant();
  363. StyleBlockRule block;
  364. block.m_token = current_token();
  365. for (;;) {
  366. auto token = next_token();
  367. if (token.m_type == ending_token) {
  368. return block;
  369. }
  370. if (token.is_eof()) {
  371. log_parse_error();
  372. return block;
  373. }
  374. reconsume_current_input_token();
  375. auto value = consume_a_component_value();
  376. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  377. if (value.m_token.is_whitespace()) {
  378. continue;
  379. }
  380. }
  381. block.m_values.append(value.to_string());
  382. }
  383. }
  384. StyleFunctionRule Parser::consume_a_function()
  385. {
  386. StyleFunctionRule function;
  387. function.m_name = current_token().m_value.to_string();
  388. for (;;) {
  389. auto token = next_token();
  390. if (token.is_close_paren()) {
  391. return function;
  392. }
  393. if (token.is_eof()) {
  394. log_parse_error();
  395. return function;
  396. }
  397. reconsume_current_input_token();
  398. auto value = consume_a_component_value();
  399. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  400. if (value.m_token.is_whitespace()) {
  401. continue;
  402. }
  403. }
  404. function.m_values.append(value.to_string());
  405. }
  406. return function;
  407. }
  408. Optional<StyleDeclarationRule> Parser::consume_a_declaration(Vector<StyleComponentValueRule>)
  409. {
  410. TODO();
  411. }
  412. Optional<StyleDeclarationRule> Parser::consume_a_declaration()
  413. {
  414. auto token = next_token();
  415. StyleDeclarationRule declaration;
  416. declaration.m_name = token.m_value.to_string();
  417. for (;;) {
  418. if (!peek_token().is_whitespace()) {
  419. break;
  420. }
  421. next_token();
  422. }
  423. auto colon = next_token();
  424. if (!colon.is_colon()) {
  425. log_parse_error();
  426. return {};
  427. }
  428. for (;;) {
  429. if (!peek_token().is_whitespace()) {
  430. break;
  431. }
  432. next_token();
  433. }
  434. for (;;) {
  435. if (peek_token().is_eof()) {
  436. break;
  437. }
  438. declaration.m_values.append(consume_a_component_value());
  439. }
  440. auto second_last = declaration.m_values.at(declaration.m_values.size() - 2);
  441. auto last = declaration.m_values.at(declaration.m_values.size() - 1);
  442. if (second_last.m_type == StyleComponentValueRule::ComponentType::Token && last.m_type == StyleComponentValueRule::ComponentType::Token) {
  443. auto last_token = last.m_token;
  444. auto second_last_token = second_last.m_token;
  445. if (second_last_token.is_delim() && second_last_token.m_value.to_string().equals_ignoring_case("!")) {
  446. if (last_token.is_ident() && last_token.m_value.to_string().equals_ignoring_case("important")) {
  447. declaration.m_values.remove(declaration.m_values.size() - 2);
  448. declaration.m_values.remove(declaration.m_values.size() - 1);
  449. declaration.m_important = true;
  450. }
  451. }
  452. }
  453. for (;;) {
  454. auto maybe_whitespace = declaration.m_values.at(declaration.m_values.size() - 1);
  455. if (!(maybe_whitespace.m_type == StyleComponentValueRule::ComponentType::Token && maybe_whitespace.m_token.is_whitespace())) {
  456. break;
  457. }
  458. declaration.m_values.remove(declaration.m_values.size() - 1);
  459. }
  460. return declaration;
  461. }
  462. Vector<DeclarationOrAtRule> Parser::consume_a_list_of_declarations()
  463. {
  464. Vector<DeclarationOrAtRule> list;
  465. for (;;) {
  466. auto token = next_token();
  467. if (token.is_whitespace() || token.is_semicolon()) {
  468. continue;
  469. }
  470. if (token.is_eof()) {
  471. return list;
  472. }
  473. if (token.is_at()) {
  474. reconsume_current_input_token();
  475. list.append(DeclarationOrAtRule(consume_an_at_rule()));
  476. continue;
  477. }
  478. if (token.is_ident()) {
  479. Vector<StyleComponentValueRule> temp;
  480. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  481. component.m_token = token;
  482. temp.append(component);
  483. for (;;) {
  484. auto peek = peek_token();
  485. if (peek.is_semicolon() || peek.is_eof()) {
  486. break;
  487. }
  488. temp.append(consume_a_component_value());
  489. }
  490. auto maybe_declaration = consume_a_declaration(temp);
  491. if (maybe_declaration.has_value()) {
  492. list.append(DeclarationOrAtRule(maybe_declaration.value()));
  493. }
  494. }
  495. log_parse_error();
  496. reconsume_current_input_token();
  497. auto peek = peek_token();
  498. if (!(peek.is_semicolon() || peek.is_eof())) {
  499. consume_a_component_value();
  500. }
  501. }
  502. return list;
  503. }
  504. Optional<QualifiedStyleRule> Parser::parse_as_rule()
  505. {
  506. Optional<QualifiedStyleRule> rule;
  507. for (;;) {
  508. auto maybe_whitespace = peek_token();
  509. if (!maybe_whitespace.is_whitespace()) {
  510. break;
  511. }
  512. next_token();
  513. }
  514. auto token = peek_token();
  515. if (token.is_eof()) {
  516. return {};
  517. }
  518. if (token.is_at()) {
  519. rule = consume_an_at_rule();
  520. } else {
  521. rule = consume_a_qualified_rule();
  522. }
  523. for (;;) {
  524. auto maybe_whitespace = peek_token();
  525. if (!maybe_whitespace.is_whitespace()) {
  526. break;
  527. }
  528. next_token();
  529. }
  530. auto maybe_eof = peek_token();
  531. if (maybe_eof.is_eof()) {
  532. return rule;
  533. }
  534. return {};
  535. }
  536. Vector<QualifiedStyleRule> Parser::parse_as_list_of_rules()
  537. {
  538. return consume_a_list_of_rules(false);
  539. }
  540. Optional<StyleDeclarationRule> Parser::parse_as_declaration()
  541. {
  542. for (;;) {
  543. auto maybe_whitespace = peek_token();
  544. if (!maybe_whitespace.is_whitespace()) {
  545. break;
  546. }
  547. next_token();
  548. }
  549. auto token = peek_token();
  550. if (!token.is_ident()) {
  551. return {};
  552. }
  553. return consume_a_declaration();
  554. }
  555. Vector<DeclarationOrAtRule> Parser::parse_as_list_of_declarations()
  556. {
  557. return consume_a_list_of_declarations();
  558. }
  559. Optional<StyleComponentValueRule> Parser::parse_as_component_value()
  560. {
  561. for (;;) {
  562. auto maybe_whitespace = peek_token();
  563. if (!maybe_whitespace.is_whitespace()) {
  564. break;
  565. }
  566. next_token();
  567. }
  568. auto token = peek_token();
  569. if (token.is_eof()) {
  570. return {};
  571. }
  572. auto value = consume_a_component_value();
  573. for (;;) {
  574. auto maybe_whitespace = peek_token();
  575. if (!maybe_whitespace.is_whitespace()) {
  576. break;
  577. }
  578. next_token();
  579. }
  580. auto maybe_eof = peek_token();
  581. if (maybe_eof.is_eof()) {
  582. return value;
  583. }
  584. return {};
  585. }
  586. Vector<StyleComponentValueRule> Parser::parse_as_list_of_component_values()
  587. {
  588. Vector<StyleComponentValueRule> rules;
  589. for (;;) {
  590. if (peek_token().is_eof()) {
  591. break;
  592. }
  593. rules.append(consume_a_component_value());
  594. }
  595. return rules;
  596. }
  597. Vector<StyleComponentValueRule> Parser::parse_as_list_of_comma_separated_component_values()
  598. {
  599. Vector<StyleComponentValueRule> rules;
  600. for (;;) {
  601. rules.append(consume_a_component_value());
  602. if (peek_token().is_comma())
  603. continue;
  604. if (peek_token().is_eof())
  605. break;
  606. }
  607. return rules;
  608. }
  609. }