Parser.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
/*
 * Copyright (c) 2020-2021, SerenityOS developers
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #include <AK/SourceLocation.h>
  7. #include <LibWeb/CSS/Parser/AtStyleRule.h>
  8. #include <LibWeb/CSS/Parser/DeclarationOrAtRule.h>
  9. #include <LibWeb/CSS/Parser/Parser.h>
  10. #include <LibWeb/CSS/Parser/QualifiedStyleRule.h>
  11. #include <LibWeb/CSS/Parser/StyleBlockRule.h>
  12. #include <LibWeb/CSS/Parser/StyleComponentValueRule.h>
  13. #include <LibWeb/CSS/Parser/StyleFunctionRule.h>
  14. #include <LibWeb/CSS/Selector.h>
  15. #define CSS_PARSER_TRACE 1
  16. static void log_parse_error(const SourceLocation& location = SourceLocation::current())
  17. {
  18. dbgln_if(CSS_PARSER_TRACE, "Parse error (CSS) {}", location);
  19. }
  20. namespace Web::CSS {
  21. Parser::Parser(const StringView& input, const String& encoding)
  22. : m_tokenizer(input, encoding)
  23. {
  24. m_tokens = m_tokenizer.parse();
  25. }
  26. Parser::~Parser()
  27. {
  28. }
  29. Token Parser::peek_token()
  30. {
  31. size_t next_offset = m_iterator_offset + 1;
  32. if (next_offset < m_tokens.size()) {
  33. return m_tokens.at(next_offset);
  34. }
  35. return m_tokens.at(m_iterator_offset);
  36. }
  37. Token Parser::next_token()
  38. {
  39. if (m_iterator_offset < (int)m_tokens.size()) {
  40. ++m_iterator_offset;
  41. }
  42. auto token = m_tokens.at(m_iterator_offset);
  43. return token;
  44. }
  45. Token Parser::current_token()
  46. {
  47. return m_tokens.at(m_iterator_offset);
  48. }
  49. Vector<QualifiedStyleRule> Parser::parse_as_stylesheet()
  50. {
  51. auto rules = consume_a_list_of_rules(true);
  52. dbgln("Printing rules:");
  53. for (auto& rule : rules) {
  54. dbgln("PRE:");
  55. for (auto& pre : rule.m_prelude) {
  56. dbgln("{}", pre);
  57. }
  58. dbgln("BLOCK:");
  59. dbgln("{}", rule.m_block.to_string());
  60. dbgln("");
  61. auto selectors = parse_selectors(rule.m_prelude);
  62. }
  63. return rules;
  64. }
  65. Vector<CSS::Selector::ComplexSelector> Parser::parse_selectors(Vector<String> parts)
  66. {
  67. (void)parts;
  68. Vector<CSS::Selector::ComplexSelector> selectors;
  69. return selectors;
  70. }
  71. void Parser::dump_all_tokens()
  72. {
  73. dbgln("Dumping all tokens:");
  74. for (auto& token : m_tokens)
  75. dbgln("{}", token.to_string());
  76. }
  77. void Parser::reconsume_current_input_token()
  78. {
  79. --m_iterator_offset;
  80. }
  81. Vector<QualifiedStyleRule> Parser::consume_a_list_of_rules(bool top_level)
  82. {
  83. Vector<QualifiedStyleRule> rules;
  84. for (;;) {
  85. auto token = next_token();
  86. if (token.is_whitespace()) {
  87. continue;
  88. }
  89. if (token.is_eof()) {
  90. break;
  91. }
  92. if (token.is_cdo() || token.is_cdc()) {
  93. if (top_level) {
  94. continue;
  95. }
  96. reconsume_current_input_token();
  97. auto maybe_qualified = consume_a_qualified_rule();
  98. if (maybe_qualified.has_value()) {
  99. rules.append(maybe_qualified.value());
  100. }
  101. continue;
  102. }
  103. if (token.is_at()) {
  104. reconsume_current_input_token();
  105. rules.append(consume_an_at_rule());
  106. continue;
  107. }
  108. reconsume_current_input_token();
  109. auto maybe_qualified = consume_a_qualified_rule();
  110. if (maybe_qualified.has_value()) {
  111. rules.append(maybe_qualified.value());
  112. }
  113. }
  114. return rules;
  115. }
  116. AtStyleRule Parser::consume_an_at_rule()
  117. {
  118. auto initial = next_token();
  119. AtStyleRule rule;
  120. rule.m_name = initial.m_value.to_string();
  121. for (;;) {
  122. auto token = next_token();
  123. if (token.is_semicolon()) {
  124. return rule;
  125. }
  126. if (token.is_eof()) {
  127. log_parse_error();
  128. return rule;
  129. }
  130. if (token.is_open_curly()) {
  131. rule.m_block = consume_a_simple_block();
  132. return rule;
  133. }
  134. // how is "simple block with an associated token of <{-token>" a valid token?
  135. reconsume_current_input_token();
  136. auto value = consume_a_component_value();
  137. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  138. if (value.m_token.is_whitespace()) {
  139. continue;
  140. }
  141. }
  142. rule.m_prelude.append(value.to_string());
  143. }
  144. }
  145. Optional<QualifiedStyleRule> Parser::consume_a_qualified_rule()
  146. {
  147. QualifiedStyleRule rule;
  148. for (;;) {
  149. auto token = next_token();
  150. if (token.is_eof()) {
  151. log_parse_error();
  152. return {};
  153. }
  154. if (token.is_open_curly()) {
  155. rule.m_block = consume_a_simple_block();
  156. return rule;
  157. }
  158. // how is "simple block with an associated token of <{-token>" a valid token?
  159. reconsume_current_input_token();
  160. auto value = consume_a_component_value();
  161. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  162. if (value.m_token.is_whitespace()) {
  163. continue;
  164. }
  165. }
  166. rule.m_prelude.append(value.to_string());
  167. }
  168. return rule;
  169. }
  170. StyleComponentValueRule Parser::consume_a_component_value()
  171. {
  172. auto token = next_token();
  173. if (token.is_open_curly() || token.is_open_square() || token.is_open_paren()) {
  174. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Block);
  175. component.m_block = consume_a_simple_block();
  176. return component;
  177. }
  178. if (token.is_function()) {
  179. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Function);
  180. component.m_function = consume_a_function();
  181. return component;
  182. }
  183. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  184. component.m_token = token;
  185. return component;
  186. }
  187. StyleBlockRule Parser::consume_a_simple_block()
  188. {
  189. auto ending_token = current_token().mirror_variant();
  190. StyleBlockRule block;
  191. block.m_token = current_token();
  192. for (;;) {
  193. auto token = next_token();
  194. if (token.m_type == ending_token) {
  195. return block;
  196. }
  197. if (token.is_eof()) {
  198. log_parse_error();
  199. return block;
  200. }
  201. reconsume_current_input_token();
  202. auto value = consume_a_component_value();
  203. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  204. if (value.m_token.is_whitespace()) {
  205. continue;
  206. }
  207. }
  208. block.m_values.append(value.to_string());
  209. }
  210. }
  211. StyleFunctionRule Parser::consume_a_function()
  212. {
  213. StyleFunctionRule function;
  214. function.m_name = current_token().m_value.to_string();
  215. for (;;) {
  216. auto token = next_token();
  217. if (token.is_close_paren()) {
  218. return function;
  219. }
  220. if (token.is_eof()) {
  221. log_parse_error();
  222. return function;
  223. }
  224. reconsume_current_input_token();
  225. auto value = consume_a_component_value();
  226. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  227. if (value.m_token.is_whitespace()) {
  228. continue;
  229. }
  230. }
  231. function.m_values.append(value.to_string());
  232. }
  233. return function;
  234. }
  235. Optional<StyleDeclarationRule> Parser::consume_a_declaration(Vector<StyleComponentValueRule>)
  236. {
  237. TODO();
  238. }
  239. Optional<StyleDeclarationRule> Parser::consume_a_declaration()
  240. {
  241. auto token = next_token();
  242. StyleDeclarationRule declaration;
  243. declaration.m_name = token.m_value.to_string();
  244. for (;;) {
  245. if (!peek_token().is_whitespace()) {
  246. break;
  247. }
  248. next_token();
  249. }
  250. auto colon = next_token();
  251. if (!colon.is_colon()) {
  252. log_parse_error();
  253. return {};
  254. }
  255. for (;;) {
  256. if (!peek_token().is_whitespace()) {
  257. break;
  258. }
  259. next_token();
  260. }
  261. for (;;) {
  262. if (peek_token().is_eof()) {
  263. break;
  264. }
  265. declaration.m_values.append(consume_a_component_value());
  266. }
  267. auto second_last = declaration.m_values.at(declaration.m_values.size() - 2);
  268. auto last = declaration.m_values.at(declaration.m_values.size() - 1);
  269. if (second_last.m_type == StyleComponentValueRule::ComponentType::Token && last.m_type == StyleComponentValueRule::ComponentType::Token) {
  270. auto last_token = last.m_token;
  271. auto second_last_token = second_last.m_token;
  272. if (second_last_token.is_delim() && second_last_token.m_value.to_string().equals_ignoring_case("!")) {
  273. if (last_token.is_ident() && last_token.m_value.to_string().equals_ignoring_case("important")) {
  274. declaration.m_values.remove(declaration.m_values.size() - 2);
  275. declaration.m_values.remove(declaration.m_values.size() - 1);
  276. declaration.m_important = true;
  277. }
  278. }
  279. }
  280. for (;;) {
  281. auto maybe_whitespace = declaration.m_values.at(declaration.m_values.size() - 1);
  282. if (!(maybe_whitespace.m_type == StyleComponentValueRule::ComponentType::Token && maybe_whitespace.m_token.is_whitespace())) {
  283. break;
  284. }
  285. declaration.m_values.remove(declaration.m_values.size() - 1);
  286. }
  287. return declaration;
  288. }
  289. Vector<DeclarationOrAtRule> Parser::consume_a_list_of_declarations()
  290. {
  291. Vector<DeclarationOrAtRule> list;
  292. for (;;) {
  293. auto token = next_token();
  294. if (token.is_whitespace() || token.is_semicolon()) {
  295. continue;
  296. }
  297. if (token.is_eof()) {
  298. return list;
  299. }
  300. if (token.is_at()) {
  301. reconsume_current_input_token();
  302. list.append(DeclarationOrAtRule(consume_an_at_rule()));
  303. continue;
  304. }
  305. if (token.is_ident()) {
  306. Vector<StyleComponentValueRule> temp;
  307. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  308. component.m_token = token;
  309. temp.append(component);
  310. for (;;) {
  311. auto peek = peek_token();
  312. if (peek.is_semicolon() || peek.is_eof()) {
  313. break;
  314. }
  315. temp.append(consume_a_component_value());
  316. }
  317. auto maybe_declaration = consume_a_declaration(temp);
  318. if (maybe_declaration.has_value()) {
  319. list.append(DeclarationOrAtRule(maybe_declaration.value()));
  320. }
  321. }
  322. log_parse_error();
  323. reconsume_current_input_token();
  324. auto peek = peek_token();
  325. if (!(peek.is_semicolon() || peek.is_eof())) {
  326. consume_a_component_value();
  327. }
  328. }
  329. return list;
  330. }
  331. Optional<QualifiedStyleRule> Parser::parse_as_rule()
  332. {
  333. Optional<QualifiedStyleRule> rule;
  334. for (;;) {
  335. auto maybe_whitespace = peek_token();
  336. if (!maybe_whitespace.is_whitespace()) {
  337. break;
  338. }
  339. next_token();
  340. }
  341. auto token = peek_token();
  342. if (token.is_eof()) {
  343. return {};
  344. }
  345. if (token.is_at()) {
  346. rule = consume_an_at_rule();
  347. } else {
  348. rule = consume_a_qualified_rule();
  349. }
  350. for (;;) {
  351. auto maybe_whitespace = peek_token();
  352. if (!token.is_whitespace()) {
  353. break;
  354. }
  355. next_token();
  356. }
  357. auto maybe_eof = peek_token();
  358. if (maybe_eof.is_eof()) {
  359. return rule;
  360. }
  361. return {};
  362. }
  363. Vector<QualifiedStyleRule> Parser::parse_as_list_of_rules()
  364. {
  365. return consume_a_list_of_rules(false);
  366. }
  367. Optional<StyleDeclarationRule> Parser::parse_as_declaration()
  368. {
  369. for (;;) {
  370. auto maybe_whitespace = peek_token();
  371. if (!maybe_whitespace.is_whitespace()) {
  372. break;
  373. }
  374. next_token();
  375. }
  376. auto token = peek_token();
  377. if (!token.is_ident()) {
  378. return {};
  379. }
  380. return consume_a_declaration();
  381. }
  382. Vector<DeclarationOrAtRule> Parser::parse_as_list_of_declarations()
  383. {
  384. return consume_a_list_of_declarations();
  385. }
  386. Optional<StyleComponentValueRule> Parser::parse_as_component_value()
  387. {
  388. for (;;) {
  389. auto maybe_whitespace = peek_token();
  390. if (!maybe_whitespace.is_whitespace()) {
  391. break;
  392. }
  393. next_token();
  394. }
  395. auto token = peek_token();
  396. if (token.is_eof()) {
  397. return {};
  398. }
  399. auto value = consume_a_component_value();
  400. for (;;) {
  401. auto maybe_whitespace = peek_token();
  402. if (!token.is_whitespace()) {
  403. break;
  404. }
  405. next_token();
  406. }
  407. auto maybe_eof = peek_token();
  408. if (maybe_eof.is_eof()) {
  409. return value;
  410. }
  411. return {};
  412. }
  413. Vector<StyleComponentValueRule> Parser::parse_as_list_of_component_values()
  414. {
  415. Vector<StyleComponentValueRule> rules;
  416. for (;;) {
  417. if (peek_token().is_eof()) {
  418. break;
  419. }
  420. rules.append(consume_a_component_value());
  421. }
  422. return rules;
  423. }
  424. Vector<StyleComponentValueRule> Parser::parse_as_list_of_comma_separated_component_values()
  425. {
  426. Vector<StyleComponentValueRule> rules;
  427. for (;;) {
  428. rules.append(consume_a_component_value());
  429. if (peek_token().is_comma())
  430. continue;
  431. if (peek_token().is_eof())
  432. break;
  433. }
  434. return rules;
  435. }
  436. }