Parser.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. /*
  2. * Copyright (c) 2020-2021, SerenityOS developers
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/NonnullOwnPtrVector.h>
  27. #include <AK/Vector.h>
  28. #include <LibWeb/CSS/CSSStyleRule.h>
  29. #include <LibWeb/CSS/Parser/AtStyleRule.h>
  30. #include <LibWeb/CSS/Parser/DeclarationOrAtRule.h>
  31. #include <LibWeb/CSS/Parser/Parser.h>
  32. #include <LibWeb/CSS/Parser/QualifiedStyleRule.h>
  33. #include <LibWeb/CSS/Parser/StyleBlockRule.h>
  34. #include <LibWeb/CSS/Parser/StyleComponentValueRule.h>
  35. #include <LibWeb/CSS/Parser/StyleFunctionRule.h>
  36. #include <LibWeb/CSS/Selector.h>
  37. #define CSS_PARSER_TRACE 1
  38. #define PARSE_ERROR() \
  39. do { \
  40. dbgln_if(CSS_PARSER_TRACE, "Parse error (CSS) {} @ {}", __PRETTY_FUNCTION__, __LINE__); \
  41. } while (0)
  42. namespace Web::CSS {
  43. Parser::Parser(const StringView& input, const String& encoding)
  44. : m_tokenizer(input, encoding)
  45. {
  46. m_tokens = m_tokenizer.parse();
  47. }
  48. Parser::~Parser()
  49. {
  50. }
  51. Token Parser::peek_token()
  52. {
  53. size_t next_offset = m_iterator_offset + 1;
  54. if (next_offset < m_tokens.size()) {
  55. return m_tokens.at(next_offset);
  56. }
  57. return m_tokens.at(m_iterator_offset);
  58. }
  59. Token Parser::next_token()
  60. {
  61. if (m_iterator_offset < (int)m_tokens.size()) {
  62. ++m_iterator_offset;
  63. }
  64. auto token = m_tokens.at(m_iterator_offset);
  65. return token;
  66. }
  67. Token Parser::current_token()
  68. {
  69. return m_tokens.at(m_iterator_offset);
  70. }
  71. Vector<QualifiedStyleRule> Parser::parse_as_stylesheet()
  72. {
  73. auto rules = consume_a_list_of_rules(true);
  74. dbgln("Printing rules:");
  75. for (auto& rule : rules) {
  76. dbgln("PRE:");
  77. for (auto& pre : rule.m_prelude) {
  78. dbgln("{}", pre);
  79. }
  80. dbgln("BLOCK:");
  81. dbgln("{}", rule.m_block.to_string());
  82. dbgln("");
  83. auto selectors = parse_selectors(rule.m_prelude);
  84. }
  85. return rules;
  86. }
  87. Vector<CSS::Selector::ComplexSelector> Parser::parse_selectors(Vector<String> parts)
  88. {
  89. (void)parts;
  90. Vector<CSS::Selector::ComplexSelector> selectors;
  91. return selectors;
  92. }
  93. void Parser::dump_all_tokens()
  94. {
  95. dbgln("Dumping all tokens:");
  96. for (auto& token : m_tokens)
  97. dbgln("{}", token.to_string());
  98. }
  99. void Parser::reconsume_current_input_token()
  100. {
  101. --m_iterator_offset;
  102. }
  103. Vector<QualifiedStyleRule> Parser::consume_a_list_of_rules(bool top_level)
  104. {
  105. Vector<QualifiedStyleRule> rules;
  106. for (;;) {
  107. auto token = next_token();
  108. if (token.is_whitespace()) {
  109. continue;
  110. }
  111. if (token.is_eof()) {
  112. break;
  113. }
  114. if (token.is_cdo() || token.is_cdc()) {
  115. if (top_level) {
  116. continue;
  117. }
  118. reconsume_current_input_token();
  119. auto maybe_qualified = consume_a_qualified_rule();
  120. if (maybe_qualified.has_value()) {
  121. rules.append(maybe_qualified.value());
  122. }
  123. continue;
  124. }
  125. if (token.is_at()) {
  126. reconsume_current_input_token();
  127. rules.append(consume_an_at_rule());
  128. continue;
  129. }
  130. reconsume_current_input_token();
  131. auto maybe_qualified = consume_a_qualified_rule();
  132. if (maybe_qualified.has_value()) {
  133. rules.append(maybe_qualified.value());
  134. }
  135. }
  136. return rules;
  137. }
  138. AtStyleRule Parser::consume_an_at_rule()
  139. {
  140. auto initial = next_token();
  141. AtStyleRule rule;
  142. rule.m_name = initial.m_value.to_string();
  143. for (;;) {
  144. auto token = next_token();
  145. if (token.is_semicolon()) {
  146. return rule;
  147. }
  148. if (token.is_eof()) {
  149. PARSE_ERROR();
  150. return rule;
  151. }
  152. if (token.is_open_curly()) {
  153. rule.m_block = consume_a_simple_block();
  154. return rule;
  155. }
  156. // how is "simple block with an associated token of <{-token>" a valid token?
  157. reconsume_current_input_token();
  158. auto value = consume_a_component_value();
  159. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  160. if (value.m_token.is_whitespace()) {
  161. continue;
  162. }
  163. }
  164. rule.m_prelude.append(value.to_string());
  165. }
  166. }
  167. Optional<QualifiedStyleRule> Parser::consume_a_qualified_rule()
  168. {
  169. QualifiedStyleRule rule;
  170. for (;;) {
  171. auto token = next_token();
  172. if (token.is_eof()) {
  173. PARSE_ERROR();
  174. return {};
  175. }
  176. if (token.is_open_curly()) {
  177. rule.m_block = consume_a_simple_block();
  178. return rule;
  179. }
  180. // how is "simple block with an associated token of <{-token>" a valid token?
  181. reconsume_current_input_token();
  182. auto value = consume_a_component_value();
  183. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  184. if (value.m_token.is_whitespace()) {
  185. continue;
  186. }
  187. }
  188. rule.m_prelude.append(value.to_string());
  189. }
  190. return rule;
  191. }
  192. StyleComponentValueRule Parser::consume_a_component_value()
  193. {
  194. auto token = next_token();
  195. if (token.is_open_curly() || token.is_open_square() || token.is_open_paren()) {
  196. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Block);
  197. component.m_block = consume_a_simple_block();
  198. return component;
  199. }
  200. if (token.is_function()) {
  201. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Function);
  202. component.m_function = consume_a_function();
  203. return component;
  204. }
  205. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  206. component.m_token = token;
  207. return component;
  208. }
  209. StyleBlockRule Parser::consume_a_simple_block()
  210. {
  211. auto ending_token = current_token().mirror_variant();
  212. StyleBlockRule block;
  213. block.m_token = current_token();
  214. for (;;) {
  215. auto token = next_token();
  216. if (token.m_type == ending_token) {
  217. return block;
  218. }
  219. if (token.is_eof()) {
  220. PARSE_ERROR();
  221. return block;
  222. }
  223. reconsume_current_input_token();
  224. auto value = consume_a_component_value();
  225. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  226. if (value.m_token.is_whitespace()) {
  227. continue;
  228. }
  229. }
  230. block.m_values.append(value.to_string());
  231. }
  232. }
  233. StyleFunctionRule Parser::consume_a_function()
  234. {
  235. StyleFunctionRule function;
  236. function.m_name = current_token().m_value.to_string();
  237. for (;;) {
  238. auto token = next_token();
  239. if (token.is_close_paren()) {
  240. return function;
  241. }
  242. if (token.is_eof()) {
  243. PARSE_ERROR();
  244. return function;
  245. }
  246. reconsume_current_input_token();
  247. auto value = consume_a_component_value();
  248. if (value.m_type == StyleComponentValueRule::ComponentType::Token) {
  249. if (value.m_token.is_whitespace()) {
  250. continue;
  251. }
  252. }
  253. function.m_values.append(value.to_string());
  254. }
  255. return function;
  256. }
  257. Optional<StyleDeclarationRule> Parser::consume_a_declaration(Vector<StyleComponentValueRule>)
  258. {
  259. TODO();
  260. }
  261. Optional<StyleDeclarationRule> Parser::consume_a_declaration()
  262. {
  263. auto token = next_token();
  264. StyleDeclarationRule declaration;
  265. declaration.m_name = token.m_value.to_string();
  266. for (;;) {
  267. if (!peek_token().is_whitespace()) {
  268. break;
  269. }
  270. next_token();
  271. }
  272. auto colon = next_token();
  273. if (!colon.is_colon()) {
  274. PARSE_ERROR();
  275. return {};
  276. }
  277. for (;;) {
  278. if (!peek_token().is_whitespace()) {
  279. break;
  280. }
  281. next_token();
  282. }
  283. for (;;) {
  284. if (peek_token().is_eof()) {
  285. break;
  286. }
  287. declaration.m_values.append(consume_a_component_value());
  288. }
  289. auto second_last = declaration.m_values.at(declaration.m_values.size() - 2);
  290. auto last = declaration.m_values.at(declaration.m_values.size() - 1);
  291. if (second_last.m_type == StyleComponentValueRule::ComponentType::Token && last.m_type == StyleComponentValueRule::ComponentType::Token) {
  292. auto last_token = last.m_token;
  293. auto second_last_token = second_last.m_token;
  294. if (second_last_token.is_delim() && second_last_token.m_value.to_string().equals_ignoring_case("!")) {
  295. if (last_token.is_ident() && last_token.m_value.to_string().equals_ignoring_case("important")) {
  296. declaration.m_values.remove(declaration.m_values.size() - 2);
  297. declaration.m_values.remove(declaration.m_values.size() - 1);
  298. declaration.m_important = true;
  299. }
  300. }
  301. }
  302. for (;;) {
  303. auto maybe_whitespace = declaration.m_values.at(declaration.m_values.size() - 1);
  304. if (!(maybe_whitespace.m_type == StyleComponentValueRule::ComponentType::Token && maybe_whitespace.m_token.is_whitespace())) {
  305. break;
  306. }
  307. declaration.m_values.remove(declaration.m_values.size() - 1);
  308. }
  309. return declaration;
  310. }
  311. Vector<DeclarationOrAtRule> Parser::consume_a_list_of_declarations()
  312. {
  313. Vector<DeclarationOrAtRule> list;
  314. for (;;) {
  315. auto token = next_token();
  316. if (token.is_whitespace() || token.is_semicolon()) {
  317. continue;
  318. }
  319. if (token.is_eof()) {
  320. return list;
  321. }
  322. if (token.is_at()) {
  323. reconsume_current_input_token();
  324. list.append(DeclarationOrAtRule(consume_an_at_rule()));
  325. continue;
  326. }
  327. if (token.is_ident()) {
  328. Vector<StyleComponentValueRule> temp;
  329. auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token);
  330. component.m_token = token;
  331. temp.append(component);
  332. for (;;) {
  333. auto peek = peek_token();
  334. if (peek.is_semicolon() || peek.is_eof()) {
  335. break;
  336. }
  337. temp.append(consume_a_component_value());
  338. }
  339. auto maybe_declaration = consume_a_declaration(temp);
  340. if (maybe_declaration.has_value()) {
  341. list.append(DeclarationOrAtRule(maybe_declaration.value()));
  342. }
  343. }
  344. PARSE_ERROR();
  345. reconsume_current_input_token();
  346. auto peek = peek_token();
  347. if (!(peek.is_semicolon() || peek.is_eof())) {
  348. consume_a_component_value();
  349. }
  350. }
  351. return list;
  352. }
  353. Optional<QualifiedStyleRule> Parser::parse_as_rule()
  354. {
  355. Optional<QualifiedStyleRule> rule;
  356. for (;;) {
  357. auto maybe_whitespace = peek_token();
  358. if (!maybe_whitespace.is_whitespace()) {
  359. break;
  360. }
  361. next_token();
  362. }
  363. auto token = peek_token();
  364. if (token.is_eof()) {
  365. return {};
  366. }
  367. if (token.is_at()) {
  368. rule = consume_an_at_rule();
  369. } else {
  370. rule = consume_a_qualified_rule();
  371. }
  372. for (;;) {
  373. auto maybe_whitespace = peek_token();
  374. if (!token.is_whitespace()) {
  375. break;
  376. }
  377. next_token();
  378. }
  379. auto maybe_eof = peek_token();
  380. if (maybe_eof.is_eof()) {
  381. return rule;
  382. }
  383. return {};
  384. }
  385. Vector<QualifiedStyleRule> Parser::parse_as_list_of_rules()
  386. {
  387. return consume_a_list_of_rules(false);
  388. }
  389. Optional<StyleDeclarationRule> Parser::parse_as_declaration()
  390. {
  391. for (;;) {
  392. auto maybe_whitespace = peek_token();
  393. if (!maybe_whitespace.is_whitespace()) {
  394. break;
  395. }
  396. next_token();
  397. }
  398. auto token = peek_token();
  399. if (!token.is_ident()) {
  400. return {};
  401. }
  402. return consume_a_declaration();
  403. }
  404. Vector<DeclarationOrAtRule> Parser::parse_as_list_of_declarations()
  405. {
  406. return consume_a_list_of_declarations();
  407. }
  408. Optional<StyleComponentValueRule> Parser::parse_as_component_value()
  409. {
  410. for (;;) {
  411. auto maybe_whitespace = peek_token();
  412. if (!maybe_whitespace.is_whitespace()) {
  413. break;
  414. }
  415. next_token();
  416. }
  417. auto token = peek_token();
  418. if (token.is_eof()) {
  419. return {};
  420. }
  421. auto value = consume_a_component_value();
  422. for (;;) {
  423. auto maybe_whitespace = peek_token();
  424. if (!token.is_whitespace()) {
  425. break;
  426. }
  427. next_token();
  428. }
  429. auto maybe_eof = peek_token();
  430. if (maybe_eof.is_eof()) {
  431. return value;
  432. }
  433. return {};
  434. }
  435. Vector<StyleComponentValueRule> Parser::parse_as_list_of_component_values()
  436. {
  437. Vector<StyleComponentValueRule> rules;
  438. for (;;) {
  439. if (peek_token().is_eof()) {
  440. break;
  441. }
  442. rules.append(consume_a_component_value());
  443. }
  444. return rules;
  445. }
  446. Vector<StyleComponentValueRule> Parser::parse_as_list_of_comma_separated_component_values()
  447. {
  448. Vector<StyleComponentValueRule> rules;
  449. for (;;) {
  450. rules.append(consume_a_component_value());
  451. if (peek_token().is_comma())
  452. continue;
  453. if (peek_token().is_eof())
  454. break;
  455. }
  456. return rules;
  457. }
  458. }