CSSParser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. #include <AK/HashMap.h>
  2. #include <LibHTML/CSS/PropertyID.h>
  3. #include <LibHTML/CSS/StyleSheet.h>
  4. #include <LibHTML/Parser/CSSParser.h>
  5. #include <ctype.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #define PARSE_ASSERT(x) \
  9. if (!(x)) { \
  10. dbg() << "CSS PARSER ASSERTION FAILED: " << #x; \
  11. dbg() << "At character# " << index << " in CSS: _" << css << "_"; \
  12. ASSERT_NOT_REACHED(); \
  13. }
  14. static Optional<Color> parse_css_color(const StringView& view)
  15. {
  16. auto color = Color::from_string(view);
  17. if (color.has_value())
  18. return color;
  19. // FIXME: Parse all valid color strings :^)
  20. return {};
  21. }
  22. static Optional<float> try_parse_float(const StringView& string)
  23. {
  24. const char* str = string.characters_without_null_termination();
  25. size_t len = string.length();
  26. size_t weight = 1;
  27. int exp_val = 0;
  28. float value = 0.0f;
  29. float fraction = 0.0f;
  30. bool has_sign = false;
  31. bool is_negative = false;
  32. bool is_fractional = false;
  33. bool is_scientific = false;
  34. if (str[0] == '-') {
  35. is_negative = true;
  36. has_sign = true;
  37. }
  38. if (str[0] == '+') {
  39. has_sign = true;
  40. }
  41. for (size_t i = has_sign; i < len; i++) {
  42. // Looks like we're about to start working on the fractional part
  43. if (str[i] == '.') {
  44. is_fractional = true;
  45. continue;
  46. }
  47. if (str[i] == 'e' || str[i] == 'E') {
  48. if (str[i + 1] == '-' || str[i + 1] == '+')
  49. exp_val = atoi(str + i + 2);
  50. else
  51. exp_val = atoi(str + i + 1);
  52. is_scientific = true;
  53. continue;
  54. }
  55. if (str[i] < '0' || str[i] > '9' || exp_val != 0) {
  56. return {};
  57. continue;
  58. }
  59. if (is_fractional) {
  60. fraction *= 10;
  61. fraction += str[i] - '0';
  62. weight *= 10;
  63. } else {
  64. value = value * 10;
  65. value += str[i] - '0';
  66. }
  67. }
  68. fraction /= weight;
  69. value += fraction;
  70. if (is_scientific) {
  71. bool divide = exp_val < 0;
  72. if (divide)
  73. exp_val *= -1;
  74. for (int i = 0; i < exp_val; i++) {
  75. if (divide)
  76. value /= 10;
  77. else
  78. value *= 10;
  79. }
  80. }
  81. return is_negative ? -value : value;
  82. }
  83. static Optional<float> parse_number(const StringView& view)
  84. {
  85. if (view.length() >= 2 && view[view.length() - 2] == 'p' && view[view.length() - 1] == 'x')
  86. return parse_number(view.substring_view(0, view.length() - 2));
  87. return try_parse_float(view);
  88. }
  89. NonnullRefPtr<StyleValue> parse_css_value(const StringView& string)
  90. {
  91. auto number = parse_number(string);
  92. if (number.has_value())
  93. return LengthStyleValue::create(Length(number.value(), Length::Type::Absolute));
  94. if (string == "inherit")
  95. return InheritStyleValue::create();
  96. if (string == "initial")
  97. return InitialStyleValue::create();
  98. if (string == "auto")
  99. return LengthStyleValue::create(Length());
  100. auto color = parse_css_color(string);
  101. if (color.has_value())
  102. return ColorStyleValue::create(color.value());
  103. if (string == "-libhtml-link")
  104. return IdentifierStyleValue::create(CSS::ValueID::VendorSpecificLink);
  105. return StringStyleValue::create(string);
  106. }
  107. class CSSParser {
  108. public:
  109. CSSParser(const StringView& input)
  110. : css(input)
  111. {
  112. }
  113. bool next_is(const char* str) const
  114. {
  115. int len = strlen(str);
  116. for (int i = 0; i < len; ++i) {
  117. if (peek(i) != str[i])
  118. return false;
  119. }
  120. return true;
  121. }
  122. char peek(int offset = 0) const
  123. {
  124. if ((index + offset) < css.length())
  125. return css[index + offset];
  126. return 0;
  127. }
  128. char consume_specific(char ch)
  129. {
  130. if (peek() != ch) {
  131. dbg() << "peek() != '" << ch << "'";
  132. }
  133. PARSE_ASSERT(peek() == ch);
  134. PARSE_ASSERT(index < css.length());
  135. ++index;
  136. return ch;
  137. }
  138. char consume_one()
  139. {
  140. PARSE_ASSERT(index < css.length());
  141. return css[index++];
  142. };
  143. bool consume_whitespace_or_comments()
  144. {
  145. int original_index = index;
  146. bool in_comment = false;
  147. for (; index < css.length(); ++index) {
  148. char ch = peek();
  149. if (isspace(ch))
  150. continue;
  151. if (!in_comment && ch == '/' && peek(1) == '*') {
  152. in_comment = true;
  153. ++index;
  154. continue;
  155. }
  156. if (in_comment && ch == '*' && peek(1) == '/') {
  157. in_comment = false;
  158. ++index;
  159. continue;
  160. }
  161. if (in_comment)
  162. continue;
  163. break;
  164. }
  165. return original_index != index;
  166. }
  167. bool is_valid_selector_char(char ch) const
  168. {
  169. return isalnum(ch) || ch == '-' || ch == '_' || ch == '(' || ch == ')' || ch == '@';
  170. }
  171. bool is_combinator(char ch) const
  172. {
  173. return ch == '~' || ch == '>' || ch == '+';
  174. }
  175. Optional<Selector::SimpleSelector> parse_simple_selector()
  176. {
  177. if (consume_whitespace_or_comments())
  178. return {};
  179. if (peek() == '{' || peek() == ',' || is_combinator(peek()))
  180. return {};
  181. Selector::SimpleSelector::Type type;
  182. if (peek() == '*') {
  183. type = Selector::SimpleSelector::Type::Universal;
  184. consume_one();
  185. return Selector::SimpleSelector {
  186. type,
  187. Selector::SimpleSelector::PseudoClass::None,
  188. String(),
  189. Selector::SimpleSelector::AttributeMatchType::None,
  190. String(),
  191. String()
  192. };
  193. }
  194. if (peek() == '.') {
  195. type = Selector::SimpleSelector::Type::Class;
  196. consume_one();
  197. } else if (peek() == '#') {
  198. type = Selector::SimpleSelector::Type::Id;
  199. consume_one();
  200. } else if (isalpha(peek())) {
  201. type = Selector::SimpleSelector::Type::TagName;
  202. } else {
  203. type = Selector::SimpleSelector::Type::Universal;
  204. }
  205. if (type != Selector::SimpleSelector::Type::Universal) {
  206. while (is_valid_selector_char(peek()))
  207. buffer.append(consume_one());
  208. PARSE_ASSERT(!buffer.is_null());
  209. }
  210. Selector::SimpleSelector simple_selector {
  211. type,
  212. Selector::SimpleSelector::PseudoClass::None,
  213. String::copy(buffer),
  214. Selector::SimpleSelector::AttributeMatchType::None,
  215. String(),
  216. String()
  217. };
  218. buffer.clear();
  219. if (peek() == '[') {
  220. Selector::SimpleSelector::AttributeMatchType attribute_match_type = Selector::SimpleSelector::AttributeMatchType::HasAttribute;
  221. String attribute_name;
  222. String attribute_value;
  223. bool in_value = false;
  224. consume_specific('[');
  225. char expected_end_of_attribute_selector = ']';
  226. while (peek() != expected_end_of_attribute_selector) {
  227. char ch = consume_one();
  228. if (ch == '=') {
  229. attribute_match_type = Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
  230. attribute_name = String::copy(buffer);
  231. buffer.clear();
  232. in_value = true;
  233. consume_whitespace_or_comments();
  234. if (peek() == '\'') {
  235. expected_end_of_attribute_selector = '\'';
  236. consume_one();
  237. } else if (peek() == '"') {
  238. expected_end_of_attribute_selector = '"';
  239. consume_one();
  240. }
  241. continue;
  242. }
  243. buffer.append(ch);
  244. }
  245. if (in_value)
  246. attribute_value = String::copy(buffer);
  247. else
  248. attribute_name = String::copy(buffer);
  249. buffer.clear();
  250. simple_selector.attribute_match_type = attribute_match_type;
  251. simple_selector.attribute_name = attribute_name;
  252. simple_selector.attribute_value = attribute_value;
  253. if (expected_end_of_attribute_selector != ']')
  254. consume_specific(expected_end_of_attribute_selector);
  255. consume_whitespace_or_comments();
  256. consume_specific(']');
  257. }
  258. if (peek() == ':') {
  259. // FIXME: Implement pseudo elements.
  260. [[maybe_unused]] bool is_pseudo_element = false;
  261. consume_one();
  262. if (peek() == ':') {
  263. is_pseudo_element = true;
  264. consume_one();
  265. }
  266. if (next_is("not")) {
  267. buffer.append(consume_one());
  268. buffer.append(consume_one());
  269. buffer.append(consume_one());
  270. buffer.append(consume_specific('('));
  271. while (peek() != ')')
  272. buffer.append(consume_one());
  273. buffer.append(consume_specific(')'));
  274. } else {
  275. while (is_valid_selector_char(peek()))
  276. buffer.append(consume_one());
  277. }
  278. auto pseudo_name = String::copy(buffer);
  279. buffer.clear();
  280. if (pseudo_name == "link")
  281. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Link;
  282. else if (pseudo_name == "hover")
  283. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Hover;
  284. }
  285. return simple_selector;
  286. }
  287. Optional<Selector::ComplexSelector> parse_complex_selector()
  288. {
  289. auto relation = Selector::ComplexSelector::Relation::Descendant;
  290. if (peek() == '{' || peek() == ',')
  291. return {};
  292. if (is_combinator(peek())) {
  293. switch (peek()) {
  294. case '>':
  295. relation = Selector::ComplexSelector::Relation::ImmediateChild;
  296. break;
  297. case '+':
  298. relation = Selector::ComplexSelector::Relation::AdjacentSibling;
  299. break;
  300. case '~':
  301. relation = Selector::ComplexSelector::Relation::GeneralSibling;
  302. break;
  303. }
  304. consume_one();
  305. consume_whitespace_or_comments();
  306. }
  307. consume_whitespace_or_comments();
  308. Vector<Selector::SimpleSelector> simple_selectors;
  309. for (;;) {
  310. auto component = parse_simple_selector();
  311. if (!component.has_value())
  312. break;
  313. simple_selectors.append(component.value());
  314. // If this assert triggers, we're most likely up to no good.
  315. PARSE_ASSERT(simple_selectors.size() < 100);
  316. }
  317. return Selector::ComplexSelector { relation, move(simple_selectors) };
  318. }
  319. void parse_selector()
  320. {
  321. Vector<Selector::ComplexSelector> complex_selectors;
  322. for (;;) {
  323. auto complex_selector = parse_complex_selector();
  324. if (complex_selector.has_value())
  325. complex_selectors.append(complex_selector.value());
  326. consume_whitespace_or_comments();
  327. if (peek() == ',' || peek() == '{')
  328. break;
  329. }
  330. if (complex_selectors.is_empty())
  331. return;
  332. complex_selectors.first().relation = Selector::ComplexSelector::Relation::None;
  333. current_rule.selectors.append(Selector(move(complex_selectors)));
  334. };
  335. void parse_selector_list()
  336. {
  337. for (;;) {
  338. parse_selector();
  339. consume_whitespace_or_comments();
  340. if (peek() == ',') {
  341. consume_one();
  342. continue;
  343. }
  344. if (peek() == '{')
  345. break;
  346. }
  347. }
  348. bool is_valid_property_name_char(char ch) const
  349. {
  350. return ch && !isspace(ch) && ch != ':';
  351. }
  352. bool is_valid_property_value_char(char ch) const
  353. {
  354. return ch && ch != '!' && ch != ';' && ch != '}';
  355. }
  356. struct ValueAndImportant {
  357. String value;
  358. bool important { false };
  359. };
  360. ValueAndImportant consume_css_value()
  361. {
  362. buffer.clear();
  363. int paren_nesting_level = 0;
  364. bool important = false;
  365. for (;;) {
  366. char ch = peek();
  367. if (ch == '(') {
  368. ++paren_nesting_level;
  369. buffer.append(consume_one());
  370. continue;
  371. }
  372. if (ch == ')') {
  373. PARSE_ASSERT(paren_nesting_level > 0);
  374. --paren_nesting_level;
  375. buffer.append(consume_one());
  376. continue;
  377. }
  378. if (paren_nesting_level > 0) {
  379. buffer.append(consume_one());
  380. continue;
  381. }
  382. if (next_is("!important")) {
  383. consume_specific('!');
  384. consume_specific('i');
  385. consume_specific('m');
  386. consume_specific('p');
  387. consume_specific('o');
  388. consume_specific('r');
  389. consume_specific('t');
  390. consume_specific('a');
  391. consume_specific('n');
  392. consume_specific('t');
  393. important = true;
  394. continue;
  395. }
  396. if (next_is("/*")) {
  397. consume_whitespace_or_comments();
  398. continue;
  399. }
  400. if (!ch)
  401. break;
  402. if (ch == '}')
  403. break;
  404. if (ch == ';')
  405. break;
  406. buffer.append(consume_one());
  407. }
  408. // Remove trailing whitespace.
  409. while (!buffer.is_empty() && isspace(buffer.last()))
  410. buffer.take_last();
  411. auto string = String::copy(buffer);
  412. buffer.clear();
  413. return { string, important };
  414. }
  415. Optional<StyleProperty> parse_property()
  416. {
  417. consume_whitespace_or_comments();
  418. if (peek() == ';') {
  419. consume_one();
  420. return {};
  421. }
  422. if (peek() == '}')
  423. return {};
  424. buffer.clear();
  425. while (is_valid_property_name_char(peek()))
  426. buffer.append(consume_one());
  427. auto property_name = String::copy(buffer);
  428. buffer.clear();
  429. consume_whitespace_or_comments();
  430. consume_specific(':');
  431. consume_whitespace_or_comments();
  432. auto [property_value, important] = consume_css_value();
  433. consume_whitespace_or_comments();
  434. if (peek() && peek() != '}')
  435. consume_specific(';');
  436. auto property_id = CSS::property_id_from_string(property_name);
  437. return StyleProperty { property_id, parse_css_value(property_value), important };
  438. }
  439. void parse_declaration()
  440. {
  441. for (;;) {
  442. auto property = parse_property();
  443. if (property.has_value())
  444. current_rule.properties.append(property.value());
  445. consume_whitespace_or_comments();
  446. if (peek() == '}')
  447. break;
  448. }
  449. }
  450. void parse_rule()
  451. {
  452. consume_whitespace_or_comments();
  453. if (index >= css.length())
  454. return;
  455. // FIXME: We ignore @media rules for now.
  456. if (next_is("@media")) {
  457. while (peek() != '{')
  458. consume_one();
  459. int level = 0;
  460. for (;;) {
  461. auto ch = consume_one();
  462. if (ch == '{') {
  463. ++level;
  464. } else if (ch == '}') {
  465. --level;
  466. if (level == 0)
  467. break;
  468. }
  469. }
  470. consume_whitespace_or_comments();
  471. return;
  472. }
  473. parse_selector_list();
  474. consume_specific('{');
  475. parse_declaration();
  476. consume_specific('}');
  477. rules.append(StyleRule::create(move(current_rule.selectors), StyleDeclaration::create(move(current_rule.properties))));
  478. consume_whitespace_or_comments();
  479. }
  480. RefPtr<StyleSheet> parse_sheet()
  481. {
  482. while (index < css.length()) {
  483. parse_rule();
  484. }
  485. return StyleSheet::create(move(rules));
  486. }
  487. RefPtr<StyleDeclaration> parse_standalone_declaration()
  488. {
  489. consume_whitespace_or_comments();
  490. for (;;) {
  491. auto property = parse_property();
  492. if (property.has_value())
  493. current_rule.properties.append(property.value());
  494. consume_whitespace_or_comments();
  495. if (!peek())
  496. break;
  497. }
  498. return StyleDeclaration::create(move(current_rule.properties));
  499. }
  500. private:
  501. NonnullRefPtrVector<StyleRule> rules;
  502. struct CurrentRule {
  503. Vector<Selector> selectors;
  504. Vector<StyleProperty> properties;
  505. };
  506. CurrentRule current_rule;
  507. Vector<char> buffer;
  508. int index = 0;
  509. StringView css;
  510. };
  511. RefPtr<StyleSheet> parse_css(const StringView& css)
  512. {
  513. CSSParser parser(css);
  514. return parser.parse_sheet();
  515. }
  516. RefPtr<StyleDeclaration> parse_css_declaration(const StringView& css)
  517. {
  518. CSSParser parser(css);
  519. return parser.parse_standalone_declaration();
  520. }