CSSParser.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. #include <AK/HashMap.h>
  2. #include <LibHTML/CSS/PropertyID.h>
  3. #include <LibHTML/CSS/StyleSheet.h>
  4. #include <LibHTML/Parser/CSSParser.h>
  5. #include <ctype.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  // Parser-internal assertion. When `x` evaluates to false, logs the stringified expression
  // followed by the current parse position and the whole input, then hits
  // ASSERT_NOT_REACHED().
  //
  // The expansion refers to `index` and `css`, so both names must be visible wherever the
  // macro is used.
  //
  // The body is wrapped in do { ... } while (0) so that `PARSE_ASSERT(x);` always behaves as
  // exactly one statement, even as the unbraced body of an if/else.
  #define PARSE_ASSERT(x)                                                       \
      do {                                                                      \
          if (!(x)) {                                                           \
              dbg() << "CSS PARSER ASSERTION FAILED: " << #x;                   \
              dbg() << "At character# " << index << " in CSS: _" << css << "_"; \
              ASSERT_NOT_REACHED();                                             \
          }                                                                     \
      } while (0)
  14. static Optional<Color> parse_css_color(const StringView& view)
  15. {
  16. auto color = Color::from_string(view);
  17. if (color.has_value())
  18. return color;
  19. // FIXME: Parse all valid color strings :^)
  20. return {};
  21. }
  22. static Optional<float> try_parse_float(const StringView& string)
  23. {
  24. const char* str = string.characters_without_null_termination();
  25. size_t len = string.length();
  26. size_t weight = 1;
  27. int exp_val = 0;
  28. float value = 0.0f;
  29. float fraction = 0.0f;
  30. bool has_sign = false;
  31. bool is_negative = false;
  32. bool is_fractional = false;
  33. bool is_scientific = false;
  34. if (str[0] == '-') {
  35. is_negative = true;
  36. has_sign = true;
  37. }
  38. if (str[0] == '+') {
  39. has_sign = true;
  40. }
  41. for (size_t i = has_sign; i < len; i++) {
  42. // Looks like we're about to start working on the fractional part
  43. if (str[i] == '.') {
  44. is_fractional = true;
  45. continue;
  46. }
  47. if (str[i] == 'e' || str[i] == 'E') {
  48. if (str[i + 1] == '-' || str[i + 1] == '+')
  49. exp_val = atoi(str + i + 2);
  50. else
  51. exp_val = atoi(str + i + 1);
  52. is_scientific = true;
  53. continue;
  54. }
  55. if (str[i] < '0' || str[i] > '9' || exp_val != 0) {
  56. return {};
  57. continue;
  58. }
  59. if (is_fractional) {
  60. fraction *= 10;
  61. fraction += str[i] - '0';
  62. weight *= 10;
  63. } else {
  64. value = value * 10;
  65. value += str[i] - '0';
  66. }
  67. }
  68. fraction /= weight;
  69. value += fraction;
  70. if (is_scientific) {
  71. bool divide = exp_val < 0;
  72. if (divide)
  73. exp_val *= -1;
  74. for (int i = 0; i < exp_val; i++) {
  75. if (divide)
  76. value /= 10;
  77. else
  78. value *= 10;
  79. }
  80. }
  81. return is_negative ? -value : value;
  82. }
  83. static Optional<float> parse_number(const StringView& view)
  84. {
  85. if (view.length() >= 2 && view[view.length() - 2] == 'p' && view[view.length() - 1] == 'x')
  86. return parse_number(view.substring_view(0, view.length() - 2));
  87. return try_parse_float(view);
  88. }
  89. NonnullRefPtr<StyleValue> parse_css_value(const StringView& string)
  90. {
  91. auto number = parse_number(string);
  92. if (number.has_value())
  93. return LengthStyleValue::create(Length(number.value(), Length::Type::Absolute));
  94. if (string == "inherit")
  95. return InheritStyleValue::create();
  96. if (string == "initial")
  97. return InitialStyleValue::create();
  98. if (string == "auto")
  99. return LengthStyleValue::create(Length());
  100. auto color = parse_css_color(string);
  101. if (color.has_value())
  102. return ColorStyleValue::create(color.value());
  103. if (string == "-libhtml-link")
  104. return IdentifierStyleValue::create(CSS::ValueID::VendorSpecificLink);
  105. return StringStyleValue::create(string);
  106. }
  107. class CSSParser {
  108. public:
  109. CSSParser(const StringView& input)
  110. : css(input)
  111. {
  112. }
  113. bool next_is(const char* str) const
  114. {
  115. size_t len = strlen(str);
  116. for (size_t i = 0; i < len; ++i) {
  117. if (peek(i) != str[i])
  118. return false;
  119. }
  120. return true;
  121. }
  122. char peek(size_t offset = 0) const
  123. {
  124. if ((index + offset) < css.length())
  125. return css[index + offset];
  126. return 0;
  127. }
  128. char consume_specific(char ch)
  129. {
  130. if (peek() != ch) {
  131. dbg() << "peek() != '" << ch << "'";
  132. }
  133. PARSE_ASSERT(peek() == ch);
  134. PARSE_ASSERT(index < css.length());
  135. ++index;
  136. return ch;
  137. }
  138. char consume_one()
  139. {
  140. PARSE_ASSERT(index < css.length());
  141. return css[index++];
  142. };
  143. bool consume_whitespace_or_comments()
  144. {
  145. size_t original_index = index;
  146. bool in_comment = false;
  147. for (; index < css.length(); ++index) {
  148. char ch = peek();
  149. if (isspace(ch))
  150. continue;
  151. if (!in_comment && ch == '/' && peek(1) == '*') {
  152. in_comment = true;
  153. ++index;
  154. continue;
  155. }
  156. if (in_comment && ch == '*' && peek(1) == '/') {
  157. in_comment = false;
  158. ++index;
  159. continue;
  160. }
  161. if (in_comment)
  162. continue;
  163. break;
  164. }
  165. return original_index != index;
  166. }
  167. bool is_valid_selector_char(char ch) const
  168. {
  169. return isalnum(ch) || ch == '-' || ch == '_' || ch == '(' || ch == ')' || ch == '@';
  170. }
  171. bool is_combinator(char ch) const
  172. {
  173. return ch == '~' || ch == '>' || ch == '+';
  174. }
  175. Optional<Selector::SimpleSelector> parse_simple_selector()
  176. {
  177. if (consume_whitespace_or_comments())
  178. return {};
  179. if (peek() == '{' || peek() == ',' || is_combinator(peek()))
  180. return {};
  181. Selector::SimpleSelector::Type type;
  182. if (peek() == '*') {
  183. type = Selector::SimpleSelector::Type::Universal;
  184. consume_one();
  185. return Selector::SimpleSelector {
  186. type,
  187. Selector::SimpleSelector::PseudoClass::None,
  188. String(),
  189. Selector::SimpleSelector::AttributeMatchType::None,
  190. String(),
  191. String()
  192. };
  193. }
  194. if (peek() == '.') {
  195. type = Selector::SimpleSelector::Type::Class;
  196. consume_one();
  197. } else if (peek() == '#') {
  198. type = Selector::SimpleSelector::Type::Id;
  199. consume_one();
  200. } else if (isalpha(peek())) {
  201. type = Selector::SimpleSelector::Type::TagName;
  202. } else {
  203. type = Selector::SimpleSelector::Type::Universal;
  204. }
  205. if (type != Selector::SimpleSelector::Type::Universal) {
  206. while (is_valid_selector_char(peek()))
  207. buffer.append(consume_one());
  208. PARSE_ASSERT(!buffer.is_null());
  209. }
  210. Selector::SimpleSelector simple_selector {
  211. type,
  212. Selector::SimpleSelector::PseudoClass::None,
  213. String::copy(buffer),
  214. Selector::SimpleSelector::AttributeMatchType::None,
  215. String(),
  216. String()
  217. };
  218. buffer.clear();
  219. if (peek() == '[') {
  220. Selector::SimpleSelector::AttributeMatchType attribute_match_type = Selector::SimpleSelector::AttributeMatchType::HasAttribute;
  221. String attribute_name;
  222. String attribute_value;
  223. bool in_value = false;
  224. consume_specific('[');
  225. char expected_end_of_attribute_selector = ']';
  226. while (peek() != expected_end_of_attribute_selector) {
  227. char ch = consume_one();
  228. if (ch == '=') {
  229. attribute_match_type = Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
  230. attribute_name = String::copy(buffer);
  231. buffer.clear();
  232. in_value = true;
  233. consume_whitespace_or_comments();
  234. if (peek() == '\'') {
  235. expected_end_of_attribute_selector = '\'';
  236. consume_one();
  237. } else if (peek() == '"') {
  238. expected_end_of_attribute_selector = '"';
  239. consume_one();
  240. }
  241. continue;
  242. }
  243. buffer.append(ch);
  244. }
  245. if (in_value)
  246. attribute_value = String::copy(buffer);
  247. else
  248. attribute_name = String::copy(buffer);
  249. buffer.clear();
  250. simple_selector.attribute_match_type = attribute_match_type;
  251. simple_selector.attribute_name = attribute_name;
  252. simple_selector.attribute_value = attribute_value;
  253. if (expected_end_of_attribute_selector != ']')
  254. consume_specific(expected_end_of_attribute_selector);
  255. consume_whitespace_or_comments();
  256. consume_specific(']');
  257. }
  258. if (peek() == ':') {
  259. // FIXME: Implement pseudo elements.
  260. [[maybe_unused]] bool is_pseudo_element = false;
  261. consume_one();
  262. if (peek() == ':') {
  263. is_pseudo_element = true;
  264. consume_one();
  265. }
  266. if (next_is("not")) {
  267. buffer.append(consume_one());
  268. buffer.append(consume_one());
  269. buffer.append(consume_one());
  270. buffer.append(consume_specific('('));
  271. while (peek() != ')')
  272. buffer.append(consume_one());
  273. buffer.append(consume_specific(')'));
  274. } else {
  275. while (is_valid_selector_char(peek()))
  276. buffer.append(consume_one());
  277. }
  278. auto pseudo_name = String::copy(buffer);
  279. buffer.clear();
  280. if (pseudo_name == "link")
  281. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Link;
  282. else if (pseudo_name == "hover")
  283. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Hover;
  284. else if (pseudo_name == "first-child")
  285. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::FirstChild;
  286. else if (pseudo_name == "last-child")
  287. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::LastChild;
  288. else if (pseudo_name == "only-child")
  289. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::OnlyChild;
  290. else if (pseudo_name == "empty")
  291. simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Empty;
  292. }
  293. return simple_selector;
  294. }
  295. Optional<Selector::ComplexSelector> parse_complex_selector()
  296. {
  297. auto relation = Selector::ComplexSelector::Relation::Descendant;
  298. if (peek() == '{' || peek() == ',')
  299. return {};
  300. if (is_combinator(peek())) {
  301. switch (peek()) {
  302. case '>':
  303. relation = Selector::ComplexSelector::Relation::ImmediateChild;
  304. break;
  305. case '+':
  306. relation = Selector::ComplexSelector::Relation::AdjacentSibling;
  307. break;
  308. case '~':
  309. relation = Selector::ComplexSelector::Relation::GeneralSibling;
  310. break;
  311. }
  312. consume_one();
  313. consume_whitespace_or_comments();
  314. }
  315. consume_whitespace_or_comments();
  316. Vector<Selector::SimpleSelector> simple_selectors;
  317. for (;;) {
  318. auto component = parse_simple_selector();
  319. if (!component.has_value())
  320. break;
  321. simple_selectors.append(component.value());
  322. // If this assert triggers, we're most likely up to no good.
  323. PARSE_ASSERT(simple_selectors.size() < 100);
  324. }
  325. return Selector::ComplexSelector { relation, move(simple_selectors) };
  326. }
  327. void parse_selector()
  328. {
  329. Vector<Selector::ComplexSelector> complex_selectors;
  330. for (;;) {
  331. auto complex_selector = parse_complex_selector();
  332. if (complex_selector.has_value())
  333. complex_selectors.append(complex_selector.value());
  334. consume_whitespace_or_comments();
  335. if (peek() == ',' || peek() == '{')
  336. break;
  337. }
  338. if (complex_selectors.is_empty())
  339. return;
  340. complex_selectors.first().relation = Selector::ComplexSelector::Relation::None;
  341. current_rule.selectors.append(Selector(move(complex_selectors)));
  342. };
  343. void parse_selector_list()
  344. {
  345. for (;;) {
  346. parse_selector();
  347. consume_whitespace_or_comments();
  348. if (peek() == ',') {
  349. consume_one();
  350. continue;
  351. }
  352. if (peek() == '{')
  353. break;
  354. }
  355. }
  356. bool is_valid_property_name_char(char ch) const
  357. {
  358. return ch && !isspace(ch) && ch != ':';
  359. }
  360. bool is_valid_property_value_char(char ch) const
  361. {
  362. return ch && ch != '!' && ch != ';' && ch != '}';
  363. }
  364. struct ValueAndImportant {
  365. String value;
  366. bool important { false };
  367. };
  368. ValueAndImportant consume_css_value()
  369. {
  370. buffer.clear();
  371. int paren_nesting_level = 0;
  372. bool important = false;
  373. for (;;) {
  374. char ch = peek();
  375. if (ch == '(') {
  376. ++paren_nesting_level;
  377. buffer.append(consume_one());
  378. continue;
  379. }
  380. if (ch == ')') {
  381. PARSE_ASSERT(paren_nesting_level > 0);
  382. --paren_nesting_level;
  383. buffer.append(consume_one());
  384. continue;
  385. }
  386. if (paren_nesting_level > 0) {
  387. buffer.append(consume_one());
  388. continue;
  389. }
  390. if (next_is("!important")) {
  391. consume_specific('!');
  392. consume_specific('i');
  393. consume_specific('m');
  394. consume_specific('p');
  395. consume_specific('o');
  396. consume_specific('r');
  397. consume_specific('t');
  398. consume_specific('a');
  399. consume_specific('n');
  400. consume_specific('t');
  401. important = true;
  402. continue;
  403. }
  404. if (next_is("/*")) {
  405. consume_whitespace_or_comments();
  406. continue;
  407. }
  408. if (!ch)
  409. break;
  410. if (ch == '}')
  411. break;
  412. if (ch == ';')
  413. break;
  414. buffer.append(consume_one());
  415. }
  416. // Remove trailing whitespace.
  417. while (!buffer.is_empty() && isspace(buffer.last()))
  418. buffer.take_last();
  419. auto string = String::copy(buffer);
  420. buffer.clear();
  421. return { string, important };
  422. }
  423. Optional<StyleProperty> parse_property()
  424. {
  425. consume_whitespace_or_comments();
  426. if (peek() == ';') {
  427. consume_one();
  428. return {};
  429. }
  430. if (peek() == '}')
  431. return {};
  432. buffer.clear();
  433. while (is_valid_property_name_char(peek()))
  434. buffer.append(consume_one());
  435. auto property_name = String::copy(buffer);
  436. buffer.clear();
  437. consume_whitespace_or_comments();
  438. consume_specific(':');
  439. consume_whitespace_or_comments();
  440. auto [property_value, important] = consume_css_value();
  441. consume_whitespace_or_comments();
  442. if (peek() && peek() != '}')
  443. consume_specific(';');
  444. auto property_id = CSS::property_id_from_string(property_name);
  445. return StyleProperty { property_id, parse_css_value(property_value), important };
  446. }
  447. void parse_declaration()
  448. {
  449. for (;;) {
  450. auto property = parse_property();
  451. if (property.has_value())
  452. current_rule.properties.append(property.value());
  453. consume_whitespace_or_comments();
  454. if (peek() == '}')
  455. break;
  456. }
  457. }
  458. void parse_rule()
  459. {
  460. consume_whitespace_or_comments();
  461. if (index >= css.length())
  462. return;
  463. // FIXME: We ignore @media rules for now.
  464. if (next_is("@media")) {
  465. while (peek() != '{')
  466. consume_one();
  467. int level = 0;
  468. for (;;) {
  469. auto ch = consume_one();
  470. if (ch == '{') {
  471. ++level;
  472. } else if (ch == '}') {
  473. --level;
  474. if (level == 0)
  475. break;
  476. }
  477. }
  478. consume_whitespace_or_comments();
  479. return;
  480. }
  481. parse_selector_list();
  482. consume_specific('{');
  483. parse_declaration();
  484. consume_specific('}');
  485. rules.append(StyleRule::create(move(current_rule.selectors), StyleDeclaration::create(move(current_rule.properties))));
  486. consume_whitespace_or_comments();
  487. }
  488. RefPtr<StyleSheet> parse_sheet()
  489. {
  490. while (index < css.length()) {
  491. parse_rule();
  492. }
  493. return StyleSheet::create(move(rules));
  494. }
  495. RefPtr<StyleDeclaration> parse_standalone_declaration()
  496. {
  497. consume_whitespace_or_comments();
  498. for (;;) {
  499. auto property = parse_property();
  500. if (property.has_value())
  501. current_rule.properties.append(property.value());
  502. consume_whitespace_or_comments();
  503. if (!peek())
  504. break;
  505. }
  506. return StyleDeclaration::create(move(current_rule.properties));
  507. }
  508. private:
  509. NonnullRefPtrVector<StyleRule> rules;
  510. struct CurrentRule {
  511. Vector<Selector> selectors;
  512. Vector<StyleProperty> properties;
  513. };
  514. CurrentRule current_rule;
  515. Vector<char> buffer;
  516. size_t index = 0;
  517. StringView css;
  518. };
  519. RefPtr<StyleSheet> parse_css(const StringView& css)
  520. {
  521. CSSParser parser(css);
  522. return parser.parse_sheet();
  523. }
  524. RefPtr<StyleDeclaration> parse_css_declaration(const StringView& css)
  525. {
  526. CSSParser parser(css);
  527. return parser.parse_standalone_declaration();
  528. }