CSSParser.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
  1. #include <AK/HashMap.h>
  2. #include <LibHTML/CSS/PropertyID.h>
  3. #include <LibHTML/CSS/StyleSheet.h>
  4. #include <LibHTML/Parser/CSSParser.h>
  5. #include <ctype.h>
  6. #include <stdio.h>
  // Parser-internal assertion. When `x` is false, logs the failed expression
  // together with the current parse position (`index`) and the full input
  // (`css`), then aborts via ASSERT_NOT_REACHED().
  //
  // Must be used where `index` and `css` are in scope, i.e. inside CSSParser.
  //
  // The body is wrapped in do { ... } while (0) so the macro expands to exactly
  // one statement. A bare `if (...) { ... }` would mis-pair with a following
  // `else` when used as `if (cond) PARSE_ASSERT(x); else ...`.
  #define PARSE_ASSERT(x)                                                        \
      do {                                                                       \
          if (!(x)) {                                                            \
              dbg() << "CSS PARSER ASSERTION FAILED: " << #x;                    \
              dbg() << "At character# " << index << " in CSS: _" << css << "_"; \
              ASSERT_NOT_REACHED();                                              \
          }                                                                      \
      } while (0)
  13. static Optional<Color> parse_css_color(const StringView& view)
  14. {
  15. auto color = Color::from_string(view);
  16. if (color.has_value())
  17. return color;
  18. // FIXME: Parse all valid color strings :^)
  19. return {};
  20. }
  21. static Optional<float> try_parse_float(const StringView& string)
  22. {
  23. const char* str = string.characters_without_null_termination();
  24. size_t len = string.length();
  25. size_t weight = 1;
  26. int exp_val = 0;
  27. float value = 0.0f;
  28. float fraction = 0.0f;
  29. bool has_sign = false;
  30. bool is_negative = false;
  31. bool is_fractional = false;
  32. bool is_scientific = false;
  33. if (str[0] == '-') {
  34. is_negative = true;
  35. has_sign = true;
  36. }
  37. if (str[0] == '+') {
  38. has_sign = true;
  39. }
  40. for (size_t i = has_sign; i < len; i++) {
  41. // Looks like we're about to start working on the fractional part
  42. if (str[i] == '.') {
  43. is_fractional = true;
  44. continue;
  45. }
  46. if (str[i] == 'e' || str[i] == 'E') {
  47. if (str[i + 1] == '-' || str[i + 1] == '+')
  48. exp_val = atoi(str + i + 2);
  49. else
  50. exp_val = atoi(str + i + 1);
  51. is_scientific = true;
  52. continue;
  53. }
  54. if (str[i] < '0' || str[i] > '9' || exp_val != 0) {
  55. return {};
  56. continue;
  57. }
  58. if (is_fractional) {
  59. fraction *= 10;
  60. fraction += str[i] - '0';
  61. weight *= 10;
  62. } else {
  63. value = value * 10;
  64. value += str[i] - '0';
  65. }
  66. }
  67. fraction /= weight;
  68. value += fraction;
  69. if (is_scientific) {
  70. bool divide = exp_val < 0;
  71. if (divide)
  72. exp_val *= -1;
  73. for (int i = 0; i < exp_val; i++) {
  74. if (divide)
  75. value /= 10;
  76. else
  77. value *= 10;
  78. }
  79. }
  80. return is_negative ? -value : value;
  81. }
  82. static Optional<float> parse_number(const StringView& view)
  83. {
  84. if (view.length() >= 2 && view[view.length() - 2] == 'p' && view[view.length() - 1] == 'x')
  85. return parse_number(view.substring_view(0, view.length() - 2));
  86. return try_parse_float(view);
  87. }
  88. NonnullRefPtr<StyleValue> parse_css_value(const StringView& string)
  89. {
  90. auto number = parse_number(string);
  91. if (number.has_value())
  92. return LengthStyleValue::create(Length(number.value(), Length::Type::Absolute));
  93. if (string == "inherit")
  94. return InheritStyleValue::create();
  95. if (string == "initial")
  96. return InitialStyleValue::create();
  97. if (string == "auto")
  98. return LengthStyleValue::create(Length());
  99. auto color = parse_css_color(string);
  100. if (color.has_value())
  101. return ColorStyleValue::create(color.value());
  102. if (string == "-libhtml-link")
  103. return IdentifierStyleValue::create(CSS::ValueID::VendorSpecificLink);
  104. return StringStyleValue::create(string);
  105. }
  106. class CSSParser {
  107. public:
  108. CSSParser(const StringView& input)
  109. : css(input)
  110. {
  111. }
  112. bool next_is(const char* str) const
  113. {
  114. int len = strlen(str);
  115. for (int i = 0; i < len; ++i) {
  116. if (peek(i) != str[i])
  117. return false;
  118. }
  119. return true;
  120. }
  121. char peek(int offset = 0) const
  122. {
  123. if ((index + offset) < css.length())
  124. return css[index + offset];
  125. return 0;
  126. }
  127. char consume_specific(char ch)
  128. {
  129. if (peek() != ch) {
  130. dbg() << "peek() != '" << ch << "'";
  131. }
  132. PARSE_ASSERT(peek() == ch);
  133. PARSE_ASSERT(index < css.length());
  134. ++index;
  135. return ch;
  136. }
  137. char consume_one()
  138. {
  139. PARSE_ASSERT(index < css.length());
  140. return css[index++];
  141. };
  142. void consume_whitespace_or_comments()
  143. {
  144. bool in_comment = false;
  145. for (; index < css.length(); ++index) {
  146. char ch = peek();
  147. if (isspace(ch))
  148. continue;
  149. if (!in_comment && ch == '/' && peek(1) == '*') {
  150. in_comment = true;
  151. ++index;
  152. continue;
  153. }
  154. if (in_comment && ch == '*' && peek(1) == '/') {
  155. in_comment = false;
  156. ++index;
  157. continue;
  158. }
  159. if (in_comment)
  160. continue;
  161. break;
  162. }
  163. }
  164. bool is_valid_selector_char(char ch) const
  165. {
  166. return isalnum(ch) || ch == '-' || ch == '_' || ch == '(' || ch == ')' || ch == '@';
  167. }
  168. bool is_combinator(char ch) const
  169. {
  170. return ch == '~' || ch == '>' || ch == '+';
  171. }
  172. Optional<Selector::Component> parse_selector_component()
  173. {
  174. consume_whitespace_or_comments();
  175. Selector::Component::Type type;
  176. Selector::Component::Relation relation = Selector::Component::Relation::Descendant;
  177. if (peek() == '{')
  178. return {};
  179. if (is_combinator(peek())) {
  180. switch (peek()) {
  181. case '>':
  182. relation = Selector::Component::Relation::ImmediateChild;
  183. break;
  184. case '+':
  185. relation = Selector::Component::Relation::AdjacentSibling;
  186. break;
  187. case '~':
  188. relation = Selector::Component::Relation::GeneralSibling;
  189. break;
  190. }
  191. consume_one();
  192. consume_whitespace_or_comments();
  193. }
  194. if (peek() == '*') {
  195. type = Selector::Component::Type::Universal;
  196. consume_one();
  197. return Selector::Component {
  198. type,
  199. Selector::Component::PseudoClass::None,
  200. relation,
  201. String(),
  202. Selector::Component::AttributeMatchType::None,
  203. String(),
  204. String()
  205. };
  206. }
  207. if (peek() == '.') {
  208. type = Selector::Component::Type::Class;
  209. consume_one();
  210. } else if (peek() == '#') {
  211. type = Selector::Component::Type::Id;
  212. consume_one();
  213. } else if (isalpha(peek())) {
  214. type = Selector::Component::Type::TagName;
  215. } else {
  216. type = Selector::Component::Type::Universal;
  217. }
  218. if (type != Selector::Component::Type::Universal) {
  219. while (is_valid_selector_char(peek()))
  220. buffer.append(consume_one());
  221. PARSE_ASSERT(!buffer.is_null());
  222. }
  223. Selector::Component component {
  224. type,
  225. Selector::Component::PseudoClass::None,
  226. relation,
  227. String::copy(buffer),
  228. Selector::Component::AttributeMatchType::None,
  229. String(),
  230. String()
  231. };
  232. buffer.clear();
  233. if (peek() == '[') {
  234. Selector::Component::AttributeMatchType attribute_match_type = Selector::Component::AttributeMatchType::HasAttribute;
  235. String attribute_name;
  236. String attribute_value;
  237. bool in_value = false;
  238. consume_specific('[');
  239. char expected_end_of_attribute_selector = ']';
  240. while (peek() != expected_end_of_attribute_selector) {
  241. char ch = consume_one();
  242. if (ch == '=') {
  243. attribute_match_type = Selector::Component::AttributeMatchType::ExactValueMatch;
  244. attribute_name = String::copy(buffer);
  245. buffer.clear();
  246. in_value = true;
  247. consume_whitespace_or_comments();
  248. if (peek() == '\'') {
  249. expected_end_of_attribute_selector = '\'';
  250. consume_one();
  251. } else if (peek() == '"') {
  252. expected_end_of_attribute_selector = '"';
  253. consume_one();
  254. }
  255. continue;
  256. }
  257. buffer.append(ch);
  258. }
  259. if (in_value)
  260. attribute_value = String::copy(buffer);
  261. else
  262. attribute_name = String::copy(buffer);
  263. buffer.clear();
  264. component.attribute_match_type = attribute_match_type;
  265. component.attribute_name = attribute_name;
  266. component.attribute_value = attribute_value;
  267. if (expected_end_of_attribute_selector != ']')
  268. consume_specific(expected_end_of_attribute_selector);
  269. consume_whitespace_or_comments();
  270. consume_specific(']');
  271. }
  272. if (peek() == ':') {
  273. // FIXME: Implement pseudo elements.
  274. [[maybe_unused]] bool is_pseudo_element = false;
  275. consume_one();
  276. if (peek() == ':') {
  277. is_pseudo_element = true;
  278. consume_one();
  279. }
  280. while (is_valid_selector_char(peek()))
  281. buffer.append(consume_one());
  282. auto pseudo_name = String::copy(buffer);
  283. buffer.clear();
  284. if (pseudo_name == "link")
  285. component.pseudo_class = Selector::Component::PseudoClass::Link;
  286. else if (pseudo_name == "hover")
  287. component.pseudo_class = Selector::Component::PseudoClass::Hover;
  288. }
  289. return component;
  290. }
  291. void parse_selector()
  292. {
  293. Vector<Selector::Component> components;
  294. for (;;) {
  295. auto component = parse_selector_component();
  296. if (component.has_value())
  297. components.append(component.value());
  298. consume_whitespace_or_comments();
  299. if (peek() == ',' || peek() == '{')
  300. break;
  301. }
  302. if (components.is_empty())
  303. return;
  304. components.first().relation = Selector::Component::Relation::None;
  305. current_rule.selectors.append(Selector(move(components)));
  306. };
  307. void parse_selector_list()
  308. {
  309. for (;;) {
  310. parse_selector();
  311. consume_whitespace_or_comments();
  312. if (peek() == ',') {
  313. consume_one();
  314. continue;
  315. }
  316. if (peek() == '{')
  317. break;
  318. }
  319. }
  320. bool is_valid_property_name_char(char ch) const
  321. {
  322. return ch && !isspace(ch) && ch != ':';
  323. }
  324. bool is_valid_property_value_char(char ch) const
  325. {
  326. return ch && ch != '!' && ch != ';' && ch != '}';
  327. }
  328. struct ValueAndImportant {
  329. String value;
  330. bool important { false };
  331. };
  332. ValueAndImportant consume_css_value()
  333. {
  334. buffer.clear();
  335. int paren_nesting_level = 0;
  336. bool important = false;
  337. for (;;) {
  338. char ch = peek();
  339. if (ch == '(') {
  340. ++paren_nesting_level;
  341. buffer.append(consume_one());
  342. continue;
  343. }
  344. if (ch == ')') {
  345. PARSE_ASSERT(paren_nesting_level > 0);
  346. --paren_nesting_level;
  347. buffer.append(consume_one());
  348. continue;
  349. }
  350. if (paren_nesting_level > 0) {
  351. buffer.append(consume_one());
  352. continue;
  353. }
  354. if (next_is("!important")) {
  355. consume_specific('!');
  356. consume_specific('i');
  357. consume_specific('m');
  358. consume_specific('p');
  359. consume_specific('o');
  360. consume_specific('r');
  361. consume_specific('t');
  362. consume_specific('a');
  363. consume_specific('n');
  364. consume_specific('t');
  365. important = true;
  366. continue;
  367. }
  368. if (next_is("/*")) {
  369. consume_whitespace_or_comments();
  370. continue;
  371. }
  372. if (!ch)
  373. break;
  374. if (ch == '}')
  375. break;
  376. if (ch == ';')
  377. break;
  378. buffer.append(consume_one());
  379. }
  380. // Remove trailing whitespace.
  381. while (!buffer.is_empty() && isspace(buffer.last()))
  382. buffer.take_last();
  383. auto string = String::copy(buffer);
  384. buffer.clear();
  385. return { string, important };
  386. }
  387. Optional<StyleProperty> parse_property()
  388. {
  389. consume_whitespace_or_comments();
  390. if (peek() == ';') {
  391. consume_one();
  392. return {};
  393. }
  394. if (peek() == '}')
  395. return {};
  396. buffer.clear();
  397. while (is_valid_property_name_char(peek()))
  398. buffer.append(consume_one());
  399. auto property_name = String::copy(buffer);
  400. buffer.clear();
  401. consume_whitespace_or_comments();
  402. consume_specific(':');
  403. consume_whitespace_or_comments();
  404. auto [property_value, important] = consume_css_value();
  405. consume_whitespace_or_comments();
  406. if (peek() && peek() != '}')
  407. consume_specific(';');
  408. auto property_id = CSS::property_id_from_string(property_name);
  409. return StyleProperty { property_id, parse_css_value(property_value), important };
  410. }
  411. void parse_declaration()
  412. {
  413. for (;;) {
  414. auto property = parse_property();
  415. if (property.has_value())
  416. current_rule.properties.append(property.value());
  417. consume_whitespace_or_comments();
  418. if (peek() == '}')
  419. break;
  420. }
  421. }
  422. void parse_rule()
  423. {
  424. consume_whitespace_or_comments();
  425. if (index >= css.length())
  426. return;
  427. // FIXME: We ignore @media rules for now.
  428. if (next_is("@media")) {
  429. while (peek() != '{')
  430. consume_one();
  431. int level = 0;
  432. for (;;) {
  433. auto ch = consume_one();
  434. if (ch == '{') {
  435. ++level;
  436. } else if (ch == '}') {
  437. --level;
  438. if (level == 0)
  439. break;
  440. }
  441. }
  442. consume_whitespace_or_comments();
  443. return;
  444. }
  445. parse_selector_list();
  446. consume_specific('{');
  447. parse_declaration();
  448. consume_specific('}');
  449. rules.append(StyleRule::create(move(current_rule.selectors), StyleDeclaration::create(move(current_rule.properties))));
  450. consume_whitespace_or_comments();
  451. }
  452. RefPtr<StyleSheet> parse_sheet()
  453. {
  454. while (index < css.length()) {
  455. parse_rule();
  456. }
  457. return StyleSheet::create(move(rules));
  458. }
  459. RefPtr<StyleDeclaration> parse_standalone_declaration()
  460. {
  461. consume_whitespace_or_comments();
  462. for (;;) {
  463. auto property = parse_property();
  464. if (property.has_value())
  465. current_rule.properties.append(property.value());
  466. consume_whitespace_or_comments();
  467. if (!peek())
  468. break;
  469. }
  470. return StyleDeclaration::create(move(current_rule.properties));
  471. }
  472. private:
  473. NonnullRefPtrVector<StyleRule> rules;
  474. struct CurrentRule {
  475. Vector<Selector> selectors;
  476. Vector<StyleProperty> properties;
  477. };
  478. CurrentRule current_rule;
  479. Vector<char> buffer;
  480. int index = 0;
  481. StringView css;
  482. };
  483. RefPtr<StyleSheet> parse_css(const StringView& css)
  484. {
  485. CSSParser parser(css);
  486. return parser.parse_sheet();
  487. }
  488. RefPtr<StyleDeclaration> parse_css_declaration(const StringView& css)
  489. {
  490. CSSParser parser(css);
  491. return parser.parse_standalone_declaration();
  492. }