CSSParser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. #include <AK/HashMap.h>
  2. #include <LibHTML/CSS/PropertyID.h>
  3. #include <LibHTML/CSS/StyleSheet.h>
  4. #include <LibHTML/Parser/CSSParser.h>
  5. #include <ctype.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #define PARSE_ASSERT(x) \
  9. if (!(x)) { \
  10. dbg() << "CSS PARSER ASSERTION FAILED: " << #x; \
  11. dbg() << "At character# " << index << " in CSS: _" << css << "_"; \
  12. ASSERT_NOT_REACHED(); \
  13. }
  14. static Optional<Color> parse_css_color(const StringView& view)
  15. {
  16. auto color = Color::from_string(view);
  17. if (color.has_value())
  18. return color;
  19. // FIXME: Parse all valid color strings :^)
  20. return {};
  21. }
  22. static Optional<float> try_parse_float(const StringView& string)
  23. {
  24. const char* str = string.characters_without_null_termination();
  25. size_t len = string.length();
  26. size_t weight = 1;
  27. int exp_val = 0;
  28. float value = 0.0f;
  29. float fraction = 0.0f;
  30. bool has_sign = false;
  31. bool is_negative = false;
  32. bool is_fractional = false;
  33. bool is_scientific = false;
  34. if (str[0] == '-') {
  35. is_negative = true;
  36. has_sign = true;
  37. }
  38. if (str[0] == '+') {
  39. has_sign = true;
  40. }
  41. for (size_t i = has_sign; i < len; i++) {
  42. // Looks like we're about to start working on the fractional part
  43. if (str[i] == '.') {
  44. is_fractional = true;
  45. continue;
  46. }
  47. if (str[i] == 'e' || str[i] == 'E') {
  48. if (str[i + 1] == '-' || str[i + 1] == '+')
  49. exp_val = atoi(str + i + 2);
  50. else
  51. exp_val = atoi(str + i + 1);
  52. is_scientific = true;
  53. continue;
  54. }
  55. if (str[i] < '0' || str[i] > '9' || exp_val != 0) {
  56. return {};
  57. continue;
  58. }
  59. if (is_fractional) {
  60. fraction *= 10;
  61. fraction += str[i] - '0';
  62. weight *= 10;
  63. } else {
  64. value = value * 10;
  65. value += str[i] - '0';
  66. }
  67. }
  68. fraction /= weight;
  69. value += fraction;
  70. if (is_scientific) {
  71. bool divide = exp_val < 0;
  72. if (divide)
  73. exp_val *= -1;
  74. for (int i = 0; i < exp_val; i++) {
  75. if (divide)
  76. value /= 10;
  77. else
  78. value *= 10;
  79. }
  80. }
  81. return is_negative ? -value : value;
  82. }
  83. static Optional<float> parse_number(const StringView& view)
  84. {
  85. if (view.length() >= 2 && view[view.length() - 2] == 'p' && view[view.length() - 1] == 'x')
  86. return parse_number(view.substring_view(0, view.length() - 2));
  87. return try_parse_float(view);
  88. }
  89. NonnullRefPtr<StyleValue> parse_css_value(const StringView& string)
  90. {
  91. auto number = parse_number(string);
  92. if (number.has_value())
  93. return LengthStyleValue::create(Length(number.value(), Length::Type::Absolute));
  94. if (string == "inherit")
  95. return InheritStyleValue::create();
  96. if (string == "initial")
  97. return InitialStyleValue::create();
  98. if (string == "auto")
  99. return LengthStyleValue::create(Length());
  100. auto color = parse_css_color(string);
  101. if (color.has_value())
  102. return ColorStyleValue::create(color.value());
  103. if (string == "-libhtml-link")
  104. return IdentifierStyleValue::create(CSS::ValueID::VendorSpecificLink);
  105. return StringStyleValue::create(string);
  106. }
  107. class CSSParser {
  108. public:
  109. CSSParser(const StringView& input)
  110. : css(input)
  111. {
  112. }
  113. bool next_is(const char* str) const
  114. {
  115. int len = strlen(str);
  116. for (int i = 0; i < len; ++i) {
  117. if (peek(i) != str[i])
  118. return false;
  119. }
  120. return true;
  121. }
  122. char peek(int offset = 0) const
  123. {
  124. if ((index + offset) < css.length())
  125. return css[index + offset];
  126. return 0;
  127. }
  128. char consume_specific(char ch)
  129. {
  130. if (peek() != ch) {
  131. dbg() << "peek() != '" << ch << "'";
  132. }
  133. PARSE_ASSERT(peek() == ch);
  134. PARSE_ASSERT(index < css.length());
  135. ++index;
  136. return ch;
  137. }
  138. char consume_one()
  139. {
  140. PARSE_ASSERT(index < css.length());
  141. return css[index++];
  142. };
  143. bool consume_whitespace_or_comments()
  144. {
  145. int original_index = index;
  146. bool in_comment = false;
  147. for (; index < css.length(); ++index) {
  148. char ch = peek();
  149. if (isspace(ch))
  150. continue;
  151. if (!in_comment && ch == '/' && peek(1) == '*') {
  152. in_comment = true;
  153. ++index;
  154. continue;
  155. }
  156. if (in_comment && ch == '*' && peek(1) == '/') {
  157. in_comment = false;
  158. ++index;
  159. continue;
  160. }
  161. if (in_comment)
  162. continue;
  163. break;
  164. }
  165. return original_index != index;
  166. }
  167. bool is_valid_selector_char(char ch) const
  168. {
  169. return isalnum(ch) || ch == '-' || ch == '_' || ch == '(' || ch == ')' || ch == '@';
  170. }
  171. bool is_combinator(char ch) const
  172. {
  173. return ch == '~' || ch == '>' || ch == '+';
  174. }
  175. Optional<Selector::SimpleSelector> parse_selector_component()
  176. {
  177. if (consume_whitespace_or_comments())
  178. return {};
  179. if (peek() == '{' || peek() == ',')
  180. return {};
  181. Selector::SimpleSelector::Type type;
  182. if (peek() == '*') {
  183. type = Selector::SimpleSelector::Type::Universal;
  184. consume_one();
  185. return Selector::SimpleSelector {
  186. type,
  187. Selector::SimpleSelector::PseudoClass::None,
  188. String(),
  189. Selector::SimpleSelector::AttributeMatchType::None,
  190. String(),
  191. String()
  192. };
  193. }
  194. if (peek() == '.') {
  195. type = Selector::SimpleSelector::Type::Class;
  196. consume_one();
  197. } else if (peek() == '#') {
  198. type = Selector::SimpleSelector::Type::Id;
  199. consume_one();
  200. } else if (isalpha(peek())) {
  201. type = Selector::SimpleSelector::Type::TagName;
  202. } else {
  203. type = Selector::SimpleSelector::Type::Universal;
  204. }
  205. if (type != Selector::SimpleSelector::Type::Universal) {
  206. while (is_valid_selector_char(peek()))
  207. buffer.append(consume_one());
  208. PARSE_ASSERT(!buffer.is_null());
  209. }
  210. Selector::SimpleSelector component {
  211. type,
  212. Selector::SimpleSelector::PseudoClass::None,
  213. String::copy(buffer),
  214. Selector::SimpleSelector::AttributeMatchType::None,
  215. String(),
  216. String()
  217. };
  218. buffer.clear();
  219. if (peek() == '[') {
  220. Selector::SimpleSelector::AttributeMatchType attribute_match_type = Selector::SimpleSelector::AttributeMatchType::HasAttribute;
  221. String attribute_name;
  222. String attribute_value;
  223. bool in_value = false;
  224. consume_specific('[');
  225. char expected_end_of_attribute_selector = ']';
  226. while (peek() != expected_end_of_attribute_selector) {
  227. char ch = consume_one();
  228. if (ch == '=') {
  229. attribute_match_type = Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
  230. attribute_name = String::copy(buffer);
  231. buffer.clear();
  232. in_value = true;
  233. consume_whitespace_or_comments();
  234. if (peek() == '\'') {
  235. expected_end_of_attribute_selector = '\'';
  236. consume_one();
  237. } else if (peek() == '"') {
  238. expected_end_of_attribute_selector = '"';
  239. consume_one();
  240. }
  241. continue;
  242. }
  243. buffer.append(ch);
  244. }
  245. if (in_value)
  246. attribute_value = String::copy(buffer);
  247. else
  248. attribute_name = String::copy(buffer);
  249. buffer.clear();
  250. component.attribute_match_type = attribute_match_type;
  251. component.attribute_name = attribute_name;
  252. component.attribute_value = attribute_value;
  253. if (expected_end_of_attribute_selector != ']')
  254. consume_specific(expected_end_of_attribute_selector);
  255. consume_whitespace_or_comments();
  256. consume_specific(']');
  257. }
  258. if (peek() == ':') {
  259. // FIXME: Implement pseudo elements.
  260. [[maybe_unused]] bool is_pseudo_element = false;
  261. consume_one();
  262. if (peek() == ':') {
  263. is_pseudo_element = true;
  264. consume_one();
  265. }
  266. while (is_valid_selector_char(peek()))
  267. buffer.append(consume_one());
  268. auto pseudo_name = String::copy(buffer);
  269. buffer.clear();
  270. if (pseudo_name == "link")
  271. component.pseudo_class = Selector::SimpleSelector::PseudoClass::Link;
  272. else if (pseudo_name == "hover")
  273. component.pseudo_class = Selector::SimpleSelector::PseudoClass::Hover;
  274. }
  275. return component;
  276. }
  277. Optional<Selector::ComplexSelector> parse_selector_component_list()
  278. {
  279. auto relation = Selector::ComplexSelector::Relation::Descendant;
  280. if (peek() == '{' || peek() == ',')
  281. return {};
  282. if (is_combinator(peek())) {
  283. switch (peek()) {
  284. case '>':
  285. relation = Selector::ComplexSelector::Relation::ImmediateChild;
  286. break;
  287. case '+':
  288. relation = Selector::ComplexSelector::Relation::AdjacentSibling;
  289. break;
  290. case '~':
  291. relation = Selector::ComplexSelector::Relation::GeneralSibling;
  292. break;
  293. }
  294. consume_one();
  295. consume_whitespace_or_comments();
  296. }
  297. consume_whitespace_or_comments();
  298. Vector<Selector::SimpleSelector> components;
  299. for (;;) {
  300. dbg() << "calling parse_selector_component at index " << index << ", peek=" << peek();
  301. auto component = parse_selector_component();
  302. if (!component.has_value())
  303. break;
  304. components.append(component.value());
  305. PARSE_ASSERT(components.size() < 10);
  306. }
  307. return Selector::ComplexSelector { relation, move(components) };
  308. }
  309. void parse_selector()
  310. {
  311. Vector<Selector::ComplexSelector> component_lists;
  312. for (;;) {
  313. auto component_list = parse_selector_component_list();
  314. if (component_list.has_value())
  315. component_lists.append(component_list.value());
  316. consume_whitespace_or_comments();
  317. if (peek() == ',' || peek() == '{')
  318. break;
  319. }
  320. if (component_lists.is_empty())
  321. return;
  322. component_lists.first().relation = Selector::ComplexSelector::Relation::None;
  323. current_rule.selectors.append(Selector(move(component_lists)));
  324. };
  325. void parse_selector_list()
  326. {
  327. for (;;) {
  328. parse_selector();
  329. consume_whitespace_or_comments();
  330. if (peek() == ',') {
  331. consume_one();
  332. continue;
  333. }
  334. if (peek() == '{')
  335. break;
  336. }
  337. }
  338. bool is_valid_property_name_char(char ch) const
  339. {
  340. return ch && !isspace(ch) && ch != ':';
  341. }
  342. bool is_valid_property_value_char(char ch) const
  343. {
  344. return ch && ch != '!' && ch != ';' && ch != '}';
  345. }
  346. struct ValueAndImportant {
  347. String value;
  348. bool important { false };
  349. };
  350. ValueAndImportant consume_css_value()
  351. {
  352. buffer.clear();
  353. int paren_nesting_level = 0;
  354. bool important = false;
  355. for (;;) {
  356. char ch = peek();
  357. if (ch == '(') {
  358. ++paren_nesting_level;
  359. buffer.append(consume_one());
  360. continue;
  361. }
  362. if (ch == ')') {
  363. PARSE_ASSERT(paren_nesting_level > 0);
  364. --paren_nesting_level;
  365. buffer.append(consume_one());
  366. continue;
  367. }
  368. if (paren_nesting_level > 0) {
  369. buffer.append(consume_one());
  370. continue;
  371. }
  372. if (next_is("!important")) {
  373. consume_specific('!');
  374. consume_specific('i');
  375. consume_specific('m');
  376. consume_specific('p');
  377. consume_specific('o');
  378. consume_specific('r');
  379. consume_specific('t');
  380. consume_specific('a');
  381. consume_specific('n');
  382. consume_specific('t');
  383. important = true;
  384. continue;
  385. }
  386. if (next_is("/*")) {
  387. consume_whitespace_or_comments();
  388. continue;
  389. }
  390. if (!ch)
  391. break;
  392. if (ch == '}')
  393. break;
  394. if (ch == ';')
  395. break;
  396. buffer.append(consume_one());
  397. }
  398. // Remove trailing whitespace.
  399. while (!buffer.is_empty() && isspace(buffer.last()))
  400. buffer.take_last();
  401. auto string = String::copy(buffer);
  402. buffer.clear();
  403. return { string, important };
  404. }
  405. Optional<StyleProperty> parse_property()
  406. {
  407. consume_whitespace_or_comments();
  408. if (peek() == ';') {
  409. consume_one();
  410. return {};
  411. }
  412. if (peek() == '}')
  413. return {};
  414. buffer.clear();
  415. while (is_valid_property_name_char(peek()))
  416. buffer.append(consume_one());
  417. auto property_name = String::copy(buffer);
  418. buffer.clear();
  419. consume_whitespace_or_comments();
  420. consume_specific(':');
  421. consume_whitespace_or_comments();
  422. auto [property_value, important] = consume_css_value();
  423. consume_whitespace_or_comments();
  424. if (peek() && peek() != '}')
  425. consume_specific(';');
  426. auto property_id = CSS::property_id_from_string(property_name);
  427. return StyleProperty { property_id, parse_css_value(property_value), important };
  428. }
  429. void parse_declaration()
  430. {
  431. for (;;) {
  432. auto property = parse_property();
  433. if (property.has_value())
  434. current_rule.properties.append(property.value());
  435. consume_whitespace_or_comments();
  436. if (peek() == '}')
  437. break;
  438. }
  439. }
  440. void parse_rule()
  441. {
  442. consume_whitespace_or_comments();
  443. if (index >= css.length())
  444. return;
  445. // FIXME: We ignore @media rules for now.
  446. if (next_is("@media")) {
  447. while (peek() != '{')
  448. consume_one();
  449. int level = 0;
  450. for (;;) {
  451. auto ch = consume_one();
  452. if (ch == '{') {
  453. ++level;
  454. } else if (ch == '}') {
  455. --level;
  456. if (level == 0)
  457. break;
  458. }
  459. }
  460. consume_whitespace_or_comments();
  461. return;
  462. }
  463. parse_selector_list();
  464. consume_specific('{');
  465. parse_declaration();
  466. consume_specific('}');
  467. rules.append(StyleRule::create(move(current_rule.selectors), StyleDeclaration::create(move(current_rule.properties))));
  468. consume_whitespace_or_comments();
  469. }
  470. RefPtr<StyleSheet> parse_sheet()
  471. {
  472. while (index < css.length()) {
  473. parse_rule();
  474. }
  475. return StyleSheet::create(move(rules));
  476. }
  477. RefPtr<StyleDeclaration> parse_standalone_declaration()
  478. {
  479. consume_whitespace_or_comments();
  480. for (;;) {
  481. auto property = parse_property();
  482. if (property.has_value())
  483. current_rule.properties.append(property.value());
  484. consume_whitespace_or_comments();
  485. if (!peek())
  486. break;
  487. }
  488. return StyleDeclaration::create(move(current_rule.properties));
  489. }
  490. private:
  491. NonnullRefPtrVector<StyleRule> rules;
  492. struct CurrentRule {
  493. Vector<Selector> selectors;
  494. Vector<StyleProperty> properties;
  495. };
  496. CurrentRule current_rule;
  497. Vector<char> buffer;
  498. int index = 0;
  499. StringView css;
  500. };
  501. RefPtr<StyleSheet> parse_css(const StringView& css)
  502. {
  503. CSSParser parser(css);
  504. return parser.parse_sheet();
  505. }
  506. RefPtr<StyleDeclaration> parse_css_declaration(const StringView& css)
  507. {
  508. CSSParser parser(css);
  509. return parser.parse_standalone_declaration();
  510. }