/*
 * Copyright (c) 2020-2021, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
#include <AK/CharacterTypes.h>
#include <AK/SourceLocation.h>
#include <AK/Vector.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/CSS/Parser/Tokenizer.h>
  11. #define CSS_TOKENIZER_TRACE 0
  12. //U+FFFD REPLACEMENT CHARACTER (�)
  13. #define REPLACEMENT_CHARACTER 0xFFFD
  14. static inline void log_parse_error(const SourceLocation& location = SourceLocation::current())
  15. {
  16. dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location);
  17. }
  18. static inline bool is_quotation_mark(u32 code_point)
  19. {
  20. return code_point == 0x22;
  21. }
  22. static inline bool is_greater_than_maximum_allowed_code_point(u32 code_point)
  23. {
  24. return code_point > 0x10FFFF;
  25. }
  26. static inline bool is_low_line(u32 code_point)
  27. {
  28. return code_point == 0x5F;
  29. }
  30. static inline bool is_name_start_code_point(u32 code_point)
  31. {
  32. return is_ascii_alpha(code_point) || !is_ascii(code_point) || is_low_line(code_point);
  33. }
  34. static inline bool is_hyphen_minus(u32 code_point)
  35. {
  36. return code_point == 0x2D;
  37. }
  38. static inline bool is_name_code_point(u32 code_point)
  39. {
  40. return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
  41. }
  42. static inline bool is_non_printable(u32 code_point)
  43. {
  44. return code_point <= 0x8 || code_point == 0xB || (code_point >= 0xE && code_point <= 0x1F) || code_point == 0x7F;
  45. }
  46. static inline bool is_number_sign(u32 code_point)
  47. {
  48. return code_point == 0x23;
  49. }
  50. static inline bool is_reverse_solidus(u32 code_point)
  51. {
  52. return code_point == 0x5C;
  53. }
  54. static inline bool is_apostrophe(u32 code_point)
  55. {
  56. return code_point == 0x27;
  57. }
  58. static inline bool is_left_paren(u32 code_point)
  59. {
  60. return code_point == 0x28;
  61. }
  62. static inline bool is_right_paren(u32 code_point)
  63. {
  64. return code_point == 0x29;
  65. }
  66. static inline bool is_plus_sign(u32 code_point)
  67. {
  68. return code_point == 0x2B;
  69. }
  70. static inline bool is_comma(u32 code_point)
  71. {
  72. return code_point == 0x2C;
  73. }
  74. static inline bool is_full_stop(u32 code_point)
  75. {
  76. return code_point == 0x2E;
  77. }
  78. static inline bool is_newline(u32 code_point)
  79. {
  80. return code_point == 0xA;
  81. }
  82. static inline bool is_asterisk(u32 code_point)
  83. {
  84. return code_point == 0x2A;
  85. }
  86. static inline bool is_solidus(u32 code_point)
  87. {
  88. return code_point == 0x2F;
  89. }
  90. static inline bool is_colon(u32 code_point)
  91. {
  92. return code_point == 0x3A;
  93. }
  94. static inline bool is_semicolon(u32 code_point)
  95. {
  96. return code_point == 0x3B;
  97. }
  98. static inline bool is_less_than_sign(u32 code_point)
  99. {
  100. return code_point == 0x3C;
  101. }
  102. static inline bool is_greater_than_sign(u32 code_point)
  103. {
  104. return code_point == 0x3E;
  105. }
  106. static inline bool is_at(u32 code_point)
  107. {
  108. return code_point == 0x40;
  109. }
  110. static inline bool is_open_square_bracket(u32 code_point)
  111. {
  112. return code_point == 0x5B;
  113. }
  114. static inline bool is_closed_square_bracket(u32 code_point)
  115. {
  116. return code_point == 0x5D;
  117. }
  118. static inline bool is_open_curly_bracket(u32 code_point)
  119. {
  120. return code_point == 0x7B;
  121. }
  122. static inline bool is_closed_curly_bracket(u32 code_point)
  123. {
  124. return code_point == 0x7D;
  125. }
  126. static inline bool is_whitespace(u32 code_point)
  127. {
  128. return code_point == 0x9 || code_point == 0xA || code_point == 0x20;
  129. }
  130. static inline bool is_percent(u32 code_point)
  131. {
  132. return code_point == 0x25;
  133. }
  134. static inline bool is_exclamation_mark(u32 code_point)
  135. {
  136. return code_point == 0x21;
  137. }
  138. static inline bool is_e(u32 code_point)
  139. {
  140. return code_point == 0x65;
  141. }
  142. static inline bool is_E(u32 code_point)
  143. {
  144. return code_point == 0x45;
  145. }
  146. namespace Web::CSS {
  147. Tokenizer::Tokenizer(const StringView& input, const String& encoding)
  148. {
  149. auto* decoder = TextCodec::decoder_for(encoding);
  150. VERIFY(decoder);
  151. // FIXME: preprocess the stream
  152. // https://www.w3.org/TR/css-syntax-3/#input-preprocessing
  153. m_decoded_input = decoder->to_utf8(input);
  154. m_utf8_view = Utf8View(m_decoded_input);
  155. m_utf8_iterator = m_utf8_view.begin();
  156. }
  157. Vector<Token> Tokenizer::parse()
  158. {
  159. Vector<Token> tokens;
  160. for (;;) {
  161. auto token = consume_a_token();
  162. tokens.append(token);
  163. if (token.is_eof()) {
  164. return tokens;
  165. }
  166. }
  167. }
  168. Optional<u32> Tokenizer::next_code_point()
  169. {
  170. if (m_utf8_iterator == m_utf8_view.end())
  171. return {};
  172. m_prev_utf8_iterator = m_utf8_iterator;
  173. ++m_utf8_iterator;
  174. dbgln_if(CSS_TOKENIZER_TRACE, "(Tokenizer) Next code_point: {:c}", (char)*m_prev_utf8_iterator);
  175. return *m_prev_utf8_iterator;
  176. }
  177. Optional<u32> Tokenizer::peek_code_point(size_t offset) const
  178. {
  179. auto it = m_utf8_iterator;
  180. for (size_t i = 0; i < offset && it != m_utf8_view.end(); ++i)
  181. ++it;
  182. if (it == m_utf8_view.end())
  183. return {};
  184. return *it;
  185. }
  186. Optional<U32Twin> Tokenizer::peek_twin() const
  187. {
  188. U32Twin values;
  189. auto it = m_utf8_iterator;
  190. for (size_t i = 0; i < 2 && it != m_utf8_view.end(); ++i) {
  191. if (it == m_utf8_view.end())
  192. return {};
  193. values.set(i, *it);
  194. ++it;
  195. }
  196. return values;
  197. }
  198. Optional<U32Triplet> Tokenizer::peek_triplet() const
  199. {
  200. U32Triplet values;
  201. auto it = m_utf8_iterator;
  202. for (size_t i = 0; i < 3 && it != m_utf8_view.end(); ++i) {
  203. if (it == m_utf8_view.end())
  204. return {};
  205. values.set(i, *it);
  206. ++it;
  207. }
  208. return values;
  209. }
  210. Token Tokenizer::create_new_token(Token::TokenType type)
  211. {
  212. Token token = {};
  213. token.m_type = type;
  214. return token;
  215. }
  216. Token Tokenizer::create_value_token(Token::TokenType type, String value)
  217. {
  218. Token token;
  219. token.m_type = type;
  220. token.m_value.append(move(value));
  221. return token;
  222. }
  223. Token Tokenizer::create_value_token(Token::TokenType type, u32 value)
  224. {
  225. Token token = {};
  226. token.m_type = type;
  227. token.m_value.append_code_point(value);
  228. return token;
  229. }
  230. u32 Tokenizer::consume_escaped_code_point()
  231. {
  232. auto code_point = next_code_point();
  233. if (!code_point.has_value()) {
  234. log_parse_error();
  235. return REPLACEMENT_CHARACTER;
  236. }
  237. auto input = code_point.value();
  238. if (is_ascii_hex_digit(input)) {
  239. StringBuilder builder;
  240. builder.append_code_point(input);
  241. size_t counter = 0;
  242. while (is_ascii_hex_digit(peek_code_point().value()) && counter++ < 5) {
  243. builder.append_code_point(next_code_point().value());
  244. }
  245. if (is_whitespace(peek_code_point().value())) {
  246. (void)next_code_point();
  247. }
  248. auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16);
  249. if (unhexed == 0 || is_unicode_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
  250. return REPLACEMENT_CHARACTER;
  251. }
  252. return unhexed;
  253. }
  254. if (!input) {
  255. log_parse_error();
  256. return REPLACEMENT_CHARACTER;
  257. }
  258. return input;
  259. }
  260. Token Tokenizer::consume_an_ident_like_token()
  261. {
  262. auto string = consume_a_name();
  263. if (string.equals_ignoring_case("url") && is_left_paren(peek_code_point().value())) {
  264. (void)next_code_point();
  265. for (;;) {
  266. auto maybe_whitespace = peek_twin().value();
  267. if (!(is_whitespace(maybe_whitespace.first) && is_whitespace(maybe_whitespace.second))) {
  268. break;
  269. }
  270. (void)next_code_point();
  271. }
  272. auto next_two = peek_twin().value();
  273. // if one of these ", ', ' "', " '"
  274. if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) {
  275. return create_value_token(Token::TokenType::Function, string);
  276. }
  277. return consume_a_url_token();
  278. }
  279. if (is_left_paren(peek_code_point().value())) {
  280. (void)next_code_point();
  281. return create_value_token(Token::TokenType::Function, string);
  282. }
  283. return create_value_token(Token::TokenType::Ident, string);
  284. }
  285. CSSNumber Tokenizer::consume_a_number()
  286. {
  287. StringBuilder repr;
  288. Token::NumberType type = Token::NumberType::Integer;
  289. auto next_input = peek_code_point().value();
  290. if (is_plus_sign(next_input) || is_hyphen_minus(next_input)) {
  291. repr.append_code_point(next_code_point().value());
  292. }
  293. for (;;) {
  294. auto digits = peek_code_point().value();
  295. if (!is_ascii_digit(digits))
  296. break;
  297. repr.append_code_point(next_code_point().value());
  298. }
  299. auto maybe_number = peek_twin().value();
  300. if (is_full_stop(maybe_number.first) && is_ascii_digit(maybe_number.second)) {
  301. repr.append_code_point(next_code_point().value());
  302. repr.append_code_point(next_code_point().value());
  303. type = Token::NumberType::Number;
  304. for (;;) {
  305. auto digits = peek_code_point();
  306. if (digits.has_value() && !is_ascii_digit(digits.value()))
  307. break;
  308. repr.append_code_point(next_code_point().value());
  309. }
  310. }
  311. auto maybe_exp = peek_triplet().value();
  312. if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) {
  313. if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) {
  314. if (is_ascii_digit(maybe_exp.third)) {
  315. repr.append_code_point(next_code_point().value());
  316. repr.append_code_point(next_code_point().value());
  317. repr.append_code_point(next_code_point().value());
  318. }
  319. } else if (is_ascii_digit(maybe_exp.second)) {
  320. repr.append_code_point(next_code_point().value());
  321. repr.append_code_point(next_code_point().value());
  322. }
  323. type = Token::NumberType::Number;
  324. for (;;) {
  325. auto digits = peek_code_point().value();
  326. if (!is_ascii_digit(digits))
  327. break;
  328. repr.append_code_point(next_code_point().value());
  329. }
  330. }
  331. return { repr.to_string(), type };
  332. }
  333. String Tokenizer::consume_a_name()
  334. {
  335. StringBuilder result;
  336. for (;;) {
  337. auto input = next_code_point().value();
  338. if (is_name_code_point(input)) {
  339. result.append_code_point(input);
  340. continue;
  341. }
  342. auto next = peek_code_point();
  343. if (next.has_value() && is_valid_escape_sequence({ input, next.value() })) {
  344. result.append_code_point(consume_escaped_code_point());
  345. continue;
  346. }
  347. break;
  348. }
  349. reconsume_current_input_code_point();
  350. return result.to_string();
  351. }
  352. Token Tokenizer::consume_a_url_token()
  353. {
  354. auto token = create_new_token(Token::TokenType::Url);
  355. for (;;) {
  356. if (!is_whitespace(peek_code_point().value())) {
  357. break;
  358. }
  359. (void)next_code_point();
  360. }
  361. for (;;) {
  362. auto code_point = peek_code_point();
  363. if (!code_point.has_value()) {
  364. log_parse_error();
  365. return token;
  366. }
  367. auto input = code_point.value();
  368. if (is_right_paren(input)) {
  369. (void)next_code_point();
  370. return token;
  371. }
  372. if (is_whitespace(input)) {
  373. for (;;) {
  374. if (!is_whitespace(peek_code_point().value())) {
  375. break;
  376. }
  377. code_point = next_code_point();
  378. }
  379. if (!code_point.has_value()) {
  380. log_parse_error();
  381. return token;
  382. }
  383. input = code_point.value();
  384. if (is_right_paren(input)) {
  385. return token;
  386. }
  387. consume_the_remnants_of_a_bad_url();
  388. return create_new_token(Token::TokenType::BadUrl);
  389. }
  390. if (is_quotation_mark(input) || is_apostrophe(input) || is_left_paren(input) || is_non_printable(input)) {
  391. log_parse_error();
  392. (void)next_code_point();
  393. consume_the_remnants_of_a_bad_url();
  394. return create_new_token(Token::TokenType::BadUrl);
  395. }
  396. if (is_reverse_solidus(input)) {
  397. if (is_valid_escape_sequence()) {
  398. token.m_value.append_code_point(consume_escaped_code_point());
  399. } else {
  400. log_parse_error();
  401. (void)next_code_point();
  402. consume_the_remnants_of_a_bad_url();
  403. return create_new_token(Token::TokenType::BadUrl);
  404. }
  405. }
  406. token.m_value.append_code_point(next_code_point().value());
  407. }
  408. }
  409. void Tokenizer::consume_the_remnants_of_a_bad_url()
  410. {
  411. for (;;) {
  412. auto next = peek_code_point();
  413. if (!next.has_value()) {
  414. return;
  415. }
  416. auto input = next.value();
  417. if (is_right_paren(input)) {
  418. (void)next_code_point();
  419. return;
  420. }
  421. if (is_valid_escape_sequence()) {
  422. [[maybe_unused]] auto cp = consume_escaped_code_point();
  423. }
  424. (void)next_code_point();
  425. }
  426. }
  427. void Tokenizer::reconsume_current_input_code_point()
  428. {
  429. m_utf8_iterator = m_prev_utf8_iterator;
  430. }
  431. Token Tokenizer::consume_a_numeric_token()
  432. {
  433. auto number = consume_a_number();
  434. if (would_start_an_identifier()) {
  435. auto token = create_new_token(Token::TokenType::Dimension);
  436. token.m_value.append(number.value);
  437. token.m_number_type = number.type;
  438. auto unit = consume_a_name();
  439. token.m_unit.append(unit);
  440. return token;
  441. }
  442. if (is_percent(peek_code_point().value())) {
  443. (void)next_code_point();
  444. auto token = create_new_token(Token::TokenType::Percentage);
  445. token.m_value.append(number.value);
  446. return token;
  447. }
  448. auto token = create_new_token(Token::TokenType::Number);
  449. token.m_value.append(number.value);
  450. token.m_number_type = number.type;
  451. return token;
  452. }
  453. bool Tokenizer::starts_with_a_number() const
  454. {
  455. return starts_with_a_number(peek_triplet().value());
  456. }
  457. bool Tokenizer::starts_with_a_number(U32Triplet values)
  458. {
  459. if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) {
  460. if (is_ascii_digit(values.second))
  461. return true;
  462. if (is_full_stop(values.second) && is_ascii_digit(values.third))
  463. return true;
  464. return false;
  465. }
  466. if (is_full_stop(values.first))
  467. return is_ascii_digit(values.second);
  468. if (is_ascii_digit(values.first))
  469. return true;
  470. return false;
  471. }
  472. bool Tokenizer::is_valid_escape_sequence()
  473. {
  474. return is_valid_escape_sequence(peek_twin().value());
  475. }
  476. bool Tokenizer::is_valid_escape_sequence(U32Twin values)
  477. {
  478. if (!is_reverse_solidus(values.first)) {
  479. return false;
  480. }
  481. if (is_newline(values.second)) {
  482. return false;
  483. }
  484. return true;
  485. }
  486. bool Tokenizer::would_start_an_identifier()
  487. {
  488. return would_start_an_identifier(peek_triplet().value());
  489. }
  490. bool Tokenizer::would_start_an_identifier(U32Triplet values)
  491. {
  492. if (is_hyphen_minus(values.first)) {
  493. if (is_name_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23()))
  494. return true;
  495. return false;
  496. }
  497. if (is_name_start_code_point(values.first)) {
  498. return true;
  499. }
  500. if (is_reverse_solidus(values.first)) {
  501. if (is_valid_escape_sequence(values.to_twin_12()))
  502. return true;
  503. return false;
  504. }
  505. return false;
  506. }
  507. Token Tokenizer::consume_string_token(u32 ending_code_point)
  508. {
  509. auto token = create_new_token(Token::TokenType::String);
  510. for (;;) {
  511. auto code_point = next_code_point();
  512. if (!code_point.has_value()) {
  513. log_parse_error();
  514. return token;
  515. }
  516. auto input = code_point.value();
  517. if (input == ending_code_point)
  518. return token;
  519. if (is_newline(input)) {
  520. reconsume_current_input_code_point();
  521. return create_new_token(Token::TokenType::BadString);
  522. }
  523. if (is_reverse_solidus(input)) {
  524. auto next_input = peek_code_point();
  525. if (!next_input.has_value())
  526. continue;
  527. if (is_newline(next_input.value())) {
  528. (void)next_code_point();
  529. continue;
  530. }
  531. auto escaped = consume_escaped_code_point();
  532. token.m_value.append_code_point(escaped);
  533. }
  534. token.m_value.append_code_point(input);
  535. }
  536. }
  537. void Tokenizer::consume_comments()
  538. {
  539. start:
  540. auto peek = peek_twin();
  541. if (!peek.has_value()) {
  542. log_parse_error();
  543. return;
  544. }
  545. auto twin = peek.value();
  546. if (!(is_solidus(twin.first) && is_asterisk(twin.second)))
  547. return;
  548. (void)next_code_point();
  549. (void)next_code_point();
  550. for (;;) {
  551. auto peek_inner = peek_twin();
  552. if (!peek_inner.has_value()) {
  553. log_parse_error();
  554. return;
  555. }
  556. auto twin_inner = peek_inner.value();
  557. if (is_asterisk(twin_inner.first) && is_solidus(twin_inner.second)) {
  558. (void)next_code_point();
  559. (void)next_code_point();
  560. goto start;
  561. }
  562. (void)next_code_point();
  563. }
  564. }
  565. Token Tokenizer::consume_a_token()
  566. {
  567. consume_comments();
  568. auto code_point = next_code_point();
  569. if (!code_point.has_value()) {
  570. return create_new_token(Token::TokenType::EndOfFile);
  571. }
  572. auto input = code_point.value();
  573. if (is_whitespace(input)) {
  574. dbgln_if(CSS_TOKENIZER_TRACE, "is whitespace");
  575. while (is_whitespace(peek_code_point().value()))
  576. (void)next_code_point();
  577. return create_new_token(Token::TokenType::Whitespace);
  578. }
  579. if (is_quotation_mark(input)) {
  580. dbgln_if(CSS_TOKENIZER_TRACE, "is quotation mark");
  581. return consume_string_token(input);
  582. }
  583. if (is_number_sign(input)) {
  584. dbgln_if(CSS_TOKENIZER_TRACE, "is number sign");
  585. auto next_input = peek_code_point().value();
  586. auto maybe_escape = peek_twin().value();
  587. if (is_name_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
  588. auto token = create_new_token(Token::TokenType::Hash);
  589. if (would_start_an_identifier())
  590. token.m_hash_type = Token::HashType::Id;
  591. auto name = consume_a_name();
  592. token.m_value.append(name);
  593. return token;
  594. }
  595. return create_value_token(Token::TokenType::Delim, input);
  596. }
  597. if (is_apostrophe(input)) {
  598. dbgln_if(CSS_TOKENIZER_TRACE, "is apostrophe");
  599. return consume_string_token(input);
  600. }
  601. if (is_left_paren(input)) {
  602. dbgln_if(CSS_TOKENIZER_TRACE, "is left paren");
  603. return create_new_token(Token::TokenType::OpenParen);
  604. }
  605. if (is_right_paren(input)) {
  606. dbgln_if(CSS_TOKENIZER_TRACE, "is right paren");
  607. return create_new_token(Token::TokenType::CloseParen);
  608. }
  609. if (is_plus_sign(input)) {
  610. dbgln_if(CSS_TOKENIZER_TRACE, "is plus sign");
  611. if (starts_with_a_number()) {
  612. reconsume_current_input_code_point();
  613. return consume_a_numeric_token();
  614. }
  615. return create_value_token(Token::TokenType::Delim, input);
  616. }
  617. if (is_comma(input)) {
  618. dbgln_if(CSS_TOKENIZER_TRACE, "is comma");
  619. return create_new_token(Token::TokenType::Comma);
  620. }
  621. if (is_hyphen_minus(input)) {
  622. dbgln_if(CSS_TOKENIZER_TRACE, "is hyphen minus");
  623. if (starts_with_a_number()) {
  624. reconsume_current_input_code_point();
  625. return consume_a_numeric_token();
  626. }
  627. auto next_twin = peek_twin().value();
  628. if (is_hyphen_minus(next_twin.first) && is_greater_than_sign(next_twin.second)) {
  629. (void)next_code_point();
  630. (void)next_code_point();
  631. return create_new_token(Token::TokenType::CDC);
  632. }
  633. if (would_start_an_identifier()) {
  634. reconsume_current_input_code_point();
  635. return consume_an_ident_like_token();
  636. }
  637. return create_value_token(Token::TokenType::Delim, input);
  638. }
  639. if (is_full_stop(input)) {
  640. dbgln_if(CSS_TOKENIZER_TRACE, "is full stop");
  641. if (starts_with_a_number()) {
  642. reconsume_current_input_code_point();
  643. return consume_a_numeric_token();
  644. }
  645. return create_value_token(Token::TokenType::Delim, input);
  646. }
  647. if (is_colon(input)) {
  648. dbgln_if(CSS_TOKENIZER_TRACE, "is colon");
  649. return create_new_token(Token::TokenType::Colon);
  650. }
  651. if (is_semicolon(input)) {
  652. dbgln_if(CSS_TOKENIZER_TRACE, "is semicolon");
  653. return create_new_token(Token::TokenType::Semicolon);
  654. }
  655. if (is_less_than_sign(input)) {
  656. dbgln_if(CSS_TOKENIZER_TRACE, "is less than");
  657. auto maybe_cdo = peek_triplet().value();
  658. if (is_exclamation_mark(maybe_cdo.first) && is_hyphen_minus(maybe_cdo.second) && is_hyphen_minus(maybe_cdo.third)) {
  659. (void)next_code_point();
  660. (void)next_code_point();
  661. (void)next_code_point();
  662. return create_new_token(Token::TokenType::CDO);
  663. }
  664. return create_value_token(Token::TokenType::Delim, input);
  665. }
  666. if (is_at(input)) {
  667. dbgln_if(CSS_TOKENIZER_TRACE, "is at");
  668. if (would_start_an_identifier()) {
  669. auto name = consume_a_name();
  670. return create_value_token(Token::TokenType::AtKeyword, input);
  671. }
  672. return create_value_token(Token::TokenType::Delim, input);
  673. }
  674. if (is_open_square_bracket(input)) {
  675. dbgln_if(CSS_TOKENIZER_TRACE, "is open square");
  676. return create_new_token(Token::TokenType::OpenSquare);
  677. }
  678. if (is_reverse_solidus(input)) {
  679. dbgln_if(CSS_TOKENIZER_TRACE, "is reverse solidus");
  680. if (is_valid_escape_sequence()) {
  681. reconsume_current_input_code_point();
  682. return consume_an_ident_like_token();
  683. }
  684. log_parse_error();
  685. return create_value_token(Token::TokenType::Delim, input);
  686. }
  687. if (is_closed_square_bracket(input)) {
  688. dbgln_if(CSS_TOKENIZER_TRACE, "is closed square");
  689. return create_new_token(Token::TokenType::CloseSquare);
  690. }
  691. if (is_open_curly_bracket(input)) {
  692. dbgln_if(CSS_TOKENIZER_TRACE, "is open curly");
  693. return create_new_token(Token::TokenType::OpenCurly);
  694. }
  695. if (is_closed_curly_bracket(input)) {
  696. dbgln_if(CSS_TOKENIZER_TRACE, "is closed curly");
  697. return create_new_token(Token::TokenType::CloseCurly);
  698. }
  699. if (is_ascii_digit(input)) {
  700. dbgln_if(CSS_TOKENIZER_TRACE, "is digit");
  701. reconsume_current_input_code_point();
  702. return consume_a_numeric_token();
  703. }
  704. if (is_name_start_code_point(input)) {
  705. dbgln_if(CSS_TOKENIZER_TRACE, "is name start");
  706. reconsume_current_input_code_point();
  707. return consume_an_ident_like_token();
  708. }
  709. dbgln_if(CSS_TOKENIZER_TRACE, "is delimiter");
  710. return create_value_token(Token::TokenType::Delim, input);
  711. }
  712. }