/*
 * Copyright (c) 2020-2021, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/SourceLocation.h>
  8. #include <AK/Vector.h>
  9. #include <LibTextCodec/Decoder.h>
  10. #include <LibWeb/CSS/Parser/Tokenizer.h>
  11. #define CSS_TOKENIZER_TRACE 0
  12. //U+FFFD REPLACEMENT CHARACTER (�)
  13. #define REPLACEMENT_CHARACTER 0xFFFD
  14. static const u32 TOKENIZER_EOF = 0xFFFFFFFF;
  15. static inline void log_parse_error(const SourceLocation& location = SourceLocation::current())
  16. {
  17. dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location);
  18. }
  19. static inline bool is_eof(u32 code_point)
  20. {
  21. return code_point == TOKENIZER_EOF;
  22. }
  23. static inline bool is_quotation_mark(u32 code_point)
  24. {
  25. return code_point == 0x22;
  26. }
  27. static inline bool is_greater_than_maximum_allowed_code_point(u32 code_point)
  28. {
  29. return code_point > 0x10FFFF;
  30. }
  31. static inline bool is_low_line(u32 code_point)
  32. {
  33. return code_point == 0x5F;
  34. }
  35. static inline bool is_name_start_code_point(u32 code_point)
  36. {
  37. return is_ascii_alpha(code_point) || !is_ascii(code_point) || is_low_line(code_point);
  38. }
  39. static inline bool is_hyphen_minus(u32 code_point)
  40. {
  41. return code_point == 0x2D;
  42. }
  43. static inline bool is_name_code_point(u32 code_point)
  44. {
  45. return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
  46. }
  47. static inline bool is_non_printable(u32 code_point)
  48. {
  49. return code_point <= 0x8 || code_point == 0xB || (code_point >= 0xE && code_point <= 0x1F) || code_point == 0x7F;
  50. }
  51. static inline bool is_number_sign(u32 code_point)
  52. {
  53. return code_point == 0x23;
  54. }
  55. static inline bool is_reverse_solidus(u32 code_point)
  56. {
  57. return code_point == 0x5C;
  58. }
  59. static inline bool is_apostrophe(u32 code_point)
  60. {
  61. return code_point == 0x27;
  62. }
  63. static inline bool is_left_paren(u32 code_point)
  64. {
  65. return code_point == 0x28;
  66. }
  67. static inline bool is_right_paren(u32 code_point)
  68. {
  69. return code_point == 0x29;
  70. }
  71. static inline bool is_plus_sign(u32 code_point)
  72. {
  73. return code_point == 0x2B;
  74. }
  75. static inline bool is_comma(u32 code_point)
  76. {
  77. return code_point == 0x2C;
  78. }
  79. static inline bool is_full_stop(u32 code_point)
  80. {
  81. return code_point == 0x2E;
  82. }
  83. static inline bool is_newline(u32 code_point)
  84. {
  85. return code_point == 0xA;
  86. }
  87. static inline bool is_asterisk(u32 code_point)
  88. {
  89. return code_point == 0x2A;
  90. }
  91. static inline bool is_solidus(u32 code_point)
  92. {
  93. return code_point == 0x2F;
  94. }
  95. static inline bool is_colon(u32 code_point)
  96. {
  97. return code_point == 0x3A;
  98. }
  99. static inline bool is_semicolon(u32 code_point)
  100. {
  101. return code_point == 0x3B;
  102. }
  103. static inline bool is_less_than_sign(u32 code_point)
  104. {
  105. return code_point == 0x3C;
  106. }
  107. static inline bool is_greater_than_sign(u32 code_point)
  108. {
  109. return code_point == 0x3E;
  110. }
  111. static inline bool is_at(u32 code_point)
  112. {
  113. return code_point == 0x40;
  114. }
  115. static inline bool is_open_square_bracket(u32 code_point)
  116. {
  117. return code_point == 0x5B;
  118. }
  119. static inline bool is_closed_square_bracket(u32 code_point)
  120. {
  121. return code_point == 0x5D;
  122. }
  123. static inline bool is_open_curly_bracket(u32 code_point)
  124. {
  125. return code_point == 0x7B;
  126. }
  127. static inline bool is_closed_curly_bracket(u32 code_point)
  128. {
  129. return code_point == 0x7D;
  130. }
  131. static inline bool is_whitespace(u32 code_point)
  132. {
  133. return code_point == 0x9 || code_point == 0xA || code_point == 0x20;
  134. }
  135. static inline bool is_percent(u32 code_point)
  136. {
  137. return code_point == 0x25;
  138. }
  139. static inline bool is_exclamation_mark(u32 code_point)
  140. {
  141. return code_point == 0x21;
  142. }
  143. static inline bool is_e(u32 code_point)
  144. {
  145. return code_point == 0x65;
  146. }
  147. static inline bool is_E(u32 code_point)
  148. {
  149. return code_point == 0x45;
  150. }
  151. namespace Web::CSS {
  152. Tokenizer::Tokenizer(const StringView& input, const String& encoding)
  153. {
  154. auto* decoder = TextCodec::decoder_for(encoding);
  155. VERIFY(decoder);
  156. // FIXME: preprocess the stream
  157. // https://www.w3.org/TR/css-syntax-3/#input-preprocessing
  158. m_decoded_input = decoder->to_utf8(input);
  159. m_utf8_view = Utf8View(m_decoded_input);
  160. m_utf8_iterator = m_utf8_view.begin();
  161. }
  162. Vector<Token> Tokenizer::parse()
  163. {
  164. Vector<Token> tokens;
  165. for (;;) {
  166. auto token = consume_a_token();
  167. tokens.append(token);
  168. if (token.is(Token::Type::EndOfFile)) {
  169. return tokens;
  170. }
  171. }
  172. }
  173. u32 Tokenizer::next_code_point()
  174. {
  175. if (m_utf8_iterator == m_utf8_view.end())
  176. return TOKENIZER_EOF;
  177. m_prev_utf8_iterator = m_utf8_iterator;
  178. ++m_utf8_iterator;
  179. dbgln_if(CSS_TOKENIZER_TRACE, "(Tokenizer) Next code_point: {:c}", (char)*m_prev_utf8_iterator);
  180. return *m_prev_utf8_iterator;
  181. }
  182. u32 Tokenizer::peek_code_point(size_t offset) const
  183. {
  184. auto it = m_utf8_iterator;
  185. for (size_t i = 0; i < offset && it != m_utf8_view.end(); ++i)
  186. ++it;
  187. if (it == m_utf8_view.end())
  188. return TOKENIZER_EOF;
  189. return *it;
  190. }
  191. U32Twin Tokenizer::peek_twin() const
  192. {
  193. U32Twin values;
  194. auto it = m_utf8_iterator;
  195. for (size_t i = 0; i < 2 && it != m_utf8_view.end(); ++i) {
  196. if (it == m_utf8_view.end())
  197. values.set(i, TOKENIZER_EOF);
  198. else
  199. values.set(i, *it);
  200. ++it;
  201. }
  202. return values;
  203. }
  204. U32Triplet Tokenizer::peek_triplet() const
  205. {
  206. U32Triplet values;
  207. auto it = m_utf8_iterator;
  208. for (size_t i = 0; i < 3 && it != m_utf8_view.end(); ++i) {
  209. if (it == m_utf8_view.end())
  210. values.set(i, TOKENIZER_EOF);
  211. else
  212. values.set(i, *it);
  213. ++it;
  214. }
  215. return values;
  216. }
  217. Token Tokenizer::create_new_token(Token::Type type)
  218. {
  219. Token token = {};
  220. token.m_type = type;
  221. return token;
  222. }
  223. Token Tokenizer::create_eof_token()
  224. {
  225. return create_new_token(Token::Type::EndOfFile);
  226. }
  227. Token Tokenizer::create_value_token(Token::Type type, String value)
  228. {
  229. Token token;
  230. token.m_type = type;
  231. token.m_value.append(move(value));
  232. return token;
  233. }
  234. Token Tokenizer::create_value_token(Token::Type type, u32 value)
  235. {
  236. Token token = {};
  237. token.m_type = type;
  238. token.m_value.append_code_point(value);
  239. return token;
  240. }
  241. u32 Tokenizer::consume_escaped_code_point()
  242. {
  243. auto input = next_code_point();
  244. if (is_eof(input)) {
  245. log_parse_error();
  246. return REPLACEMENT_CHARACTER;
  247. }
  248. if (is_ascii_hex_digit(input)) {
  249. StringBuilder builder;
  250. builder.append_code_point(input);
  251. size_t counter = 0;
  252. while (is_ascii_hex_digit(peek_code_point()) && counter++ < 5) {
  253. builder.append_code_point(next_code_point());
  254. }
  255. if (is_whitespace(peek_code_point())) {
  256. (void)next_code_point();
  257. }
  258. auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16);
  259. if (unhexed == 0 || is_unicode_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
  260. return REPLACEMENT_CHARACTER;
  261. }
  262. return unhexed;
  263. }
  264. if (!input) {
  265. log_parse_error();
  266. return REPLACEMENT_CHARACTER;
  267. }
  268. return input;
  269. }
  270. Token Tokenizer::consume_an_ident_like_token()
  271. {
  272. auto string = consume_a_name();
  273. if (string.equals_ignoring_case("url") && is_left_paren(peek_code_point())) {
  274. (void)next_code_point();
  275. for (;;) {
  276. auto maybe_whitespace = peek_twin();
  277. if (!(is_whitespace(maybe_whitespace.first) && is_whitespace(maybe_whitespace.second))) {
  278. break;
  279. }
  280. (void)next_code_point();
  281. }
  282. auto next_two = peek_twin();
  283. // if one of these ", ', ' "', " '"
  284. if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) {
  285. return create_value_token(Token::Type::Function, string);
  286. }
  287. return consume_a_url_token();
  288. }
  289. if (is_left_paren(peek_code_point())) {
  290. (void)next_code_point();
  291. return create_value_token(Token::Type::Function, string);
  292. }
  293. return create_value_token(Token::Type::Ident, string);
  294. }
  295. CSSNumber Tokenizer::consume_a_number()
  296. {
  297. StringBuilder repr;
  298. Token::NumberType type = Token::NumberType::Integer;
  299. auto next_input = peek_code_point();
  300. if (is_plus_sign(next_input) || is_hyphen_minus(next_input)) {
  301. repr.append_code_point(next_code_point());
  302. }
  303. for (;;) {
  304. auto digits = peek_code_point();
  305. if (!is_ascii_digit(digits))
  306. break;
  307. repr.append_code_point(next_code_point());
  308. }
  309. auto maybe_number = peek_twin();
  310. if (is_full_stop(maybe_number.first) && is_ascii_digit(maybe_number.second)) {
  311. repr.append_code_point(next_code_point());
  312. repr.append_code_point(next_code_point());
  313. type = Token::NumberType::Number;
  314. for (;;) {
  315. auto digit = peek_code_point();
  316. if (!is_ascii_digit(digit))
  317. break;
  318. repr.append_code_point(next_code_point());
  319. }
  320. }
  321. auto maybe_exp = peek_triplet();
  322. if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) {
  323. if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) {
  324. if (is_ascii_digit(maybe_exp.third)) {
  325. repr.append_code_point(next_code_point());
  326. repr.append_code_point(next_code_point());
  327. repr.append_code_point(next_code_point());
  328. }
  329. } else if (is_ascii_digit(maybe_exp.second)) {
  330. repr.append_code_point(next_code_point());
  331. repr.append_code_point(next_code_point());
  332. }
  333. type = Token::NumberType::Number;
  334. for (;;) {
  335. auto digits = peek_code_point();
  336. if (!is_ascii_digit(digits))
  337. break;
  338. repr.append_code_point(next_code_point());
  339. }
  340. }
  341. return { repr.to_string(), type };
  342. }
  343. String Tokenizer::consume_a_name()
  344. {
  345. StringBuilder result;
  346. for (;;) {
  347. auto input = next_code_point();
  348. if (is_eof(input))
  349. break;
  350. if (is_name_code_point(input)) {
  351. result.append_code_point(input);
  352. continue;
  353. }
  354. auto next = peek_code_point();
  355. if (!is_eof(next) && is_valid_escape_sequence({ input, next })) {
  356. result.append_code_point(consume_escaped_code_point());
  357. continue;
  358. }
  359. reconsume_current_input_code_point();
  360. break;
  361. }
  362. return result.to_string();
  363. }
  364. Token Tokenizer::consume_a_url_token()
  365. {
  366. auto token = create_new_token(Token::Type::Url);
  367. for (;;) {
  368. if (!is_whitespace(peek_code_point())) {
  369. break;
  370. }
  371. (void)next_code_point();
  372. }
  373. for (;;) {
  374. auto input = peek_code_point();
  375. if (is_eof(input)) {
  376. log_parse_error();
  377. return token;
  378. }
  379. if (is_right_paren(input)) {
  380. (void)next_code_point();
  381. return token;
  382. }
  383. if (is_whitespace(input)) {
  384. for (;;) {
  385. if (!is_whitespace(peek_code_point())) {
  386. break;
  387. }
  388. input = next_code_point();
  389. }
  390. if (is_eof(input)) {
  391. log_parse_error();
  392. return token;
  393. }
  394. if (is_right_paren(input)) {
  395. return token;
  396. }
  397. consume_the_remnants_of_a_bad_url();
  398. return create_new_token(Token::Type::BadUrl);
  399. }
  400. if (is_quotation_mark(input) || is_apostrophe(input) || is_left_paren(input) || is_non_printable(input)) {
  401. log_parse_error();
  402. (void)next_code_point();
  403. consume_the_remnants_of_a_bad_url();
  404. return create_new_token(Token::Type::BadUrl);
  405. }
  406. if (is_reverse_solidus(input)) {
  407. if (is_valid_escape_sequence(peek_twin())) {
  408. token.m_value.append_code_point(consume_escaped_code_point());
  409. } else {
  410. log_parse_error();
  411. (void)next_code_point();
  412. consume_the_remnants_of_a_bad_url();
  413. return create_new_token(Token::Type::BadUrl);
  414. }
  415. }
  416. token.m_value.append_code_point(next_code_point());
  417. }
  418. }
  419. void Tokenizer::consume_the_remnants_of_a_bad_url()
  420. {
  421. for (;;) {
  422. auto next = peek_code_point();
  423. if (is_eof(next)) {
  424. return;
  425. }
  426. auto input = next;
  427. if (is_right_paren(input)) {
  428. (void)next_code_point();
  429. return;
  430. }
  431. if (is_valid_escape_sequence(peek_twin())) {
  432. [[maybe_unused]] auto cp = consume_escaped_code_point();
  433. }
  434. (void)next_code_point();
  435. }
  436. }
  437. void Tokenizer::reconsume_current_input_code_point()
  438. {
  439. m_utf8_iterator = m_prev_utf8_iterator;
  440. }
  441. Token Tokenizer::consume_a_numeric_token()
  442. {
  443. auto number = consume_a_number();
  444. if (would_start_an_identifier()) {
  445. auto token = create_new_token(Token::Type::Dimension);
  446. token.m_value.append(number.value);
  447. token.m_number_type = number.type;
  448. auto unit = consume_a_name();
  449. token.m_unit.append(unit);
  450. return token;
  451. }
  452. if (is_percent(peek_code_point())) {
  453. (void)next_code_point();
  454. auto token = create_new_token(Token::Type::Percentage);
  455. token.m_value.append(number.value);
  456. return token;
  457. }
  458. auto token = create_new_token(Token::Type::Number);
  459. token.m_value.append(number.value);
  460. token.m_number_type = number.type;
  461. return token;
  462. }
  463. bool Tokenizer::starts_with_a_number() const
  464. {
  465. return starts_with_a_number(peek_triplet());
  466. }
  467. bool Tokenizer::starts_with_a_number(U32Triplet values)
  468. {
  469. if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) {
  470. if (is_ascii_digit(values.second))
  471. return true;
  472. if (is_full_stop(values.second) && is_ascii_digit(values.third))
  473. return true;
  474. return false;
  475. }
  476. if (is_full_stop(values.first))
  477. return is_ascii_digit(values.second);
  478. if (is_ascii_digit(values.first))
  479. return true;
  480. return false;
  481. }
  482. bool Tokenizer::is_valid_escape_sequence(U32Twin values)
  483. {
  484. if (!is_reverse_solidus(values.first)) {
  485. return false;
  486. }
  487. if (is_newline(values.second)) {
  488. return false;
  489. }
  490. return true;
  491. }
  492. bool Tokenizer::would_start_an_identifier()
  493. {
  494. return would_start_an_identifier(peek_triplet());
  495. }
  496. bool Tokenizer::would_start_an_identifier(U32Triplet values)
  497. {
  498. if (is_hyphen_minus(values.first)) {
  499. if (is_name_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23()))
  500. return true;
  501. return false;
  502. }
  503. if (is_name_start_code_point(values.first)) {
  504. return true;
  505. }
  506. if (is_reverse_solidus(values.first)) {
  507. if (is_valid_escape_sequence(values.to_twin_12()))
  508. return true;
  509. return false;
  510. }
  511. return false;
  512. }
  513. Token Tokenizer::consume_string_token(u32 ending_code_point)
  514. {
  515. auto token = create_new_token(Token::Type::String);
  516. for (;;) {
  517. auto input = next_code_point();
  518. if (is_eof(input)) {
  519. log_parse_error();
  520. return token;
  521. }
  522. if (input == ending_code_point)
  523. return token;
  524. if (is_newline(input)) {
  525. reconsume_current_input_code_point();
  526. return create_new_token(Token::Type::BadString);
  527. }
  528. if (is_reverse_solidus(input)) {
  529. auto next_input = peek_code_point();
  530. if (is_eof(next_input))
  531. continue;
  532. if (is_newline(next_input)) {
  533. (void)next_code_point();
  534. continue;
  535. }
  536. auto escaped = consume_escaped_code_point();
  537. token.m_value.append_code_point(escaped);
  538. }
  539. token.m_value.append_code_point(input);
  540. }
  541. }
  542. void Tokenizer::consume_comments()
  543. {
  544. start:
  545. auto twin = peek_twin();
  546. if (!(is_solidus(twin.first) && is_asterisk(twin.second)))
  547. return;
  548. (void)next_code_point();
  549. (void)next_code_point();
  550. for (;;) {
  551. auto twin_inner = peek_twin();
  552. if (is_eof(twin_inner.first) || is_eof(twin_inner.second)) {
  553. log_parse_error();
  554. return;
  555. }
  556. if (is_asterisk(twin_inner.first) && is_solidus(twin_inner.second)) {
  557. (void)next_code_point();
  558. (void)next_code_point();
  559. goto start;
  560. }
  561. (void)next_code_point();
  562. }
  563. }
  564. Token Tokenizer::consume_a_token()
  565. {
  566. consume_comments();
  567. auto input = next_code_point();
  568. if (is_eof(input)) {
  569. return create_new_token(Token::Type::EndOfFile);
  570. }
  571. if (is_whitespace(input)) {
  572. dbgln_if(CSS_TOKENIZER_TRACE, "is whitespace");
  573. auto next = peek_code_point();
  574. while (is_whitespace(next)) {
  575. (void)next_code_point();
  576. next = peek_code_point();
  577. }
  578. return create_new_token(Token::Type::Whitespace);
  579. }
  580. if (is_quotation_mark(input)) {
  581. dbgln_if(CSS_TOKENIZER_TRACE, "is quotation mark");
  582. return consume_string_token(input);
  583. }
  584. if (is_number_sign(input)) {
  585. dbgln_if(CSS_TOKENIZER_TRACE, "is number sign");
  586. auto next_input = peek_code_point();
  587. auto maybe_escape = peek_twin();
  588. if (is_name_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
  589. auto token = create_new_token(Token::Type::Hash);
  590. if (would_start_an_identifier())
  591. token.m_hash_type = Token::HashType::Id;
  592. auto name = consume_a_name();
  593. token.m_value.append(name);
  594. return token;
  595. }
  596. return create_value_token(Token::Type::Delim, input);
  597. }
  598. if (is_apostrophe(input)) {
  599. dbgln_if(CSS_TOKENIZER_TRACE, "is apostrophe");
  600. return consume_string_token(input);
  601. }
  602. if (is_left_paren(input)) {
  603. dbgln_if(CSS_TOKENIZER_TRACE, "is left paren");
  604. return create_new_token(Token::Type::OpenParen);
  605. }
  606. if (is_right_paren(input)) {
  607. dbgln_if(CSS_TOKENIZER_TRACE, "is right paren");
  608. return create_new_token(Token::Type::CloseParen);
  609. }
  610. if (is_plus_sign(input)) {
  611. dbgln_if(CSS_TOKENIZER_TRACE, "is plus sign");
  612. if (starts_with_a_number()) {
  613. reconsume_current_input_code_point();
  614. return consume_a_numeric_token();
  615. }
  616. return create_value_token(Token::Type::Delim, input);
  617. }
  618. if (is_comma(input)) {
  619. dbgln_if(CSS_TOKENIZER_TRACE, "is comma");
  620. return create_new_token(Token::Type::Comma);
  621. }
  622. if (is_hyphen_minus(input)) {
  623. dbgln_if(CSS_TOKENIZER_TRACE, "is hyphen minus");
  624. if (starts_with_a_number()) {
  625. reconsume_current_input_code_point();
  626. return consume_a_numeric_token();
  627. }
  628. auto next_twin = peek_twin();
  629. if (is_hyphen_minus(next_twin.first) && is_greater_than_sign(next_twin.second)) {
  630. (void)next_code_point();
  631. (void)next_code_point();
  632. return create_new_token(Token::Type::CDC);
  633. }
  634. if (would_start_an_identifier()) {
  635. reconsume_current_input_code_point();
  636. return consume_an_ident_like_token();
  637. }
  638. return create_value_token(Token::Type::Delim, input);
  639. }
  640. if (is_full_stop(input)) {
  641. dbgln_if(CSS_TOKENIZER_TRACE, "is full stop");
  642. if (starts_with_a_number()) {
  643. reconsume_current_input_code_point();
  644. return consume_a_numeric_token();
  645. }
  646. return create_value_token(Token::Type::Delim, input);
  647. }
  648. if (is_colon(input)) {
  649. dbgln_if(CSS_TOKENIZER_TRACE, "is colon");
  650. return create_new_token(Token::Type::Colon);
  651. }
  652. if (is_semicolon(input)) {
  653. dbgln_if(CSS_TOKENIZER_TRACE, "is semicolon");
  654. return create_new_token(Token::Type::Semicolon);
  655. }
  656. if (is_less_than_sign(input)) {
  657. dbgln_if(CSS_TOKENIZER_TRACE, "is less than");
  658. auto maybe_cdo = peek_triplet();
  659. if (is_exclamation_mark(maybe_cdo.first) && is_hyphen_minus(maybe_cdo.second) && is_hyphen_minus(maybe_cdo.third)) {
  660. (void)next_code_point();
  661. (void)next_code_point();
  662. (void)next_code_point();
  663. return create_new_token(Token::Type::CDO);
  664. }
  665. return create_value_token(Token::Type::Delim, input);
  666. }
  667. if (is_at(input)) {
  668. dbgln_if(CSS_TOKENIZER_TRACE, "is at");
  669. if (would_start_an_identifier()) {
  670. auto name = consume_a_name();
  671. return create_value_token(Token::Type::AtKeyword, input);
  672. }
  673. return create_value_token(Token::Type::Delim, input);
  674. }
  675. if (is_open_square_bracket(input)) {
  676. dbgln_if(CSS_TOKENIZER_TRACE, "is open square");
  677. return create_new_token(Token::Type::OpenSquare);
  678. }
  679. if (is_reverse_solidus(input)) {
  680. dbgln_if(CSS_TOKENIZER_TRACE, "is reverse solidus");
  681. if (is_valid_escape_sequence({ input, peek_code_point() })) {
  682. reconsume_current_input_code_point();
  683. return consume_an_ident_like_token();
  684. }
  685. log_parse_error();
  686. return create_value_token(Token::Type::Delim, input);
  687. }
  688. if (is_closed_square_bracket(input)) {
  689. dbgln_if(CSS_TOKENIZER_TRACE, "is closed square");
  690. return create_new_token(Token::Type::CloseSquare);
  691. }
  692. if (is_open_curly_bracket(input)) {
  693. dbgln_if(CSS_TOKENIZER_TRACE, "is open curly");
  694. return create_new_token(Token::Type::OpenCurly);
  695. }
  696. if (is_closed_curly_bracket(input)) {
  697. dbgln_if(CSS_TOKENIZER_TRACE, "is closed curly");
  698. return create_new_token(Token::Type::CloseCurly);
  699. }
  700. if (is_ascii_digit(input)) {
  701. dbgln_if(CSS_TOKENIZER_TRACE, "is digit");
  702. reconsume_current_input_code_point();
  703. return consume_a_numeric_token();
  704. }
  705. if (is_name_start_code_point(input)) {
  706. dbgln_if(CSS_TOKENIZER_TRACE, "is name start");
  707. reconsume_current_input_code_point();
  708. return consume_an_ident_like_token();
  709. }
  710. dbgln_if(CSS_TOKENIZER_TRACE, "is delimiter");
  711. return create_value_token(Token::Type::Delim, input);
  712. }
  713. }