  1. /*
  2. * Copyright (c) 2020-2021, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/SourceLocation.h>
  7. #include <AK/Vector.h>
  8. #include <LibTextCodec/Decoder.h>
  9. #include <LibWeb/CSS/Parser/Tokenizer.h>
  10. #include <ctype.h>
  11. #define CSS_TOKENIZER_TRACE 0
  12. //U+FFFD REPLACEMENT CHARACTER (�)
  13. #define REPLACEMENT_CHARACTER 0xFFFD
// Logs a CSS tokenization parse error, tagged with the caller's source
// location; emits only when CSS_TOKENIZER_TRACE is enabled.
static inline void log_parse_error(const SourceLocation& location = SourceLocation::current())
{
    dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location);
}
  18. static inline bool is_surrogate(u32 code_point)
  19. {
  20. return (code_point & 0xfffff800) == 0xd800;
  21. }
  22. static inline bool is_quotation_mark(u32 code_point)
  23. {
  24. return code_point == 0x22;
  25. }
  26. static inline bool is_greater_than_maximum_allowed_code_point(u32 code_point)
  27. {
  28. return code_point > 0x10FFFF;
  29. }
  30. static inline bool is_hex_digit(u32 code_point)
  31. {
  32. return isxdigit(code_point);
  33. }
  34. static inline bool is_low_line(u32 code_point)
  35. {
  36. return code_point == 0x5F;
  37. }
  38. static inline bool is_non_ascii(u32 code_point)
  39. {
  40. return code_point >= 0x80;
  41. }
  42. static inline bool is_name_start_code_point(u32 code_point)
  43. {
  44. return isalpha(code_point) || is_non_ascii(code_point) || is_low_line(code_point);
  45. }
  46. static inline bool is_hyphen_minus(u32 code_point)
  47. {
  48. return code_point == 0x2D;
  49. }
  50. static inline bool is_name_code_point(u32 code_point)
  51. {
  52. return is_name_start_code_point(code_point) || isdigit(code_point) || is_hyphen_minus(code_point);
  53. }
  54. static inline bool is_non_printable(u32 code_point)
  55. {
  56. return code_point <= 0x8 || code_point == 0xB || (code_point >= 0xE && code_point <= 0x1F) || code_point == 0x7F;
  57. }
  58. static inline bool is_number_sign(u32 code_point)
  59. {
  60. return code_point == 0x23;
  61. }
  62. static inline bool is_reverse_solidus(u32 code_point)
  63. {
  64. return code_point == 0x5C;
  65. }
  66. static inline bool is_apostrophe(u32 code_point)
  67. {
  68. return code_point == 0x27;
  69. }
  70. static inline bool is_left_paren(u32 code_point)
  71. {
  72. return code_point == 0x28;
  73. }
  74. static inline bool is_right_paren(u32 code_point)
  75. {
  76. return code_point == 0x29;
  77. }
  78. static inline bool is_plus_sign(u32 code_point)
  79. {
  80. return code_point == 0x2B;
  81. }
  82. static inline bool is_comma(u32 code_point)
  83. {
  84. return code_point == 0x2C;
  85. }
  86. static inline bool is_full_stop(u32 code_point)
  87. {
  88. return code_point == 0x2E;
  89. }
  90. static inline bool is_newline(u32 code_point)
  91. {
  92. return code_point == 0xA;
  93. }
  94. static inline bool is_asterisk(u32 code_point)
  95. {
  96. return code_point == 0x2A;
  97. }
  98. static inline bool is_solidus(u32 code_point)
  99. {
  100. return code_point == 0x2F;
  101. }
  102. static inline bool is_colon(u32 code_point)
  103. {
  104. return code_point == 0x3A;
  105. }
  106. static inline bool is_semicolon(u32 code_point)
  107. {
  108. return code_point == 0x3B;
  109. }
  110. static inline bool is_less_than_sign(u32 code_point)
  111. {
  112. return code_point == 0x3C;
  113. }
  114. static inline bool is_greater_than_sign(u32 code_point)
  115. {
  116. return code_point == 0x3E;
  117. }
  118. static inline bool is_at(u32 code_point)
  119. {
  120. return code_point == 0x40;
  121. }
  122. static inline bool is_open_square_bracket(u32 code_point)
  123. {
  124. return code_point == 0x5B;
  125. }
  126. static inline bool is_closed_square_bracket(u32 code_point)
  127. {
  128. return code_point == 0x5D;
  129. }
  130. static inline bool is_open_curly_bracket(u32 code_point)
  131. {
  132. return code_point == 0x7B;
  133. }
  134. static inline bool is_closed_curly_bracket(u32 code_point)
  135. {
  136. return code_point == 0x7D;
  137. }
  138. static inline bool is_whitespace(u32 code_point)
  139. {
  140. return code_point == 0x9 || code_point == 0xA || code_point == 0x20;
  141. }
  142. static inline bool is_percent(u32 code_point)
  143. {
  144. return code_point == 0x25;
  145. }
  146. static inline bool is_exclamation_mark(u32 code_point)
  147. {
  148. return code_point == 0x21;
  149. }
  150. static inline bool is_e(u32 code_point)
  151. {
  152. return code_point == 0x65;
  153. }
  154. static inline bool is_E(u32 code_point)
  155. {
  156. return code_point == 0x45;
  157. }
  158. namespace Web::CSS {
// Decodes the raw input from `encoding` into UTF-8 and positions the
// tokenizer at the first code point. Aborts (VERIFY) if no decoder exists
// for the given encoding.
Tokenizer::Tokenizer(const StringView& input, const String& encoding)
{
    auto* decoder = TextCodec::decoder_for(encoding);
    VERIFY(decoder);

    // FIXME: preprocess the stream
    // https://www.w3.org/TR/css-syntax-3/#input-preprocessing
    m_decoded_input = decoder->to_utf8(input);
    m_utf8_view = Utf8View(m_decoded_input);
    m_utf8_iterator = m_utf8_view.begin();
}
  169. Vector<Token> Tokenizer::parse()
  170. {
  171. Vector<Token> tokens;
  172. for (;;) {
  173. auto token = consume_a_token();
  174. tokens.append(token);
  175. if (token.is_eof()) {
  176. return tokens;
  177. }
  178. }
  179. }
// Consumes and returns the next code point, or an empty Optional at end of
// input. Saves the pre-consume position so that
// reconsume_current_input_code_point() can step back exactly one code point.
Optional<u32> Tokenizer::next_code_point()
{
    if (m_utf8_iterator == m_utf8_view.end())
        return {};
    m_prev_utf8_iterator = m_utf8_iterator;
    ++m_utf8_iterator;
    // NOTE: The char cast truncates non-ASCII code points; debug logging only.
    dbgln_if(CSS_TOKENIZER_TRACE, "(Tokenizer) Next code_point: {:c}", (char)*m_prev_utf8_iterator);
    return *m_prev_utf8_iterator;
}
  189. Optional<u32> Tokenizer::peek_code_point(size_t offset) const
  190. {
  191. auto it = m_utf8_iterator;
  192. for (size_t i = 0; i < offset && it != m_utf8_view.end(); ++i)
  193. ++it;
  194. if (it == m_utf8_view.end())
  195. return {};
  196. return *it;
  197. }
  198. Optional<U32Twin> Tokenizer::peek_twin() const
  199. {
  200. U32Twin values;
  201. auto it = m_utf8_iterator;
  202. for (size_t i = 0; i < 2 && it != m_utf8_view.end(); ++i) {
  203. if (it == m_utf8_view.end())
  204. return {};
  205. values.set(i, *it);
  206. ++it;
  207. }
  208. return values;
  209. }
  210. Optional<U32Triplet> Tokenizer::peek_triplet() const
  211. {
  212. U32Triplet values;
  213. auto it = m_utf8_iterator;
  214. for (size_t i = 0; i < 3 && it != m_utf8_view.end(); ++i) {
  215. if (it == m_utf8_view.end())
  216. return {};
  217. values.set(i, *it);
  218. ++it;
  219. }
  220. return values;
  221. }
  222. Token Tokenizer::create_new_token(Token::TokenType type)
  223. {
  224. Token token = {};
  225. token.m_type = type;
  226. return token;
  227. }
  228. Token Tokenizer::create_value_token(Token::TokenType type, String value)
  229. {
  230. Token token;
  231. token.m_type = type;
  232. token.m_value.append(move(value));
  233. return token;
  234. }
  235. Token Tokenizer::create_value_token(Token::TokenType type, u32 value)
  236. {
  237. Token token = {};
  238. token.m_type = type;
  239. token.m_value.append_code_point(value);
  240. return token;
  241. }
  242. u32 Tokenizer::consume_escaped_code_point()
  243. {
  244. auto code_point = next_code_point();
  245. if (!code_point.has_value()) {
  246. log_parse_error();
  247. return REPLACEMENT_CHARACTER;
  248. }
  249. auto input = code_point.value();
  250. if (is_hex_digit(input)) {
  251. StringBuilder builder;
  252. builder.append_code_point(input);
  253. size_t counter = 0;
  254. while (is_hex_digit(peek_code_point().value()) && counter++ < 5) {
  255. builder.append_code_point(next_code_point().value());
  256. }
  257. if (is_whitespace(peek_code_point().value())) {
  258. (void)next_code_point();
  259. }
  260. auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16);
  261. if (unhexed == 0 || is_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
  262. return REPLACEMENT_CHARACTER;
  263. }
  264. return unhexed;
  265. }
  266. if (!input) {
  267. log_parse_error();
  268. return REPLACEMENT_CHARACTER;
  269. }
  270. return input;
  271. }
  272. Token Tokenizer::consume_an_ident_like_token()
  273. {
  274. auto string = consume_a_name();
  275. if (string.equals_ignoring_case("url") && is_left_paren(peek_code_point().value())) {
  276. (void)next_code_point();
  277. for (;;) {
  278. auto maybe_whitespace = peek_twin().value();
  279. if (!(is_whitespace(maybe_whitespace.first) && is_whitespace(maybe_whitespace.second))) {
  280. break;
  281. }
  282. (void)next_code_point();
  283. }
  284. auto next_two = peek_twin().value();
  285. // if one of these ", ', ' "', " '"
  286. if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) {
  287. return create_value_token(Token::TokenType::Function, string);
  288. }
  289. return consume_a_url_token();
  290. }
  291. if (is_left_paren(peek_code_point().value())) {
  292. (void)next_code_point();
  293. return create_value_token(Token::TokenType::Function, string);
  294. }
  295. return create_value_token(Token::TokenType::Ident, string);
  296. }
  297. CSSNumber Tokenizer::consume_a_number()
  298. {
  299. StringBuilder repr;
  300. Token::NumberType type = Token::NumberType::Integer;
  301. auto next_input = peek_code_point().value();
  302. if (is_plus_sign(next_input) || is_hyphen_minus(next_input)) {
  303. repr.append_code_point(next_code_point().value());
  304. }
  305. for (;;) {
  306. auto digits = peek_code_point().value();
  307. if (!isdigit(digits))
  308. break;
  309. repr.append_code_point(next_code_point().value());
  310. }
  311. auto maybe_number = peek_twin().value();
  312. if (is_full_stop(maybe_number.first) && isdigit(maybe_number.second)) {
  313. repr.append_code_point(next_code_point().value());
  314. repr.append_code_point(next_code_point().value());
  315. type = Token::NumberType::Number;
  316. for (;;) {
  317. auto digits = peek_code_point();
  318. if (digits.has_value() && !isdigit(digits.value()))
  319. break;
  320. repr.append_code_point(next_code_point().value());
  321. }
  322. }
  323. auto maybe_exp = peek_triplet().value();
  324. if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) {
  325. if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) {
  326. if (isdigit(maybe_exp.third)) {
  327. repr.append_code_point(next_code_point().value());
  328. repr.append_code_point(next_code_point().value());
  329. repr.append_code_point(next_code_point().value());
  330. }
  331. } else if (isdigit(maybe_exp.second)) {
  332. repr.append_code_point(next_code_point().value());
  333. repr.append_code_point(next_code_point().value());
  334. }
  335. type = Token::NumberType::Number;
  336. for (;;) {
  337. auto digits = peek_code_point().value();
  338. if (!isdigit(digits))
  339. break;
  340. repr.append_code_point(next_code_point().value());
  341. }
  342. }
  343. return { repr.to_string(), type };
  344. }
  345. String Tokenizer::consume_a_name()
  346. {
  347. StringBuilder result;
  348. for (;;) {
  349. auto input = next_code_point().value();
  350. if (is_name_code_point(input)) {
  351. result.append_code_point(input);
  352. continue;
  353. }
  354. auto next = peek_code_point();
  355. if (next.has_value() && is_valid_escape_sequence({ input, next.value() })) {
  356. result.append_code_point(consume_escaped_code_point());
  357. continue;
  358. }
  359. break;
  360. }
  361. reconsume_current_input_code_point();
  362. return result.to_string();
  363. }
  364. Token Tokenizer::consume_a_url_token()
  365. {
  366. auto token = create_new_token(Token::TokenType::Url);
  367. for (;;) {
  368. if (!is_whitespace(peek_code_point().value())) {
  369. break;
  370. }
  371. (void)next_code_point();
  372. }
  373. for (;;) {
  374. auto code_point = peek_code_point();
  375. if (!code_point.has_value()) {
  376. log_parse_error();
  377. return token;
  378. }
  379. auto input = code_point.value();
  380. if (is_right_paren(input)) {
  381. (void)next_code_point();
  382. return token;
  383. }
  384. if (is_whitespace(input)) {
  385. for (;;) {
  386. if (!is_whitespace(peek_code_point().value())) {
  387. break;
  388. }
  389. code_point = next_code_point();
  390. }
  391. if (!code_point.has_value()) {
  392. log_parse_error();
  393. return token;
  394. }
  395. input = code_point.value();
  396. if (is_right_paren(input)) {
  397. return token;
  398. }
  399. consume_the_remnants_of_a_bad_url();
  400. return create_new_token(Token::TokenType::BadUrl);
  401. }
  402. if (is_quotation_mark(input) || is_apostrophe(input) || is_left_paren(input) || is_non_printable(input)) {
  403. log_parse_error();
  404. (void)next_code_point();
  405. consume_the_remnants_of_a_bad_url();
  406. return create_new_token(Token::TokenType::BadUrl);
  407. }
  408. if (is_reverse_solidus(input)) {
  409. if (is_valid_escape_sequence()) {
  410. token.m_value.append_code_point(consume_escaped_code_point());
  411. } else {
  412. log_parse_error();
  413. (void)next_code_point();
  414. consume_the_remnants_of_a_bad_url();
  415. return create_new_token(Token::TokenType::BadUrl);
  416. }
  417. }
  418. token.m_value.append_code_point(next_code_point().value());
  419. }
  420. }
  421. void Tokenizer::consume_the_remnants_of_a_bad_url()
  422. {
  423. for (;;) {
  424. auto next = peek_code_point();
  425. if (!next.has_value()) {
  426. return;
  427. }
  428. auto input = next.value();
  429. if (is_right_paren(input)) {
  430. (void)next_code_point();
  431. return;
  432. }
  433. if (is_valid_escape_sequence()) {
  434. [[maybe_unused]] auto cp = consume_escaped_code_point();
  435. }
  436. (void)next_code_point();
  437. }
  438. }
// Steps the iterator back so the most recently consumed code point is
// returned again by the next call to next_code_point().
// NOTE: Only a single saved position exists — this is valid exactly once,
//       immediately after a successful next_code_point().
void Tokenizer::reconsume_current_input_code_point()
{
    m_utf8_iterator = m_prev_utf8_iterator;
}
  443. Token Tokenizer::consume_a_numeric_token()
  444. {
  445. auto number = consume_a_number();
  446. if (would_start_an_identifier()) {
  447. auto token = create_new_token(Token::TokenType::Dimension);
  448. token.m_value.append(number.value);
  449. token.m_number_type = number.type;
  450. auto unit = consume_a_name();
  451. token.m_unit.append(unit);
  452. return token;
  453. }
  454. if (is_percent(peek_code_point().value())) {
  455. (void)next_code_point();
  456. auto token = create_new_token(Token::TokenType::Percentage);
  457. token.m_value.append(number.value);
  458. return token;
  459. }
  460. auto token = create_new_token(Token::TokenType::Number);
  461. token.m_value.append(number.value);
  462. token.m_number_type = number.type;
  463. return token;
  464. }
  465. bool Tokenizer::starts_with_a_number() const
  466. {
  467. return starts_with_a_number(peek_triplet().value());
  468. }
  469. bool Tokenizer::starts_with_a_number(U32Triplet values)
  470. {
  471. if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) {
  472. if (isdigit(values.second))
  473. return true;
  474. if (is_full_stop(values.second) && isdigit(values.third))
  475. return true;
  476. return false;
  477. }
  478. if (is_full_stop(values.first))
  479. return isdigit(values.second);
  480. if (isdigit(values.first))
  481. return true;
  482. return false;
  483. }
  484. bool Tokenizer::is_valid_escape_sequence()
  485. {
  486. return is_valid_escape_sequence(peek_twin().value());
  487. }
  488. bool Tokenizer::is_valid_escape_sequence(U32Twin values)
  489. {
  490. if (!is_reverse_solidus(values.first)) {
  491. return false;
  492. }
  493. if (is_newline(values.second)) {
  494. return false;
  495. }
  496. return true;
  497. }
  498. bool Tokenizer::would_start_an_identifier()
  499. {
  500. return would_start_an_identifier(peek_triplet().value());
  501. }
  502. bool Tokenizer::would_start_an_identifier(U32Triplet values)
  503. {
  504. if (is_hyphen_minus(values.first)) {
  505. if (is_name_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23()))
  506. return true;
  507. return false;
  508. }
  509. if (is_name_start_code_point(values.first)) {
  510. return true;
  511. }
  512. if (is_reverse_solidus(values.first)) {
  513. if (is_valid_escape_sequence(values.to_twin_12()))
  514. return true;
  515. return false;
  516. }
  517. return false;
  518. }
  519. Token Tokenizer::consume_string_token(u32 ending_code_point)
  520. {
  521. auto token = create_new_token(Token::TokenType::String);
  522. for (;;) {
  523. auto code_point = next_code_point();
  524. if (!code_point.has_value()) {
  525. log_parse_error();
  526. return token;
  527. }
  528. auto input = code_point.value();
  529. if (input == ending_code_point)
  530. return token;
  531. if (is_newline(input)) {
  532. reconsume_current_input_code_point();
  533. return create_new_token(Token::TokenType::BadString);
  534. }
  535. if (is_reverse_solidus(input)) {
  536. auto next_input = peek_code_point();
  537. if (!next_input.has_value())
  538. continue;
  539. if (is_newline(next_input.value())) {
  540. (void)next_code_point();
  541. continue;
  542. }
  543. auto escaped = consume_escaped_code_point();
  544. token.m_value.append_code_point(escaped);
  545. }
  546. token.m_value.append_code_point(input);
  547. }
  548. }
  549. void Tokenizer::consume_comments()
  550. {
  551. start:
  552. auto peek = peek_twin();
  553. if (!peek.has_value()) {
  554. log_parse_error();
  555. return;
  556. }
  557. auto twin = peek.value();
  558. if (!(is_solidus(twin.first) && is_asterisk(twin.second)))
  559. return;
  560. (void)next_code_point();
  561. (void)next_code_point();
  562. for (;;) {
  563. auto peek_inner = peek_twin();
  564. if (!peek_inner.has_value()) {
  565. log_parse_error();
  566. return;
  567. }
  568. auto twin_inner = peek_inner.value();
  569. if (is_asterisk(twin_inner.first) && is_solidus(twin_inner.second)) {
  570. (void)next_code_point();
  571. (void)next_code_point();
  572. goto start;
  573. }
  574. (void)next_code_point();
  575. }
  576. }
  577. Token Tokenizer::consume_a_token()
  578. {
  579. consume_comments();
  580. auto code_point = next_code_point();
  581. if (!code_point.has_value()) {
  582. return create_new_token(Token::TokenType::EndOfFile);
  583. }
  584. auto input = code_point.value();
  585. if (is_whitespace(input)) {
  586. dbgln_if(CSS_TOKENIZER_TRACE, "is whitespace");
  587. while (is_whitespace(peek_code_point().value()))
  588. (void)next_code_point();
  589. return create_new_token(Token::TokenType::Whitespace);
  590. }
  591. if (is_quotation_mark(input)) {
  592. dbgln_if(CSS_TOKENIZER_TRACE, "is quotation mark");
  593. return consume_string_token(input);
  594. }
  595. if (is_number_sign(input)) {
  596. dbgln_if(CSS_TOKENIZER_TRACE, "is number sign");
  597. auto next_input = peek_code_point().value();
  598. auto maybe_escape = peek_twin().value();
  599. if (is_name_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) {
  600. auto token = create_new_token(Token::TokenType::Hash);
  601. if (would_start_an_identifier())
  602. token.m_hash_type = Token::HashType::Id;
  603. auto name = consume_a_name();
  604. token.m_value.append(name);
  605. return token;
  606. }
  607. return create_value_token(Token::TokenType::Delim, input);
  608. }
  609. if (is_apostrophe(input)) {
  610. dbgln_if(CSS_TOKENIZER_TRACE, "is apostrophe");
  611. return consume_string_token(input);
  612. }
  613. if (is_left_paren(input)) {
  614. dbgln_if(CSS_TOKENIZER_TRACE, "is left paren");
  615. return create_new_token(Token::TokenType::OpenParen);
  616. }
  617. if (is_right_paren(input)) {
  618. dbgln_if(CSS_TOKENIZER_TRACE, "is right paren");
  619. return create_new_token(Token::TokenType::CloseParen);
  620. }
  621. if (is_plus_sign(input)) {
  622. dbgln_if(CSS_TOKENIZER_TRACE, "is plus sign");
  623. if (starts_with_a_number()) {
  624. reconsume_current_input_code_point();
  625. return consume_a_numeric_token();
  626. }
  627. return create_value_token(Token::TokenType::Delim, input);
  628. }
  629. if (is_comma(input)) {
  630. dbgln_if(CSS_TOKENIZER_TRACE, "is comma");
  631. return create_new_token(Token::TokenType::Comma);
  632. }
  633. if (is_hyphen_minus(input)) {
  634. dbgln_if(CSS_TOKENIZER_TRACE, "is hyphen minus");
  635. if (starts_with_a_number()) {
  636. reconsume_current_input_code_point();
  637. return consume_a_numeric_token();
  638. }
  639. auto next_twin = peek_twin().value();
  640. if (is_hyphen_minus(next_twin.first) && is_greater_than_sign(next_twin.second)) {
  641. (void)next_code_point();
  642. (void)next_code_point();
  643. return create_new_token(Token::TokenType::CDC);
  644. }
  645. if (would_start_an_identifier()) {
  646. reconsume_current_input_code_point();
  647. return consume_an_ident_like_token();
  648. }
  649. return create_value_token(Token::TokenType::Delim, input);
  650. }
  651. if (is_full_stop(input)) {
  652. dbgln_if(CSS_TOKENIZER_TRACE, "is full stop");
  653. if (starts_with_a_number()) {
  654. reconsume_current_input_code_point();
  655. return consume_a_numeric_token();
  656. }
  657. return create_value_token(Token::TokenType::Delim, input);
  658. }
  659. if (is_colon(input)) {
  660. dbgln_if(CSS_TOKENIZER_TRACE, "is colon");
  661. return create_new_token(Token::TokenType::Colon);
  662. }
  663. if (is_semicolon(input)) {
  664. dbgln_if(CSS_TOKENIZER_TRACE, "is semicolon");
  665. return create_new_token(Token::TokenType::Semicolon);
  666. }
  667. if (is_less_than_sign(input)) {
  668. dbgln_if(CSS_TOKENIZER_TRACE, "is less than");
  669. auto maybe_cdo = peek_triplet().value();
  670. if (is_exclamation_mark(maybe_cdo.first) && is_hyphen_minus(maybe_cdo.second) && is_hyphen_minus(maybe_cdo.third)) {
  671. (void)next_code_point();
  672. (void)next_code_point();
  673. (void)next_code_point();
  674. return create_new_token(Token::TokenType::CDO);
  675. }
  676. return create_value_token(Token::TokenType::Delim, input);
  677. }
  678. if (is_at(input)) {
  679. dbgln_if(CSS_TOKENIZER_TRACE, "is at");
  680. if (would_start_an_identifier()) {
  681. auto name = consume_a_name();
  682. return create_value_token(Token::TokenType::AtKeyword, input);
  683. }
  684. return create_value_token(Token::TokenType::Delim, input);
  685. }
  686. if (is_open_square_bracket(input)) {
  687. dbgln_if(CSS_TOKENIZER_TRACE, "is open square");
  688. return create_new_token(Token::TokenType::OpenSquare);
  689. }
  690. if (is_reverse_solidus(input)) {
  691. dbgln_if(CSS_TOKENIZER_TRACE, "is reverse solidus");
  692. if (is_valid_escape_sequence()) {
  693. reconsume_current_input_code_point();
  694. return consume_an_ident_like_token();
  695. }
  696. log_parse_error();
  697. return create_value_token(Token::TokenType::Delim, input);
  698. }
  699. if (is_closed_square_bracket(input)) {
  700. dbgln_if(CSS_TOKENIZER_TRACE, "is closed square");
  701. return create_new_token(Token::TokenType::CloseSquare);
  702. }
  703. if (is_open_curly_bracket(input)) {
  704. dbgln_if(CSS_TOKENIZER_TRACE, "is open curly");
  705. return create_new_token(Token::TokenType::OpenCurly);
  706. }
  707. if (is_closed_curly_bracket(input)) {
  708. dbgln_if(CSS_TOKENIZER_TRACE, "is closed curly");
  709. return create_new_token(Token::TokenType::CloseCurly);
  710. }
  711. if (isdigit(input)) {
  712. dbgln_if(CSS_TOKENIZER_TRACE, "is digit");
  713. reconsume_current_input_code_point();
  714. return consume_a_numeric_token();
  715. }
  716. if (is_name_start_code_point(input)) {
  717. dbgln_if(CSS_TOKENIZER_TRACE, "is name start");
  718. reconsume_current_input_code_point();
  719. return consume_an_ident_like_token();
  720. }
  721. dbgln_if(CSS_TOKENIZER_TRACE, "is delimiter");
  722. return create_value_token(Token::TokenType::Delim, input);
  723. }
  724. }