Lexer.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2023, Volodymyr V. <vvmposeydon@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include "Lexer.h"
  8. #include <AK/CharacterTypes.h>
  9. #include <AK/Function.h>
  10. #include <AK/HashTable.h>
  11. #include <AK/StdLibExtras.h>
  12. #include <AK/String.h>
  13. namespace GLSL {
  14. Lexer::Lexer(StringView input, size_t start_line)
  15. : m_input(input)
  16. , m_previous_position { start_line, 0 }
  17. , m_position { start_line, 0 }
  18. {
  19. }
  20. char Lexer::peek(size_t offset) const
  21. {
  22. if ((m_index + offset) >= m_input.length())
  23. return 0;
  24. return m_input[m_index + offset];
  25. }
  26. char Lexer::consume()
  27. {
  28. VERIFY(m_index < m_input.length());
  29. char ch = m_input[m_index++];
  30. m_previous_position = m_position;
  31. if (ch == '\n') {
  32. m_position.line++;
  33. m_position.column = 0;
  34. } else {
  35. m_position.column++;
  36. }
  37. return ch;
  38. }
  39. constexpr bool is_valid_first_character_of_identifier(char ch)
  40. {
  41. return is_ascii_alpha(ch) || ch == '_' || ch == '$';
  42. }
  43. constexpr bool is_valid_nonfirst_character_of_identifier(char ch)
  44. {
  45. return is_valid_first_character_of_identifier(ch) || is_ascii_digit(ch);
  46. }
  47. // NOTE: some of these keywords are not used at the moment, however they are reserved for future use and should not be used as identifiers
  48. constexpr Array<StringView, 66> s_known_keywords = {
  49. "asm"sv,
  50. "attribute"sv,
  51. "break"sv,
  52. "case"sv,
  53. "cast"sv,
  54. "centroid"sv,
  55. "class"sv,
  56. "common"
  57. "partition"sv,
  58. "active"sv,
  59. "const"sv,
  60. "continue"sv,
  61. "default"sv,
  62. "discard"sv,
  63. "do"sv,
  64. "else"sv,
  65. "enum"sv,
  66. "extern"sv,
  67. "external"sv,
  68. "false"sv,
  69. "filter"sv,
  70. "fixed"sv,
  71. "flat"sv,
  72. "for"sv,
  73. "goto"sv,
  74. "half"sv,
  75. "highp"sv,
  76. "if"sv,
  77. "in"sv,
  78. "inline"sv,
  79. "inout"sv,
  80. "input"sv,
  81. "interface"sv,
  82. "invariant"sv,
  83. "layout"sv,
  84. "lowp"sv,
  85. "mediump"sv,
  86. "namespace"sv,
  87. "noinline"sv,
  88. "noperspective"sv,
  89. "out"sv,
  90. "output"sv,
  91. "packed"sv,
  92. "patch"sv,
  93. "precision"sv,
  94. "public"sv,
  95. "return"sv,
  96. "row_major"sv,
  97. "sample"sv,
  98. "sizeof"sv,
  99. "smooth"sv,
  100. "static"sv,
  101. "struct"sv,
  102. "subroutine"sv,
  103. "superp"sv,
  104. "switch"sv,
  105. "template"sv,
  106. "this"sv,
  107. "true"sv,
  108. "typedef"sv,
  109. "uniform"sv,
  110. "union"sv,
  111. "using"sv,
  112. "varying"sv,
  113. "volatile"sv,
  114. "while"sv,
  115. };
  116. constexpr Array<StringView, 120> s_known_types = {
  117. "bool"sv,
  118. "bvec2"sv,
  119. "bvec3"sv,
  120. "bvec4"sv,
  121. "dmat2"sv,
  122. "dmat2x2"sv,
  123. "dmat2x3"sv,
  124. "dmat2x4"sv,
  125. "dmat3"sv,
  126. "dmat3x2"sv,
  127. "dmat3x3"sv,
  128. "dmat3x4"sv,
  129. "dmat4"sv,
  130. "dmat4x2"sv,
  131. "dmat4x3"sv,
  132. "dmat4x4"sv,
  133. "double"sv,
  134. "dvec2"sv,
  135. "dvec3"sv,
  136. "dvec4"sv,
  137. "float"sv,
  138. "fvec2"sv,
  139. "fvec3"sv,
  140. "fvec4"sv,
  141. "hvec2"sv,
  142. "hvec3"sv,
  143. "hvec4"sv,
  144. "iimage1D"sv,
  145. "iimage1DArray"sv,
  146. "iimage2D"sv,
  147. "iimage2DArray"sv,
  148. "iimage3D"sv,
  149. "iimageBuffer"sv,
  150. "iimageCube"sv,
  151. "image1D"sv,
  152. "image1DArray"sv,
  153. "image1DArrayShadow"sv,
  154. "image1DShadow"sv,
  155. "image2D"sv,
  156. "image2DArray"sv,
  157. "image2DArrayShadow"sv,
  158. "image2DShadow"sv,
  159. "image3D"sv,
  160. "imageBuffer"sv,
  161. "imageCube"sv,
  162. "int"sv,
  163. "isampler1D"sv,
  164. "isampler1DArray"sv,
  165. "isampler2D"sv,
  166. "isampler2DArray"sv,
  167. "isampler2DMS"sv,
  168. "isampler2DMSArray"sv,
  169. "isampler2DRect"sv,
  170. "isampler3D"sv,
  171. "isamplerBuffer"sv,
  172. "isamplerCube"sv,
  173. "isamplerCubeArray"sv,
  174. "ivec2"sv,
  175. "ivec3"sv,
  176. "ivec4"sv,
  177. "long"sv,
  178. "mat2"sv,
  179. "mat2x2"sv,
  180. "mat2x3"sv,
  181. "mat2x4"sv,
  182. "mat3"sv,
  183. "mat3x2"sv,
  184. "mat3x3"sv,
  185. "mat3x4"sv,
  186. "mat4"sv,
  187. "mat4x2"sv,
  188. "mat4x3"sv,
  189. "mat4x4"sv,
  190. "sampler1D"sv,
  191. "sampler1DArray"sv,
  192. "sampler1DArrayShadow"sv,
  193. "sampler1DShadow"sv,
  194. "sampler2D"sv,
  195. "sampler2DArray"sv,
  196. "sampler2DArrayShadow"sv,
  197. "sampler2DMS"sv,
  198. "sampler2DMSArray"sv,
  199. "sampler2DRect"sv,
  200. "sampler2DRectShadow"sv,
  201. "sampler2DShadow"sv,
  202. "sampler3D"sv,
  203. "sampler3DRect"sv,
  204. "samplerBuffer"sv,
  205. "samplerCube"sv,
  206. "samplerCubeArray"sv,
  207. "samplerCubeArrayShadow"sv,
  208. "samplerCubeShadow"sv,
  209. "short"sv,
  210. "uimage1D"sv,
  211. "uimage1DArray"sv,
  212. "uimage2D"sv,
  213. "uimage2DArray"sv,
  214. "uimage3D"sv,
  215. "uimageBuffer"sv,
  216. "uimageCube"sv,
  217. "uint"sv,
  218. "unsigned"sv,
  219. "usampler1D"sv,
  220. "usampler1DArray"sv,
  221. "usampler2D"sv,
  222. "usampler2DArray"sv,
  223. "usampler2DMS"sv,
  224. "usampler2DMSArray"sv,
  225. "usampler2DRect"sv,
  226. "usampler3D"sv,
  227. "usamplerBuffer"sv,
  228. "usamplerCube"sv,
  229. "usamplerCubeArray"sv,
  230. "uvec2"sv,
  231. "uvec3"sv,
  232. "uvec4"sv,
  233. "vec2"sv,
  234. "vec3"sv,
  235. "vec4"sv,
  236. "void"sv,
  237. };
  238. static bool is_keyword(StringView string)
  239. {
  240. return AK::find(s_known_keywords.begin(), s_known_keywords.end(), string) != s_known_keywords.end();
  241. }
  242. static bool is_known_type(StringView string)
  243. {
  244. return AK::find(s_known_types.begin(), s_known_types.end(), string) != s_known_types.end();
  245. }
  246. void Lexer::lex_impl(Function<void(Token)> callback)
  247. {
  248. size_t token_start_index = 0;
  249. Position token_start_position;
  250. auto emit_single_char_token = [&](auto type) {
  251. callback(Token(type, m_position, m_position, m_input.substring_view(m_index, 1)));
  252. consume();
  253. };
  254. auto begin_token = [&] {
  255. token_start_index = m_index;
  256. token_start_position = m_position;
  257. };
  258. auto commit_token = [&](auto type) {
  259. if (m_options.ignore_whitespace && type == Token::Type::Whitespace)
  260. return;
  261. callback(Token(type, token_start_position, m_previous_position, m_input.substring_view(token_start_index, m_index - token_start_index)));
  262. };
  263. auto emit_token_equals = [&](auto type, auto equals_type) {
  264. if (peek(1) == '=') {
  265. begin_token();
  266. consume();
  267. consume();
  268. commit_token(equals_type);
  269. return;
  270. }
  271. emit_single_char_token(type);
  272. };
  273. auto match_escape_sequence = [&]() -> size_t {
  274. switch (peek(1)) {
  275. case '\'':
  276. case '"':
  277. case '?':
  278. case '\\':
  279. case 'a':
  280. case 'b':
  281. case 'f':
  282. case 'n':
  283. case 'r':
  284. case 't':
  285. case 'v':
  286. return 2;
  287. case '0':
  288. case '1':
  289. case '2':
  290. case '3':
  291. case '4':
  292. case '5':
  293. case '6':
  294. case '7': {
  295. size_t octal_digits = 1;
  296. for (size_t i = 0; i < 2; ++i) {
  297. char next = peek(2 + i);
  298. if (next < '0' || next > '7')
  299. break;
  300. ++octal_digits;
  301. }
  302. return 1 + octal_digits;
  303. }
  304. case 'x': {
  305. size_t hex_digits = 0;
  306. while (is_ascii_hex_digit(peek(2 + hex_digits)))
  307. ++hex_digits;
  308. return 2 + hex_digits;
  309. }
  310. case 'u':
  311. case 'U': {
  312. bool is_unicode = true;
  313. size_t number_of_digits = peek(1) == 'u' ? 4 : 8;
  314. for (size_t i = 0; i < number_of_digits; ++i) {
  315. if (!is_ascii_hex_digit(peek(2 + i))) {
  316. is_unicode = false;
  317. break;
  318. }
  319. }
  320. return is_unicode ? 2 + number_of_digits : 0;
  321. }
  322. default:
  323. return 0;
  324. }
  325. };
  326. auto match_string_prefix = [&](char quote) -> size_t {
  327. if (peek() == quote)
  328. return 1;
  329. if (peek() == 'L' && peek(1) == quote)
  330. return 2;
  331. if (peek() == 'u') {
  332. if (peek(1) == quote)
  333. return 2;
  334. if (peek(1) == '8' && peek(2) == quote)
  335. return 3;
  336. }
  337. if (peek() == 'U' && peek(1) == quote)
  338. return 2;
  339. return 0;
  340. };
  341. while (m_index < m_input.length()) {
  342. auto ch = peek();
  343. if (is_ascii_space(ch)) {
  344. begin_token();
  345. while (is_ascii_space(peek()))
  346. consume();
  347. commit_token(Token::Type::Whitespace);
  348. continue;
  349. }
  350. if (ch == '(') {
  351. emit_single_char_token(Token::Type::LeftParen);
  352. continue;
  353. }
  354. if (ch == ')') {
  355. emit_single_char_token(Token::Type::RightParen);
  356. continue;
  357. }
  358. if (ch == '{') {
  359. emit_single_char_token(Token::Type::LeftCurly);
  360. continue;
  361. }
  362. if (ch == '}') {
  363. emit_single_char_token(Token::Type::RightCurly);
  364. continue;
  365. }
  366. if (ch == '[') {
  367. emit_single_char_token(Token::Type::LeftBracket);
  368. continue;
  369. }
  370. if (ch == ']') {
  371. emit_single_char_token(Token::Type::RightBracket);
  372. continue;
  373. }
  374. if (ch == '<') {
  375. begin_token();
  376. consume();
  377. if (peek() == '<') {
  378. consume();
  379. if (peek() == '=') {
  380. consume();
  381. commit_token(Token::Type::LessLessEquals);
  382. continue;
  383. }
  384. commit_token(Token::Type::LessLess);
  385. continue;
  386. }
  387. if (peek() == '=') {
  388. consume();
  389. commit_token(Token::Type::LessEquals);
  390. continue;
  391. }
  392. commit_token(Token::Type::Less);
  393. continue;
  394. }
  395. if (ch == '>') {
  396. begin_token();
  397. consume();
  398. if (peek() == '>') {
  399. consume();
  400. if (peek() == '=') {
  401. consume();
  402. commit_token(Token::Type::GreaterGreaterEquals);
  403. continue;
  404. }
  405. commit_token(Token::Type::GreaterGreater);
  406. continue;
  407. }
  408. if (peek() == '=') {
  409. consume();
  410. commit_token(Token::Type::GreaterEquals);
  411. continue;
  412. }
  413. commit_token(Token::Type::Greater);
  414. continue;
  415. }
  416. if (ch == ',') {
  417. emit_single_char_token(Token::Type::Comma);
  418. continue;
  419. }
  420. if (ch == '+') {
  421. begin_token();
  422. consume();
  423. if (peek() == '+') {
  424. consume();
  425. commit_token(Token::Type::PlusPlus);
  426. continue;
  427. }
  428. if (peek() == '=') {
  429. consume();
  430. commit_token(Token::Type::PlusEquals);
  431. continue;
  432. }
  433. commit_token(Token::Type::Plus);
  434. continue;
  435. }
  436. if (ch == '-') {
  437. begin_token();
  438. consume();
  439. if (peek() == '-') {
  440. consume();
  441. commit_token(Token::Type::MinusMinus);
  442. continue;
  443. }
  444. if (peek() == '=') {
  445. consume();
  446. commit_token(Token::Type::MinusEquals);
  447. continue;
  448. }
  449. commit_token(Token::Type::Minus);
  450. continue;
  451. }
  452. if (ch == '*') {
  453. emit_token_equals(Token::Type::Asterisk, Token::Type::AsteriskEquals);
  454. continue;
  455. }
  456. if (ch == '%') {
  457. emit_token_equals(Token::Type::Percent, Token::Type::PercentEquals);
  458. continue;
  459. }
  460. if (ch == '^') {
  461. begin_token();
  462. consume();
  463. if (peek() == '^') {
  464. consume();
  465. commit_token(Token::Type::CaretCaret);
  466. continue;
  467. }
  468. if (peek() == '=') {
  469. consume();
  470. commit_token(Token::Type::CaretEquals);
  471. continue;
  472. }
  473. commit_token(Token::Type::Caret);
  474. continue;
  475. }
  476. if (ch == '!') {
  477. emit_token_equals(Token::Type::ExclamationMark, Token::Type::ExclamationMarkEquals);
  478. continue;
  479. }
  480. if (ch == '=') {
  481. emit_token_equals(Token::Type::Equals, Token::Type::EqualsEquals);
  482. continue;
  483. }
  484. if (ch == '&') {
  485. begin_token();
  486. consume();
  487. if (peek() == '&') {
  488. consume();
  489. commit_token(Token::Type::AndAnd);
  490. continue;
  491. }
  492. if (peek() == '=') {
  493. consume();
  494. commit_token(Token::Type::AndEquals);
  495. continue;
  496. }
  497. commit_token(Token::Type::And);
  498. continue;
  499. }
  500. if (ch == '|') {
  501. begin_token();
  502. consume();
  503. if (peek() == '|') {
  504. consume();
  505. commit_token(Token::Type::PipePipe);
  506. continue;
  507. }
  508. if (peek() == '=') {
  509. consume();
  510. commit_token(Token::Type::PipeEquals);
  511. continue;
  512. }
  513. commit_token(Token::Type::Pipe);
  514. continue;
  515. }
  516. if (ch == '~') {
  517. emit_single_char_token(Token::Type::Tilde);
  518. continue;
  519. }
  520. if (ch == '?') {
  521. emit_single_char_token(Token::Type::QuestionMark);
  522. continue;
  523. }
  524. if (ch == ':') {
  525. emit_single_char_token(Token::Type::Colon);
  526. continue;
  527. }
  528. if (ch == ';') {
  529. emit_single_char_token(Token::Type::Semicolon);
  530. continue;
  531. }
  532. if (ch == '.') {
  533. emit_single_char_token(Token::Type::Dot);
  534. continue;
  535. }
  536. if (ch == '#') {
  537. begin_token();
  538. consume();
  539. while (AK::is_ascii_space(peek()))
  540. consume();
  541. size_t directive_start = m_index;
  542. if (is_valid_first_character_of_identifier(peek()))
  543. while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
  544. consume();
  545. auto directive = StringView(m_input.characters_without_null_termination() + directive_start, m_index - directive_start);
  546. if (directive == "include"sv) {
  547. commit_token(Token::Type::IncludeStatement);
  548. if (is_ascii_space(peek())) {
  549. begin_token();
  550. do {
  551. consume();
  552. } while (is_ascii_space(peek()));
  553. commit_token(Token::Type::Whitespace);
  554. }
  555. begin_token();
  556. if (peek() == '<' || peek() == '"') {
  557. char closing = consume() == '<' ? '>' : '"';
  558. while (peek() && peek() != closing && peek() != '\n')
  559. consume();
  560. if (peek() && consume() == '\n') {
  561. commit_token(Token::Type::IncludePath);
  562. continue;
  563. }
  564. commit_token(Token::Type::IncludePath);
  565. begin_token();
  566. }
  567. } else {
  568. while (peek()) {
  569. if (peek() == '\\' && peek(1) == '\n') {
  570. consume();
  571. consume();
  572. } else if (peek() == '\n') {
  573. break;
  574. } else {
  575. consume();
  576. }
  577. }
  578. commit_token(Token::Type::PreprocessorStatement);
  579. }
  580. continue;
  581. }
  582. if (ch == '/' && peek(1) == '/') {
  583. while (peek() && peek() != '\n')
  584. consume();
  585. continue;
  586. }
  587. if (ch == '/' && peek(1) == '*') {
  588. consume();
  589. consume();
  590. bool comment_block_ends = false;
  591. while (peek()) {
  592. if (peek() == '*' && peek(1) == '/') {
  593. comment_block_ends = true;
  594. break;
  595. }
  596. consume();
  597. }
  598. if (comment_block_ends) {
  599. consume();
  600. consume();
  601. }
  602. continue;
  603. }
  604. if (ch == '/') {
  605. emit_token_equals(Token::Type::Slash, Token::Type::SlashEquals);
  606. continue;
  607. }
  608. if (size_t prefix = match_string_prefix('"'); prefix > 0) {
  609. begin_token();
  610. for (size_t i = 0; i < prefix; ++i)
  611. consume();
  612. while (peek()) {
  613. if (peek() == '\\') {
  614. if (size_t escape = match_escape_sequence(); escape > 0) {
  615. commit_token(Token::Type::DoubleQuotedString);
  616. begin_token();
  617. for (size_t i = 0; i < escape; ++i)
  618. consume();
  619. commit_token(Token::Type::EscapeSequence);
  620. begin_token();
  621. continue;
  622. }
  623. }
  624. // If string is not terminated - stop before EOF
  625. if (!peek(1))
  626. break;
  627. if (consume() == '"')
  628. break;
  629. }
  630. commit_token(Token::Type::DoubleQuotedString);
  631. continue;
  632. }
  633. if (size_t prefix = match_string_prefix('R'); prefix > 0 && peek(prefix) == '"') {
  634. begin_token();
  635. for (size_t i = 0; i < prefix + 1; ++i)
  636. consume();
  637. size_t prefix_start = m_index;
  638. while (peek() && peek() != '(')
  639. consume();
  640. StringView prefix_string = m_input.substring_view(prefix_start, m_index - prefix_start);
  641. while (peek()) {
  642. if (consume() == '"') {
  643. VERIFY(m_index >= prefix_string.length() + 2);
  644. VERIFY(m_input[m_index - 1] == '"');
  645. if (m_input[m_index - 1 - prefix_string.length() - 1] == ')') {
  646. StringView suffix_string = m_input.substring_view(m_index - 1 - prefix_string.length(), prefix_string.length());
  647. if (prefix_string == suffix_string)
  648. break;
  649. }
  650. }
  651. }
  652. commit_token(Token::Type::RawString);
  653. continue;
  654. }
  655. if (size_t prefix = match_string_prefix('\''); prefix > 0) {
  656. begin_token();
  657. for (size_t i = 0; i < prefix; ++i)
  658. consume();
  659. while (peek()) {
  660. if (peek() == '\\') {
  661. if (size_t escape = match_escape_sequence(); escape > 0) {
  662. commit_token(Token::Type::SingleQuotedString);
  663. begin_token();
  664. for (size_t i = 0; i < escape; ++i)
  665. consume();
  666. commit_token(Token::Type::EscapeSequence);
  667. begin_token();
  668. continue;
  669. }
  670. }
  671. if (consume() == '\'')
  672. break;
  673. }
  674. commit_token(Token::Type::SingleQuotedString);
  675. continue;
  676. }
  677. if (is_ascii_digit(ch) || (ch == '.' && is_ascii_digit(peek(1)))) {
  678. begin_token();
  679. consume();
  680. auto type = ch == '.' ? Token::Type::Float : Token::Type::Integer;
  681. bool is_hex = false;
  682. bool is_binary = false;
  683. auto match_exponent = [&]() -> size_t {
  684. char ch = peek();
  685. if (ch != 'e' && ch != 'E' && ch != 'p' && ch != 'P')
  686. return 0;
  687. type = Token::Type::Float;
  688. size_t length = 1;
  689. ch = peek(length);
  690. if (ch == '+' || ch == '-') {
  691. ++length;
  692. }
  693. for (ch = peek(length); is_ascii_digit(ch); ch = peek(length)) {
  694. ++length;
  695. }
  696. return length;
  697. };
  698. auto match_type_literal = [&]() -> size_t {
  699. size_t length = 0;
  700. for (;;) {
  701. char ch = peek(length);
  702. if ((ch == 'u' || ch == 'U') && type == Token::Type::Integer) {
  703. ++length;
  704. } else if ((ch == 'f' || ch == 'F') && !is_binary) {
  705. type = Token::Type::Float;
  706. ++length;
  707. } else if (ch == 'l' || ch == 'L') {
  708. ++length;
  709. } else
  710. return length;
  711. }
  712. };
  713. if (peek() == 'b' || peek() == 'B') {
  714. consume();
  715. is_binary = true;
  716. for (char ch = peek(); ch == '0' || ch == '1' || (ch == '\'' && peek(1) != '\''); ch = peek()) {
  717. consume();
  718. }
  719. } else {
  720. if (peek() == 'x' || peek() == 'X') {
  721. consume();
  722. is_hex = true;
  723. }
  724. for (char ch = peek(); (is_hex ? is_ascii_hex_digit(ch) : is_ascii_digit(ch)) || (ch == '\'' && peek(1) != '\'') || ch == '.'; ch = peek()) {
  725. if (ch == '.') {
  726. if (type == Token::Type::Integer) {
  727. type = Token::Type::Float;
  728. } else
  729. break;
  730. };
  731. consume();
  732. }
  733. }
  734. if (!is_binary) {
  735. size_t length = match_exponent();
  736. for (size_t i = 0; i < length; ++i)
  737. consume();
  738. }
  739. size_t length = match_type_literal();
  740. for (size_t i = 0; i < length; ++i)
  741. consume();
  742. commit_token(type);
  743. continue;
  744. }
  745. if (is_valid_first_character_of_identifier(ch)) {
  746. begin_token();
  747. while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
  748. consume();
  749. auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
  750. if (is_keyword(token_view))
  751. commit_token(Token::Type::Keyword);
  752. else if (is_known_type(token_view))
  753. commit_token(Token::Type::KnownType);
  754. else
  755. commit_token(Token::Type::Identifier);
  756. continue;
  757. }
  758. if (ch == '\\' && peek(1) == '\n') {
  759. consume();
  760. consume();
  761. continue;
  762. }
  763. dbgln("Unimplemented token character: {}", ch);
  764. emit_single_char_token(Token::Type::Unknown);
  765. }
  766. }
  767. Vector<Token> Lexer::lex()
  768. {
  769. Vector<Token> tokens;
  770. lex_impl([&](auto token) {
  771. tokens.append(move(token));
  772. });
  773. return tokens;
  774. }
  775. }