Lexer.cpp
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Lexer.h"
#include <AK/ByteString.h>
#include <AK/CharacterTypes.h>
#include <AK/Function.h>
#include <AK/HashTable.h>
#include <AK/StdLibExtras.h>

namespace Cpp {

Lexer::Lexer(StringView input, size_t start_line)
    : m_input(input)
    , m_previous_position { start_line, 0 }
    , m_position { start_line, 0 }
{
}
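
// Returns the character `offset` positions ahead of the cursor, or 0 at end of input.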
char Lexer::peek(size_t offset) const
{
    if ((m_index + offset) >= m_input.length())
        return 0;
    return m_input[m_index + offset];
}
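
// Consumes the current character and advances the stored line/column positions.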
char Lexer::consume()
{
    VERIFY(m_index < m_input.length());
    char ch = m_input[m_index++];
    m_previous_position = m_position;
    if (ch == '\n') {
        m_position.line++;
        m_position.column = 0;
    } else {
        m_position.column++;
    }
    return ch;
}
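
// Identifiers may start with a letter, '_' or '$'; subsequent characters may also be digits.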
constexpr bool is_valid_first_character_of_identifier(char ch)
{
    return is_ascii_alpha(ch) || ch == '_' || ch == '$';
}

constexpr bool is_valid_nonfirst_character_of_identifier(char ch)
{
    return is_valid_first_character_of_identifier(ch) || is_ascii_digit(ch);
}

constexpr StringView s_known_keywords[] = {
    "alignas"sv,
    "alignof"sv,
    "and"sv,
    "and_eq"sv,
    "asm"sv,
    "bitand"sv,
    "bitor"sv,
    "break"sv,
    "case"sv,
    "catch"sv,
    "class"sv,
    "compl"sv,
    "const"sv,
    "const_cast"sv,
    "constexpr"sv,
    "continue"sv,
    "decltype"sv,
    "default"sv,
    "delete"sv,
    "do"sv,
    "dynamic_cast"sv,
    "else"sv,
    "enum"sv,
    "explicit"sv,
    "export"sv,
    "extern"sv,
    "false"sv,
    "final"sv,
    "for"sv,
    "friend"sv,
    "goto"sv,
    "if"sv,
    "inline"sv,
    "mutable"sv,
    "namespace"sv,
    "new"sv,
    "noexcept"sv,
    "not"sv,
    "not_eq"sv,
    "nullptr"sv,
    "operator"sv,
    "or"sv,
    "or_eq"sv,
    "override"sv,
    "private"sv,
    "protected"sv,
    "public"sv,
    "register"sv,
    "reinterpret_cast"sv,
    "return"sv,
    "signed"sv,
    "sizeof"sv,
    "static"sv,
    "static_assert"sv,
    "static_cast"sv,
    "struct"sv,
    "switch"sv,
    "template"sv,
    "this"sv,
    "thread_local"sv,
    "throw"sv,
    "true"sv,
    "try"sv,
    "typedef"sv,
    "typeid"sv,
    "typename"sv,
    "union"sv,
    "using"sv,
    "virtual"sv,
    "volatile"sv,
    "while"sv,
    "xor"sv,
    "xor_eq"sv
};
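
// Names highlighted as known types: builtin types plus common AK container and utility types.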
constexpr StringView s_known_types[] = {
    "Array"sv,
    "Badge"sv,
    "Bitmap"sv,
    "ByteBuffer"sv,
    "Bytes"sv,
    "Checked"sv,
    "CircularDeque"sv,
    "CircularQueue"sv,
    "Deque"sv,
    "DoublyLinkedList"sv,
    "Error"sv,
    "ErrorOr"sv,
    "FlyString"sv,
    "Function"sv,
    "HashMap"sv,
    "HashTable"sv,
    "IPv4Address"sv,
    "IntrusiveList"sv,
    "JsonArray"sv,
    "JsonObject"sv,
    "JsonValue"sv,
    "LexicalPath"sv,
    "MappedFile"sv,
    "NetworkOrdered"sv,
    "NeverDestroyed"sv,
    "NonnullOwnPtr"sv,
    "NonnullRefPtr"sv,
    "Optional"sv,
    "OwnPtr"sv,
    "ReadonlyBytes"sv,
    "RedBlackTree"sv,
    "RefPtr"sv,
    "Result"sv,
    "ScopeGuard"sv,
    "Singleton"sv,
    "SinglyLinkedList"sv,
    "Span"sv,
    "String"sv,
    "StringBuilder"sv,
    "StringImpl"sv,
    "StringView"sv,
    "Utf8View"sv,
    "Variant"sv,
    "Vector"sv,
    "WeakPtr"sv,
    "auto"sv,
    "bool"sv,
    "char"sv,
    "char16_t"sv,
    "char32_t"sv,
    "char8_t"sv,
    "double"sv,
    "float"sv,
    "i16"sv,
    "i32"sv,
    "i64"sv,
    "i8"sv,
    "int"sv,
    "long"sv,
    "short"sv,
    "signed"sv,
    "u16"sv,
    "u32"sv,
    "u64"sv,
    "u8"sv,
    "unsigned"sv,
    "void"sv,
    "wchar_t"sv,
};

static bool is_keyword(StringView string)
{
    static HashTable<ByteString> keywords(array_size(s_known_keywords));
    if (keywords.is_empty()) {
        keywords.set_from(s_known_keywords);
    }
    return keywords.contains(string);
}

static bool is_known_type(StringView string)
{
    static HashTable<ByteString> types(array_size(s_known_types));
    if (types.is_empty()) {
        types.set_from(s_known_types);
    }
    return types.contains(string);
}
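
// Scans the entire input and invokes `callback` once per token. Whitespace tokens
// are suppressed when m_options.ignore_whitespace is set.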
void Lexer::lex_impl(Function<void(Token)> callback)
{
    size_t token_start_index = 0;
    Position token_start_position;

    auto emit_single_char_token = [&](auto type) {
        callback(Token(type, m_position, m_position, m_input.substring_view(m_index, 1)));
        consume();
    };

    auto begin_token = [&] {
        token_start_index = m_index;
        token_start_position = m_position;
    };
    auto commit_token = [&](auto type) {
        if (m_options.ignore_whitespace && type == Token::Type::Whitespace)
            return;
        callback(Token(type, token_start_position, m_previous_position, m_input.substring_view(token_start_index, m_index - token_start_index)));
    };

    auto emit_token_equals = [&](auto type, auto equals_type) {
        if (peek(1) == '=') {
            begin_token();
            consume();
            consume();
            commit_token(equals_type);
            return;
        }
        emit_single_char_token(type);
    };
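
    // Returns the length of the escape sequence that starts at the current backslash
    // (simple, octal, hex, or universal character name), or 0 if it is not a recognized escape.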
    auto match_escape_sequence = [&]() -> size_t {
        switch (peek(1)) {
        case '\'':
        case '"':
        case '?':
        case '\\':
        case 'a':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
        case 'v':
            return 2;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
            size_t octal_digits = 1;
            for (size_t i = 0; i < 2; ++i) {
                char next = peek(2 + i);
                if (next < '0' || next > '7')
                    break;
                ++octal_digits;
            }
            return 1 + octal_digits;
        }
        case 'x': {
            size_t hex_digits = 0;
            while (is_ascii_hex_digit(peek(2 + hex_digits)))
                ++hex_digits;
            return 2 + hex_digits;
        }
        case 'u':
        case 'U': {
            bool is_unicode = true;
            size_t number_of_digits = peek(1) == 'u' ? 4 : 8;
            for (size_t i = 0; i < number_of_digits; ++i) {
                if (!is_ascii_hex_digit(peek(2 + i))) {
                    is_unicode = false;
                    break;
                }
            }
            return is_unicode ? 2 + number_of_digits : 0;
        }
        default:
            return 0;
        }
    };
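
    // Returns the length of a string-literal prefix ending in `quote` (plain, L, u, u8, or U),
    // including the quote character itself, or 0 if no such prefix is present.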
    auto match_string_prefix = [&](char quote) -> size_t {
        if (peek() == quote)
            return 1;
        if (peek() == 'L' && peek(1) == quote)
            return 2;
        if (peek() == 'u') {
            if (peek(1) == quote)
                return 2;
            if (peek(1) == '8' && peek(2) == quote)
                return 3;
        }
        if (peek() == 'U' && peek(1) == quote)
            return 2;
        return 0;
    };
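
    // Main loop: each iteration lexes exactly one token (or one stray character).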
    while (m_index < m_input.length()) {
        auto ch = peek();
        if (is_ascii_space(ch)) {
            begin_token();
            while (is_ascii_space(peek()))
                consume();
            commit_token(Token::Type::Whitespace);
            continue;
        }
        if (ch == '(') {
            emit_single_char_token(Token::Type::LeftParen);
            continue;
        }
        if (ch == ')') {
            emit_single_char_token(Token::Type::RightParen);
            continue;
        }
        if (ch == '{') {
            emit_single_char_token(Token::Type::LeftCurly);
            continue;
        }
        if (ch == '}') {
            emit_single_char_token(Token::Type::RightCurly);
            continue;
        }
        if (ch == '[') {
            emit_single_char_token(Token::Type::LeftBracket);
            continue;
        }
        if (ch == ']') {
            emit_single_char_token(Token::Type::RightBracket);
            continue;
        }
        if (ch == '<') {
            begin_token();
            consume();
            if (peek() == '<') {
                consume();
                if (peek() == '=') {
                    consume();
                    commit_token(Token::Type::LessLessEquals);
                    continue;
                }
                commit_token(Token::Type::LessLess);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::LessEquals);
                continue;
            }
            if (peek() == '>') {
                consume();
                commit_token(Token::Type::LessGreater);
                continue;
            }
            commit_token(Token::Type::Less);
            continue;
        }
        if (ch == '>') {
            begin_token();
            consume();
            if (peek() == '>') {
                consume();
                if (peek() == '=') {
                    consume();
                    commit_token(Token::Type::GreaterGreaterEquals);
                    continue;
                }
                commit_token(Token::Type::GreaterGreater);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::GreaterEquals);
                continue;
            }
            commit_token(Token::Type::Greater);
            continue;
        }
        if (ch == ',') {
            emit_single_char_token(Token::Type::Comma);
            continue;
        }
        if (ch == '+') {
            begin_token();
            consume();
            if (peek() == '+') {
                consume();
                commit_token(Token::Type::PlusPlus);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::PlusEquals);
                continue;
            }
            commit_token(Token::Type::Plus);
            continue;
        }
        if (ch == '-') {
            begin_token();
            consume();
            if (peek() == '-') {
                consume();
                commit_token(Token::Type::MinusMinus);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::MinusEquals);
                continue;
            }
            if (peek() == '>') {
                consume();
                if (peek() == '*') {
                    consume();
                    commit_token(Token::Type::ArrowAsterisk);
                    continue;
                }
                commit_token(Token::Type::Arrow);
                continue;
            }
            commit_token(Token::Type::Minus);
            continue;
        }
        if (ch == '*') {
            emit_token_equals(Token::Type::Asterisk, Token::Type::AsteriskEquals);
            continue;
        }
        if (ch == '%') {
            emit_token_equals(Token::Type::Percent, Token::Type::PercentEquals);
            continue;
        }
        if (ch == '^') {
            emit_token_equals(Token::Type::Caret, Token::Type::CaretEquals);
            continue;
        }
        if (ch == '!') {
            emit_token_equals(Token::Type::ExclamationMark, Token::Type::ExclamationMarkEquals);
            continue;
        }
        if (ch == '=') {
            emit_token_equals(Token::Type::Equals, Token::Type::EqualsEquals);
            continue;
        }
        if (ch == '&') {
            begin_token();
            consume();
            if (peek() == '&') {
                consume();
                commit_token(Token::Type::AndAnd);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::AndEquals);
                continue;
            }
            commit_token(Token::Type::And);
            continue;
        }
        if (ch == '|') {
            begin_token();
            consume();
            if (peek() == '|') {
                consume();
                commit_token(Token::Type::PipePipe);
                continue;
            }
            if (peek() == '=') {
                consume();
                commit_token(Token::Type::PipeEquals);
                continue;
            }
            commit_token(Token::Type::Pipe);
            continue;
        }
        if (ch == '~') {
            emit_single_char_token(Token::Type::Tilde);
            continue;
        }
        if (ch == '?') {
            emit_single_char_token(Token::Type::QuestionMark);
            continue;
        }
        if (ch == ':') {
            begin_token();
            consume();
            if (peek() == ':') {
                consume();
                if (peek() == '*') {
                    consume();
                    commit_token(Token::Type::ColonColonAsterisk);
                    continue;
                }
                commit_token(Token::Type::ColonColon);
                continue;
            }
            commit_token(Token::Type::Colon);
            continue;
        }
        if (ch == ';') {
            emit_single_char_token(Token::Type::Semicolon);
            continue;
        }
        if (ch == '.') {
            begin_token();
            consume();
            if (peek() == '*') {
                consume();
                commit_token(Token::Type::DotAsterisk);
                continue;
            }
            commit_token(Token::Type::Dot);
            continue;
        }
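        // Preprocessor directives: '#include' emits IncludeStatement and IncludePath tokens;
        // any other directive (honoring backslash-newline continuations) becomes a single
        // PreprocessorStatement token.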
        if (ch == '#') {
            begin_token();
            consume();
            while (AK::is_ascii_space(peek()))
                consume();
            size_t directive_start = m_index;
            if (is_valid_first_character_of_identifier(peek()))
                while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
                    consume();
            auto directive = StringView(m_input.characters_without_null_termination() + directive_start, m_index - directive_start);
            if (directive == "include"sv) {
                commit_token(Token::Type::IncludeStatement);
                if (is_ascii_space(peek())) {
                    begin_token();
                    do {
                        consume();
                    } while (is_ascii_space(peek()));
                    commit_token(Token::Type::Whitespace);
                }
                begin_token();
                if (peek() == '<' || peek() == '"') {
                    char closing = consume() == '<' ? '>' : '"';
                    while (peek() && peek() != closing && peek() != '\n')
                        consume();
                    if (peek() && consume() == '\n') {
                        commit_token(Token::Type::IncludePath);
                        continue;
                    }
                    commit_token(Token::Type::IncludePath);
                    begin_token();
                }
            } else {
                while (peek()) {
                    if (peek() == '\\' && peek(1) == '\n') {
                        consume();
                        consume();
                    } else if (peek() == '\n') {
                        break;
                    } else {
                        consume();
                    }
                }
                commit_token(Token::Type::PreprocessorStatement);
            }
            continue;
        }
        if (ch == '/' && peek(1) == '/') {
            begin_token();
            while (peek() && peek() != '\n')
                consume();
            commit_token(Token::Type::Comment);
            continue;
        }
        if (ch == '/' && peek(1) == '*') {
            begin_token();
            consume();
            consume();
            bool comment_block_ends = false;
            while (peek()) {
                if (peek() == '*' && peek(1) == '/') {
                    comment_block_ends = true;
                    break;
                }
                consume();
            }
            if (comment_block_ends) {
                consume();
                consume();
            }
            commit_token(Token::Type::Comment);
            continue;
        }
        if (ch == '/') {
            emit_token_equals(Token::Type::Slash, Token::Type::SlashEquals);
            continue;
        }
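        // Double-quoted string literals (with optional L/u/u8/U prefix); escape sequences inside
        // the literal are emitted as separate EscapeSequence tokens.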
        if (size_t prefix = match_string_prefix('"'); prefix > 0) {
            begin_token();
            for (size_t i = 0; i < prefix; ++i)
                consume();
            while (peek()) {
                if (peek() == '\\') {
                    if (size_t escape = match_escape_sequence(); escape > 0) {
                        commit_token(Token::Type::DoubleQuotedString);
                        begin_token();
                        for (size_t i = 0; i < escape; ++i)
                            consume();
                        commit_token(Token::Type::EscapeSequence);
                        begin_token();
                        continue;
                    }
                }
                // If the string is not terminated, stop before EOF.
                if (!peek(1))
                    break;
                if (consume() == '"')
                    break;
            }
            commit_token(Token::Type::DoubleQuotedString);
            continue;
        }
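        // Raw string literals: R"delimiter( ... )delimiter". The delimiter between the opening
        // quote and '(' is captured so the matching terminator can be located.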
        if (size_t prefix = match_string_prefix('R'); prefix > 0 && peek(prefix) == '"') {
            begin_token();
            for (size_t i = 0; i < prefix + 1; ++i)
                consume();
            size_t prefix_start = m_index;
            while (peek() && peek() != '(')
                consume();
            StringView prefix_string = m_input.substring_view(prefix_start, m_index - prefix_start);
            while (peek()) {
                if (consume() == '"') {
                    VERIFY(m_index >= prefix_string.length() + 2);
                    VERIFY(m_input[m_index - 1] == '"');
                    if (m_input[m_index - 1 - prefix_string.length() - 1] == ')') {
                        StringView suffix_string = m_input.substring_view(m_index - 1 - prefix_string.length(), prefix_string.length());
                        if (prefix_string == suffix_string)
                            break;
                    }
                }
            }
            commit_token(Token::Type::RawString);
            continue;
        }
        if (size_t prefix = match_string_prefix('\''); prefix > 0) {
            begin_token();
            for (size_t i = 0; i < prefix; ++i)
                consume();
            while (peek()) {
                if (peek() == '\\') {
                    if (size_t escape = match_escape_sequence(); escape > 0) {
                        commit_token(Token::Type::SingleQuotedString);
                        begin_token();
                        for (size_t i = 0; i < escape; ++i)
                            consume();
                        commit_token(Token::Type::EscapeSequence);
                        begin_token();
                        continue;
                    }
                }
                if (consume() == '\'')
                    break;
            }
            commit_token(Token::Type::SingleQuotedString);
            continue;
        }
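        // Numeric literals: integers and floats, including hex/binary prefixes, digit separators ('),
        // exponents (e/E/p/P), and type suffixes (u/U, l/L, f/F).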
        if (is_ascii_digit(ch) || (ch == '.' && is_ascii_digit(peek(1)))) {
            begin_token();
            consume();
            auto type = ch == '.' ? Token::Type::Float : Token::Type::Integer;
            bool is_hex = false;
            bool is_binary = false;
            auto match_exponent = [&]() -> size_t {
                char ch = peek();
                if (ch != 'e' && ch != 'E' && ch != 'p' && ch != 'P')
                    return 0;
                type = Token::Type::Float;
                size_t length = 1;
                ch = peek(length);
                if (ch == '+' || ch == '-') {
                    ++length;
                }
                for (ch = peek(length); is_ascii_digit(ch); ch = peek(length)) {
                    ++length;
                }
                return length;
            };
            auto match_type_literal = [&]() -> size_t {
                size_t length = 0;
                for (;;) {
                    char ch = peek(length);
                    if ((ch == 'u' || ch == 'U') && type == Token::Type::Integer) {
                        ++length;
                    } else if ((ch == 'f' || ch == 'F') && !is_binary) {
                        type = Token::Type::Float;
                        ++length;
                    } else if (ch == 'l' || ch == 'L') {
                        ++length;
                    } else
                        return length;
                }
            };
            if (peek() == 'b' || peek() == 'B') {
                consume();
                is_binary = true;
                for (char ch = peek(); ch == '0' || ch == '1' || (ch == '\'' && peek(1) != '\''); ch = peek()) {
                    consume();
                }
            } else {
                if (peek() == 'x' || peek() == 'X') {
                    consume();
                    is_hex = true;
                }
                for (char ch = peek(); (is_hex ? is_ascii_hex_digit(ch) : is_ascii_digit(ch)) || (ch == '\'' && peek(1) != '\'') || ch == '.'; ch = peek()) {
                    if (ch == '.') {
                        if (type == Token::Type::Integer) {
                            type = Token::Type::Float;
                        } else
                            break;
                    }
                    consume();
                }
            }
            if (!is_binary) {
                size_t length = match_exponent();
                for (size_t i = 0; i < length; ++i)
                    consume();
            }
            size_t length = match_type_literal();
            for (size_t i = 0; i < length; ++i)
                consume();
            commit_token(type);
            continue;
        }
        if (is_valid_first_character_of_identifier(ch)) {
            begin_token();
            while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
                consume();
            auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
            if (is_keyword(token_view))
                commit_token(Token::Type::Keyword);
            else if (is_known_type(token_view))
                commit_token(Token::Type::KnownType);
            else
                commit_token(Token::Type::Identifier);
            continue;
        }
        if (ch == '\\' && peek(1) == '\n') {
            consume();
            consume();
            continue;
        }
        dbgln("Unimplemented token character: {}", ch);
        emit_single_char_token(Token::Type::Unknown);
    }
}
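
// Convenience wrapper that lexes the whole input and collects the tokens into a Vector.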
Vector<Token> Lexer::lex()
{
    Vector<Token> tokens;
    lex_impl([&](auto token) {
        tokens.append(move(token));
    });
    return tokens;
}

}
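
// Minimal usage sketch. The Lexer constructor and lex() above are taken from this file;
// how individual tokens are inspected depends on the Token API declared in Lexer.h, so
// the loop body below is only an assumption:
//
//     Cpp::Lexer lexer("int x = 0;"sv);
//     Vector<Cpp::Token> tokens = lexer.lex();
//     for (auto const& token : tokens) {
//         // ...each Token carries a type, start/end positions, and its source text.
//     }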