Lexer.cpp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "Lexer.h"
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/Vector.h>
  9. namespace GUI::GML {
  10. Lexer::Lexer(StringView input)
  11. : m_input(input)
  12. {
  13. }
  14. char Lexer::peek(size_t offset) const
  15. {
  16. if ((m_index + offset) >= m_input.length())
  17. return 0;
  18. return m_input[m_index + offset];
  19. }
  20. char Lexer::consume()
  21. {
  22. VERIFY(m_index < m_input.length());
  23. char ch = m_input[m_index++];
  24. if (ch == '\n') {
  25. m_position.line++;
  26. m_position.column = 0;
  27. } else {
  28. m_position.column++;
  29. }
  30. return ch;
  31. }
  32. constexpr bool is_valid_identifier_start(char ch)
  33. {
  34. return is_ascii_alpha(ch) || ch == '_';
  35. }
  36. constexpr bool is_valid_identifier_character(char ch)
  37. {
  38. return is_ascii_alphanumeric(ch) || ch == '_';
  39. }
  40. constexpr bool is_valid_class_character(char ch)
  41. {
  42. return is_ascii_alphanumeric(ch) || ch == '_' || ch == ':';
  43. }
  44. Vector<Token> Lexer::lex()
  45. {
  46. Vector<Token> tokens;
  47. size_t token_start_index = 0;
  48. Position token_start_position;
  49. auto begin_token = [&] {
  50. token_start_index = m_index;
  51. token_start_position = m_position;
  52. };
  53. auto commit_token = [&](auto type) {
  54. Token token;
  55. token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index);
  56. token.m_type = type;
  57. token.m_start = token_start_position;
  58. token.m_end = m_position;
  59. tokens.append(token);
  60. };
  61. auto consume_class = [&] {
  62. begin_token();
  63. consume();
  64. commit_token(Token::Type::ClassMarker);
  65. begin_token();
  66. while (is_valid_class_character(peek()))
  67. consume();
  68. commit_token(Token::Type::ClassName);
  69. };
  70. while (m_index < m_input.length()) {
  71. if (is_ascii_space(peek(0))) {
  72. begin_token();
  73. while (is_ascii_space(peek()))
  74. consume();
  75. continue;
  76. }
  77. // C++ style comments
  78. if (peek(0) && peek(0) == '/' && peek(1) == '/') {
  79. begin_token();
  80. while (peek() && peek() != '\n')
  81. consume();
  82. commit_token(Token::Type::Comment);
  83. continue;
  84. }
  85. if (peek(0) == '{') {
  86. begin_token();
  87. consume();
  88. commit_token(Token::Type::LeftCurly);
  89. continue;
  90. }
  91. if (peek(0) == '}') {
  92. begin_token();
  93. consume();
  94. commit_token(Token::Type::RightCurly);
  95. continue;
  96. }
  97. if (peek(0) == '@') {
  98. consume_class();
  99. continue;
  100. }
  101. if (is_valid_identifier_start(peek(0))) {
  102. begin_token();
  103. consume();
  104. while (is_valid_identifier_character(peek(0)))
  105. consume();
  106. commit_token(Token::Type::Identifier);
  107. continue;
  108. }
  109. if (peek(0) == ':') {
  110. begin_token();
  111. consume();
  112. commit_token(Token::Type::Colon);
  113. while (is_ascii_space(peek()))
  114. consume();
  115. if (peek(0) == '@') {
  116. consume_class();
  117. } else {
  118. begin_token();
  119. while (peek() && peek() != '\n')
  120. consume();
  121. commit_token(Token::Type::JsonValue);
  122. }
  123. continue;
  124. }
  125. consume();
  126. commit_token(Token::Type::Unknown);
  127. }
  128. return tokens;
  129. }
  130. }