GMLLexer.cpp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GMLLexer.h"
  7. #include <AK/Vector.h>
  8. #include <ctype.h>
  9. namespace GUI {
  10. GMLLexer::GMLLexer(const StringView& input)
  11. : m_input(input)
  12. {
  13. }
  14. char GMLLexer::peek(size_t offset) const
  15. {
  16. if ((m_index + offset) >= m_input.length())
  17. return 0;
  18. return m_input[m_index + offset];
  19. }
  20. char GMLLexer::consume()
  21. {
  22. VERIFY(m_index < m_input.length());
  23. char ch = m_input[m_index++];
  24. m_previous_position = m_position;
  25. if (ch == '\n') {
  26. m_position.line++;
  27. m_position.column = 0;
  28. } else {
  29. m_position.column++;
  30. }
  31. return ch;
  32. }
  // Returns true if `ch` may begin an identifier: an alphabetic character
  // (as classified by isalpha) or an underscore.
  static bool is_valid_identifier_start(char ch)
  {
      // The <ctype.h> classifiers require an argument representable as
      // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
      // would be passed as negative values; convert first to stay defined.
      return isalpha(static_cast<unsigned char>(ch)) || ch == '_';
  }
  // Returns true if `ch` may appear inside an identifier after its first
  // character: an alphanumeric character (as classified by isalnum) or an
  // underscore.
  static bool is_valid_identifier_character(char ch)
  {
      // The <ctype.h> classifiers require an argument representable as
      // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
      // would be passed as negative values; convert first to stay defined.
      return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
  }
  // Returns true if `ch` may appear in a class name: an alphanumeric
  // character (as classified by isalnum), an underscore, or a colon (so that
  // namespace-qualified names such as "GUI::Widget" are accepted).
  static bool is_valid_class_character(char ch)
  {
      // The <ctype.h> classifiers require an argument representable as
      // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
      // would be passed as negative values; convert first to stay defined.
      return isalnum(static_cast<unsigned char>(ch)) || ch == '_' || ch == ':';
  }
  45. Vector<GMLToken> GMLLexer::lex()
  46. {
  47. Vector<GMLToken> tokens;
  48. size_t token_start_index = 0;
  49. GMLPosition token_start_position;
  50. auto begin_token = [&] {
  51. token_start_index = m_index;
  52. token_start_position = m_position;
  53. };
  54. auto commit_token = [&](auto type) {
  55. GMLToken token;
  56. token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index);
  57. token.m_type = type;
  58. token.m_start = token_start_position;
  59. token.m_end = m_previous_position;
  60. tokens.append(token);
  61. };
  62. auto consume_class = [&] {
  63. begin_token();
  64. consume();
  65. commit_token(GMLToken::Type::ClassMarker);
  66. begin_token();
  67. while (is_valid_class_character(peek()))
  68. consume();
  69. commit_token(GMLToken::Type::ClassName);
  70. };
  71. while (m_index < m_input.length()) {
  72. if (isspace(peek(0))) {
  73. begin_token();
  74. while (isspace(peek()))
  75. consume();
  76. continue;
  77. }
  78. // C++ style comments
  79. if (peek(0) && peek(0) == '/' && peek(1) == '/') {
  80. begin_token();
  81. while (peek() && peek() != '\n')
  82. consume();
  83. commit_token(GMLToken::Type::Comment);
  84. continue;
  85. }
  86. if (peek(0) == '{') {
  87. begin_token();
  88. consume();
  89. commit_token(GMLToken::Type::LeftCurly);
  90. continue;
  91. }
  92. if (peek(0) == '}') {
  93. begin_token();
  94. consume();
  95. commit_token(GMLToken::Type::RightCurly);
  96. continue;
  97. }
  98. if (peek(0) == '@') {
  99. consume_class();
  100. continue;
  101. }
  102. if (is_valid_identifier_start(peek(0))) {
  103. begin_token();
  104. consume();
  105. while (is_valid_identifier_character(peek(0)))
  106. consume();
  107. commit_token(GMLToken::Type::Identifier);
  108. continue;
  109. }
  110. if (peek(0) == ':') {
  111. begin_token();
  112. consume();
  113. commit_token(GMLToken::Type::Colon);
  114. while (isspace(peek()))
  115. consume();
  116. if (peek(0) == '@') {
  117. consume_class();
  118. } else {
  119. begin_token();
  120. while (peek() && peek() != '\n')
  121. consume();
  122. commit_token(GMLToken::Type::JsonValue);
  123. }
  124. continue;
  125. }
  126. consume();
  127. commit_token(GMLToken::Type::Unknown);
  128. }
  129. return tokens;
  130. }
  131. }