Lexer.cpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "Lexer.h"
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/Debug.h>
  9. #include <AK/ScopeLogger.h>
  10. namespace CMake::Cache {
  11. static bool is_identifier_start_character(u32 c)
  12. {
  13. return AK::is_ascii_alpha(c) || c == '_' || c == '-';
  14. }
  15. static bool is_identifier_character(u32 c)
  16. {
  17. return AK::is_ascii_alphanumeric(c) || c == '_' || c == '-';
  18. }
  19. Lexer::Lexer(StringView input)
  20. : GenericLexer(input)
  21. {
  22. }
  23. ErrorOr<Vector<Token>> Lexer::lex(StringView input)
  24. {
  25. Lexer lexer { input };
  26. return lexer.lex_file();
  27. }
  28. ErrorOr<Vector<Token>> Lexer::lex_file()
  29. {
  30. ScopeLogger<CMAKE_DEBUG> logger;
  31. while (!is_eof()) {
  32. skip_whitespace();
  33. if (is_eof())
  34. break;
  35. if (next_is('#')) {
  36. consume_comment();
  37. continue;
  38. }
  39. if (next_is("//"sv)) {
  40. consume_help_text();
  41. continue;
  42. }
  43. if (next_is(is_identifier_start_character)) {
  44. consume_variable_definition();
  45. continue;
  46. }
  47. consume_garbage();
  48. }
  49. return m_tokens;
  50. }
  51. void Lexer::skip_whitespace()
  52. {
  53. ScopeLogger<CMAKE_DEBUG> log;
  54. while (!is_eof()) {
  55. if (next_is('\n')) {
  56. next_line();
  57. continue;
  58. }
  59. auto consumed = consume_while(AK::is_ascii_space);
  60. if (consumed.is_empty())
  61. break;
  62. }
  63. }
  64. void Lexer::consume_comment()
  65. {
  66. ScopeLogger<CMAKE_DEBUG> log;
  67. auto start = position();
  68. VERIFY(consume_specific('#'));
  69. auto comment = consume_until('\n');
  70. emit_token(Token::Type::Comment, comment, start, position());
  71. }
  72. void Lexer::consume_help_text()
  73. {
  74. ScopeLogger<CMAKE_DEBUG> log;
  75. auto start = position();
  76. VERIFY(consume_specific("//"sv));
  77. auto help_text = consume_until('\n');
  78. emit_token(Token::Type::HelpText, help_text, start, position());
  79. }
  80. void Lexer::consume_variable_definition()
  81. {
  82. ScopeLogger<CMAKE_DEBUG> log;
  83. consume_key();
  84. if (!next_is(':')) {
  85. consume_garbage();
  86. return;
  87. }
  88. consume_colon();
  89. if (!next_is(is_identifier_start_character)) {
  90. consume_garbage();
  91. return;
  92. }
  93. consume_type();
  94. if (!next_is('=')) {
  95. consume_garbage();
  96. return;
  97. }
  98. consume_equals();
  99. consume_value();
  100. }
  101. void Lexer::consume_key()
  102. {
  103. ScopeLogger<CMAKE_DEBUG> log;
  104. auto start = position();
  105. auto key = consume_while(is_identifier_character);
  106. emit_token(Token::Type::Key, key, start, position());
  107. }
  108. void Lexer::consume_colon()
  109. {
  110. ScopeLogger<CMAKE_DEBUG> log;
  111. auto start = position();
  112. VERIFY(consume_specific(':'));
  113. emit_token(Token::Type::Colon, ":"sv, start, position());
  114. }
  115. void Lexer::consume_type()
  116. {
  117. ScopeLogger<CMAKE_DEBUG> log;
  118. auto start = position();
  119. auto type = consume_while(is_identifier_character);
  120. emit_token(Token::Type::Type, type, start, position());
  121. }
  122. void Lexer::consume_equals()
  123. {
  124. ScopeLogger<CMAKE_DEBUG> log;
  125. auto start = position();
  126. VERIFY(consume_specific('='));
  127. emit_token(Token::Type::Colon, "="sv, start, position());
  128. }
  129. void Lexer::consume_value()
  130. {
  131. ScopeLogger<CMAKE_DEBUG> log;
  132. auto start = position();
  133. auto value = consume_until('\n');
  134. emit_token(Token::Type::Value, value, start, position());
  135. }
  136. void Lexer::consume_garbage()
  137. {
  138. ScopeLogger<CMAKE_DEBUG> log;
  139. auto start = position();
  140. auto garbage = consume_until('\n');
  141. emit_token(Token::Type::Garbage, garbage, start, position());
  142. }
  143. Position Lexer::position() const
  144. {
  145. return Position {
  146. .line = m_line,
  147. .column = tell() - m_string_offset_after_previous_newline,
  148. };
  149. }
  150. void Lexer::next_line()
  151. {
  152. VERIFY(consume_specific('\n'));
  153. m_string_offset_after_previous_newline = tell();
  154. m_line++;
  155. }
  156. void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end)
  157. {
  158. dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column);
  159. m_tokens.empend(type, value, start, end);
  160. }
  161. }