Preprocessor.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. /*
  2. * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "Preprocessor.h"
  7. #include <AK/Assertions.h>
  8. #include <AK/GenericLexer.h>
  9. #include <AK/StringBuilder.h>
  10. #include <LibCpp/Lexer.h>
  11. #include <ctype.h>
  12. namespace Cpp {
  13. Preprocessor::Preprocessor(const String& filename, const StringView& program)
  14. : m_filename(filename)
  15. , m_program(program)
  16. {
  17. GenericLexer program_lexer { m_program };
  18. for (;;) {
  19. if (program_lexer.is_eof())
  20. break;
  21. auto line = program_lexer.consume_until('\n');
  22. bool has_multiline = false;
  23. while (line.ends_with('\\') && !program_lexer.is_eof()) {
  24. auto continuation = program_lexer.consume_until('\n');
  25. line = StringView { line.characters_without_null_termination(), line.length() + continuation.length() + 1 };
  26. // Append an empty line to keep the line count correct.
  27. m_lines.append({});
  28. has_multiline = true;
  29. }
  30. if (has_multiline)
  31. m_lines.last() = line;
  32. else
  33. m_lines.append(line);
  34. }
  35. }
  36. Vector<Token> Preprocessor::process_and_lex()
  37. {
  38. for (; m_line_index < m_lines.size(); ++m_line_index) {
  39. auto& line = m_lines[m_line_index];
  40. bool include_in_processed_text = false;
  41. if (line.starts_with("#")) {
  42. auto keyword = handle_preprocessor_line(line);
  43. if (m_options.keep_include_statements && keyword == "include")
  44. include_in_processed_text = true;
  45. } else if (m_state == State::Normal) {
  46. include_in_processed_text = true;
  47. }
  48. if (include_in_processed_text) {
  49. process_line(line);
  50. }
  51. }
  52. return m_tokens;
  53. }
  54. static void consume_whitespace(GenericLexer& lexer)
  55. {
  56. auto ignore_line = [&] {
  57. for (;;) {
  58. if (lexer.consume_specific("\\\n"sv)) {
  59. lexer.ignore(2);
  60. } else {
  61. lexer.ignore_until('\n');
  62. break;
  63. }
  64. }
  65. };
  66. for (;;) {
  67. if (lexer.consume_specific("//"sv))
  68. ignore_line();
  69. else if (lexer.consume_specific("/*"sv))
  70. lexer.ignore_until("*/");
  71. else if (lexer.next_is("\\\n"sv))
  72. lexer.ignore(2);
  73. else if (lexer.is_eof() || !lexer.next_is(isspace))
  74. break;
  75. else
  76. lexer.ignore();
  77. }
  78. }
  79. Preprocessor::PreprocessorKeyword Preprocessor::handle_preprocessor_line(const StringView& line)
  80. {
  81. GenericLexer lexer(line);
  82. consume_whitespace(lexer);
  83. lexer.consume_specific('#');
  84. consume_whitespace(lexer);
  85. auto keyword = lexer.consume_until(' ');
  86. if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
  87. return {};
  88. handle_preprocessor_keyword(keyword, lexer);
  89. return keyword;
  90. }
  91. void Preprocessor::handle_preprocessor_keyword(const StringView& keyword, GenericLexer& line_lexer)
  92. {
  93. if (keyword == "include") {
  94. consume_whitespace(line_lexer);
  95. auto include_path = line_lexer.consume_all();
  96. m_included_paths.append(include_path);
  97. if (definitions_in_header_callback) {
  98. for (auto& def : definitions_in_header_callback(include_path))
  99. m_definitions.set(def.key, def.value);
  100. }
  101. return;
  102. }
  103. if (keyword == "else") {
  104. VERIFY(m_current_depth > 0);
  105. if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
  106. m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
  107. m_state = State::Normal;
  108. }
  109. if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
  110. m_state = State::SkipElseBranch;
  111. }
  112. return;
  113. }
  114. if (keyword == "endif") {
  115. VERIFY(m_current_depth > 0);
  116. --m_current_depth;
  117. if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
  118. m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
  119. }
  120. if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
  121. m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
  122. }
  123. m_state = State::Normal;
  124. return;
  125. }
  126. if (keyword == "define") {
  127. if (m_state == State::Normal) {
  128. auto key = line_lexer.consume_until(' ');
  129. consume_whitespace(line_lexer);
  130. DefinedValue value;
  131. value.filename = m_filename;
  132. value.line = m_line_index;
  133. auto string_value = line_lexer.consume_all();
  134. if (!string_value.is_empty())
  135. value.value = string_value;
  136. m_definitions.set(key, value);
  137. }
  138. return;
  139. }
  140. if (keyword == "undef") {
  141. if (m_state == State::Normal) {
  142. auto key = line_lexer.consume_until(' ');
  143. line_lexer.consume_all();
  144. m_definitions.remove(key);
  145. }
  146. return;
  147. }
  148. if (keyword == "ifdef") {
  149. ++m_current_depth;
  150. if (m_state == State::Normal) {
  151. auto key = line_lexer.consume_until(' ');
  152. if (m_definitions.contains(key)) {
  153. m_depths_of_taken_branches.append(m_current_depth - 1);
  154. return;
  155. } else {
  156. m_depths_of_not_taken_branches.append(m_current_depth - 1);
  157. m_state = State::SkipIfBranch;
  158. return;
  159. }
  160. }
  161. return;
  162. }
  163. if (keyword == "ifndef") {
  164. ++m_current_depth;
  165. if (m_state == State::Normal) {
  166. auto key = line_lexer.consume_until(' ');
  167. if (!m_definitions.contains(key)) {
  168. m_depths_of_taken_branches.append(m_current_depth - 1);
  169. return;
  170. } else {
  171. m_depths_of_not_taken_branches.append(m_current_depth - 1);
  172. m_state = State::SkipIfBranch;
  173. return;
  174. }
  175. }
  176. return;
  177. }
  178. if (keyword == "if") {
  179. ++m_current_depth;
  180. if (m_state == State::Normal) {
  181. // FIXME: Implement #if logic
  182. // We currently always take #if branches.
  183. m_depths_of_taken_branches.append(m_current_depth - 1);
  184. }
  185. return;
  186. }
  187. if (keyword == "elif") {
  188. VERIFY(m_current_depth > 0);
  189. // FIXME: Evaluate the elif expression
  190. // We currently always treat the expression in #elif as true.
  191. if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
  192. m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
  193. m_state = State::Normal;
  194. }
  195. if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
  196. m_state = State::SkipElseBranch;
  197. }
  198. return;
  199. }
  200. if (keyword == "pragma") {
  201. line_lexer.consume_all();
  202. return;
  203. }
  204. if (!m_options.ignore_unsupported_keywords) {
  205. dbgln("Unsupported preprocessor keyword: {}", keyword);
  206. VERIFY_NOT_REACHED();
  207. }
  208. }
  209. void Preprocessor::process_line(StringView const& line)
  210. {
  211. Lexer line_lexer { line, m_line_index };
  212. auto tokens = line_lexer.lex();
  213. for (auto& token : tokens) {
  214. if (token.type() == Token::Type::Whitespace)
  215. continue;
  216. if (token.type() == Token::Type::Identifier) {
  217. if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
  218. do_substitution(token, defined_value->value);
  219. continue;
  220. }
  221. }
  222. m_tokens.append(token);
  223. }
  224. }
  225. void Preprocessor::do_substitution(Token const& replaced_token, DefinedValue const& defined_value)
  226. {
  227. m_substitutions.append({ replaced_token, defined_value });
  228. if (defined_value.value.is_null())
  229. return;
  230. Lexer lexer(m_substitutions.last().defined_value.value);
  231. for (auto& token : lexer.lex()) {
  232. if (token.type() == Token::Type::Whitespace)
  233. continue;
  234. token.set_start(replaced_token.start());
  235. token.set_end(replaced_token.end());
  236. m_tokens.append(token);
  237. }
  238. }
  239. };