/*
 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Preprocessor.h"
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <LibCpp/Lexer.h>
#include <ctype.h>

namespace Cpp {

Preprocessor::Preprocessor(const String& filename, StringView program)
    : m_filename(filename)
    , m_program(program)
{
}

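// Lexes the program, evaluates preprocessor statements as they are encountered,
// skips tokens inside not-taken conditional branches, substitutes defined macros,
// and returns the resulting token stream.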
Vector<Token> Preprocessor::process_and_lex()
{
    Lexer lexer { m_program };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();

    for (size_t token_index = 0; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        m_current_line = token.start().line;

        if (token.type() == Token::Type::PreprocessorStatement) {
            handle_preprocessor_statement(token.text());
            continue;
        }

        if (m_state != State::Normal)
            continue;

        if (token.type() == Token::Type::IncludeStatement) {
            if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath)
                continue;
            handle_include_statement(tokens[token_index + 1].text());
            if (m_options.keep_include_statements) {
                m_processed_tokens.append(tokens[token_index]);
                m_processed_tokens.append(tokens[token_index + 1]);
            }
            continue;
        }

        if (token.type() == Token::Type::Identifier) {
            if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
                auto last_substituted_token_index = do_substitution(tokens, token_index, defined_value->value);
                token_index = last_substituted_token_index;
                continue;
            }
        }

        m_processed_tokens.append(token);
    }

    return m_processed_tokens;
}

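// Skips over whitespace, escaped newlines ("\<newline>"), and both // and /* */ comments.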
static void consume_whitespace(GenericLexer& lexer)
{
    auto ignore_line = [&] {
        for (;;) {
            if (lexer.consume_specific("\\\n"sv)) {
                lexer.ignore(2);
            } else {
                lexer.ignore_until('\n');
                break;
            }
        }
    };

    for (;;) {
        if (lexer.consume_specific("//"sv))
            ignore_line();
        else if (lexer.consume_specific("/*"sv))
            lexer.ignore_until("*/");
        else if (lexer.next_is("\\\n"sv))
            lexer.ignore(2);
        else if (lexer.is_eof() || !lexer.next_is(isspace))
            break;
        else
            lexer.ignore();
    }
}

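// Parses a single '#...' line: extracts the keyword that follows the '#' and
// dispatches to handle_preprocessor_keyword().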
void Preprocessor::handle_preprocessor_statement(StringView line)
{
    GenericLexer lexer(line);

    consume_whitespace(lexer);
    lexer.consume_specific('#');
    consume_whitespace(lexer);
    auto keyword = lexer.consume_until(' ');
    if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
        return;

    handle_preprocessor_keyword(keyword, lexer);
}

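// Records an included path and, if a callback is installed, imports the
// definitions found in that header.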
void Preprocessor::handle_include_statement(StringView include_path)
{
    m_included_paths.append(include_path);
    if (definitions_in_header_callback) {
        for (auto& def : definitions_in_header_callback(include_path))
            m_definitions.set(def.key, def.value);
    }
}

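// Handles a directive after its keyword has been extracted. Conditional directives
// (#ifdef, #ifndef, #if, #elif, #else, #endif) are tracked with m_current_depth and the
// m_depths_of_{taken,not_taken}_branches lists, which determine whether the preprocessor
// is currently skipping tokens (see m_state).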
void Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        // Should have called 'handle_include_statement'.
        VERIFY_NOT_REACHED();
    }

    if (keyword == "else") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;

        VERIFY(m_current_depth > 0);
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }

    if (keyword == "endif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;

        VERIFY(m_current_depth > 0);
        --m_current_depth;
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        m_state = State::Normal;
        return;
    }

    if (keyword == "define") {
        if (m_state == State::Normal) {
            auto definition = create_definition(line_lexer.consume_all());
            if (definition.has_value())
                m_definitions.set(definition->key, *definition);
        }
        return;
    }

    if (keyword == "undef") {
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.consume_all();
            m_definitions.remove(key);
        }
        return;
    }

    if (keyword == "ifdef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            if (m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }

    if (keyword == "ifndef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            if (!m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }

    if (keyword == "if") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            // FIXME: Implement #if logic
            // We currently always take #if branches.
            m_depths_of_taken_branches.append(m_current_depth - 1);
        }
        return;
    }

    if (keyword == "elif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;

        VERIFY(m_current_depth > 0);
        // FIXME: Evaluate the elif expression
        // We currently always treat the expression in #elif as true.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }

    if (keyword == "pragma") {
        line_lexer.consume_all();
        return;
    }

    if (!m_options.ignore_unsupported_keywords) {
        dbgln("Unsupported preprocessor keyword: {}", keyword);
        VERIFY_NOT_REACHED();
    }
}

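// Replaces a defined identifier (and, for function-like macros, its argument list)
// with the lexed tokens of its evaluated value. The substituted tokens keep the source
// position of the original identifier. Returns the index of the last original token
// that was consumed by the substitution.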
size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
{
    if (defined_value.value.is_null())
        return token_index;

    Substitution sub;
    sub.defined_value = defined_value;

    auto macro_call = parse_macro_call(tokens, token_index);
    if (!macro_call.has_value())
        return token_index;

    Vector<Token> original_tokens;
    for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
        original_tokens.append(tokens[i]);
    }
    VERIFY(!original_tokens.is_empty());

    auto processed_value = evaluate_macro_call(*macro_call, defined_value);
    m_substitutions.append({ original_tokens, defined_value, processed_value });

    Lexer lexer(processed_value);
    lexer.lex_iterable([&](auto token) {
        if (token.type() == Token::Type::Whitespace)
            return;
        token.set_start(original_tokens.first().start());
        token.set_end(original_tokens.first().end());
        m_processed_tokens.append(token);
    });

    return macro_call->end_token_index;
}

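// Parses a macro invocation starting at token_index. If the identifier is not followed
// by '(', it is treated as an object-like macro with no arguments; otherwise the
// comma-separated arguments are collected, honoring nested parentheses.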
Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
{
    auto name = tokens[token_index];
    ++token_index;

    if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
        return MacroCall { name, {}, token_index - 1 };
    ++token_index;

    Vector<MacroCall::Argument> arguments;
    MacroCall::Argument current_argument;

    size_t paren_depth = 1;
    for (; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        if (token.type() == Token::Type::LeftParen)
            ++paren_depth;
        if (token.type() == Token::Type::RightParen)
            --paren_depth;

        if (paren_depth == 0) {
            arguments.append(move(current_argument));
            break;
        }

        if (paren_depth == 1 && token.type() == Token::Type::Comma) {
            arguments.append(move(current_argument));
            current_argument = {};
        } else {
            current_argument.tokens.append(token);
        }
    }

    if (token_index >= tokens.size())
        return {};

    return MacroCall { name, move(arguments), token_index };
}

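// Parses the body of a #define: the macro name, an optional parameter list for
// function-like macros, and the replacement value (with escaped newlines removed).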
Optional<Preprocessor::Definition> Preprocessor::create_definition(StringView line)
{
    Lexer lexer { line };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();
    if (tokens.is_empty())
        return {};

    if (tokens.first().type() != Token::Type::Identifier)
        return {};

    Definition definition;
    definition.filename = m_filename;
    definition.line = m_current_line;
    definition.key = tokens.first().text();

    if (tokens.size() == 1)
        return definition;

    size_t token_index = 1;
    // Parse macro parameters (if any)
    if (tokens[token_index].type() == Token::Type::LeftParen) {
        ++token_index;
        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
            auto param = tokens[token_index];
            if (param.type() != Token::Type::Identifier)
                return {};

            if (token_index + 1 >= tokens.size())
                return {};

            ++token_index;

            if (tokens[token_index].type() == Token::Type::Comma)
                ++token_index;
            else if (tokens[token_index].type() != Token::Type::RightParen)
                return {};

            definition.parameters.empend(param.text());
        }
        if (token_index >= tokens.size())
            return {};
        ++token_index;
    }

    if (token_index < tokens.size())
        definition.value = remove_escaped_newlines(line.substring_view(tokens[token_index].start().column));

    return definition;
}

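// Strips "\<newline>" line continuations from a macro's replacement value so it can
// be treated as a single line.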
String Preprocessor::remove_escaped_newlines(StringView value)
{
    AK::StringBuilder processed_value;
    GenericLexer lexer { value };
    while (!lexer.is_eof()) {
        processed_value.append(lexer.consume_until("\\\n"sv));
        // consume_until() stops before the delimiter; skip the escaped newline itself
        // so the lexer keeps advancing.
        lexer.ignore(2);
    }
    return processed_value.to_string();
}

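// Expands a macro call: lexes the definition's value and substitutes every identifier
// that names a parameter with the text of the corresponding argument's tokens.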
String Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition)
{
    if (macro_call.arguments.size() != definition.parameters.size()) {
        dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text());
        return {};
    }

    Lexer lexer { definition.value };
    StringBuilder processed_value;
    lexer.lex_iterable([&](auto token) {
        if (token.type() != Token::Type::Identifier) {
            processed_value.append(token.text());
            return;
        }

        auto param_index = definition.parameters.find_first_index(token.text());
        if (!param_index.has_value()) {
            processed_value.append(token.text());
            return;
        }

        auto& argument = macro_call.arguments[*param_index];
        for (auto& arg_token : argument.tokens) {
            processed_value.append(arg_token.text());
        }
    });

    return processed_value.to_string();
}

}