Preprocessor.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /*
  2. * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "Preprocessor.h"
  7. #include <AK/Assertions.h>
  8. #include <AK/GenericLexer.h>
  9. #include <AK/StringBuilder.h>
  10. #include <LibCpp/Lexer.h>
  11. #include <ctype.h>
  12. namespace Cpp {
  13. Preprocessor::Preprocessor(DeprecatedString const& filename, StringView program)
  14. : m_filename(filename)
  15. , m_program(program)
  16. {
  17. }
  18. Vector<Token> Preprocessor::process_and_lex()
  19. {
  20. Lexer lexer { m_program };
  21. lexer.set_ignore_whitespace(true);
  22. auto tokens = lexer.lex();
  23. m_unprocessed_tokens = tokens;
  24. for (size_t token_index = 0; token_index < tokens.size(); ++token_index) {
  25. auto& token = tokens[token_index];
  26. m_current_line = token.start().line;
  27. if (token.type() == Token::Type::PreprocessorStatement) {
  28. handle_preprocessor_statement(token.text());
  29. m_processed_tokens.append(tokens[token_index]);
  30. continue;
  31. }
  32. if (m_state != State::Normal)
  33. continue;
  34. if (token.type() == Token::Type::IncludeStatement) {
  35. if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath)
  36. continue;
  37. handle_include_statement(tokens[token_index + 1].text());
  38. if (m_options.keep_include_statements) {
  39. m_processed_tokens.append(tokens[token_index]);
  40. m_processed_tokens.append(tokens[token_index + 1]);
  41. }
  42. ++token_index; // Also skip IncludePath token
  43. continue;
  44. }
  45. if (token.type() == Token::Type::Identifier) {
  46. if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
  47. auto last_substituted_token_index = do_substitution(tokens, token_index, defined_value->value);
  48. token_index = last_substituted_token_index;
  49. continue;
  50. }
  51. }
  52. m_processed_tokens.append(token);
  53. }
  54. return m_processed_tokens;
  55. }
// Advances `lexer` past whitespace, '//' line comments, '/* */' block
// comments, and escaped newlines, leaving it at the next significant
// character (or EOF).
static void consume_whitespace(GenericLexer& lexer)
{
    // Skips the remainder of a '//' comment, honoring backslash-newline
    // line continuations inside it.
    auto ignore_line = [&] {
        for (;;) {
            if (lexer.consume_specific("\\\n"sv)) {
                // NOTE(review): consume_specific() has already advanced past
                // the "\\\n"; this skips two further characters on top of
                // that — confirm against AK::GenericLexer that this is the
                // intended behavior.
                lexer.ignore(2);
            } else {
                // Drop everything up to and including the newline.
                lexer.ignore_until('\n');
                lexer.ignore();
                break;
            }
        }
    };
    for (;;) {
        if (lexer.consume_specific("//"sv)) {
            ignore_line();
        } else if (lexer.consume_specific("/*"sv)) {
            // Skip the block comment body and its closing "*/".
            lexer.ignore_until("*/");
            lexer.ignore(2);
        } else if (lexer.next_is("\\\n"sv)) {
            // Escaped newline outside a comment: skip backslash + newline.
            lexer.ignore(2);
        } else if (lexer.is_eof() || !lexer.next_is(isspace)) {
            break;
        } else {
            lexer.ignore();
        }
    }
}
  84. void Preprocessor::handle_preprocessor_statement(StringView line)
  85. {
  86. GenericLexer lexer(line);
  87. consume_whitespace(lexer);
  88. lexer.consume_specific('#');
  89. consume_whitespace(lexer);
  90. auto keyword = lexer.consume_until(' ');
  91. lexer.ignore();
  92. if (keyword.is_empty() || keyword.is_whitespace())
  93. return;
  94. handle_preprocessor_keyword(keyword, lexer);
  95. }
  96. void Preprocessor::handle_include_statement(StringView include_path)
  97. {
  98. m_included_paths.append(include_path);
  99. if (definitions_in_header_callback) {
  100. for (auto& def : definitions_in_header_callback(include_path))
  101. m_definitions.set(def.key, def.value);
  102. }
  103. }
// Dispatches a single preprocessor directive. `keyword` is the directive
// name (without the '#'); `line_lexer` holds the remainder of the line.
//
// Conditional inclusion is tracked with three pieces of state:
//   - m_current_depth: current #if/#ifdef/#ifndef nesting depth,
//   - m_depths_of_taken_branches / m_depths_of_not_taken_branches: the
//     depths at which a branch was taken or skipped, and
//   - m_state: whether tokens are currently being emitted or skipped.
void Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        // Should have called 'handle_include_statement'.
        VERIFY_NOT_REACHED();
    }
    if (keyword == "else") {
        // A stray #else outside any conditional: ignore or hard-fail.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        // The matching #if branch was skipped -> start emitting here.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        // The matching #if branch was taken -> skip this #else branch.
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }
    if (keyword == "endif") {
        // A stray #endif outside any conditional: ignore or hard-fail.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        --m_current_depth;
        // Clear any taken/not-taken bookkeeping for the closed level and
        // resume normal token emission.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        m_state = State::Normal;
        return;
    }
    if (keyword == "define") {
        // Definitions only take effect in emitted (non-skipped) regions.
        if (m_state == State::Normal) {
            auto definition = create_definition(line_lexer.consume_all());
            if (definition.has_value())
                m_definitions.set(definition->key, *definition);
        }
        return;
    }
    if (keyword == "undef") {
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.consume_all(); // Discard the rest of the line.
            m_definitions.remove(key);
        }
        return;
    }
    if (keyword == "ifdef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            // Take the branch iff `key` is currently defined.
            if (m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }
    if (keyword == "ifndef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            // Inverse of #ifdef: take the branch iff `key` is NOT defined.
            if (!m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }
    if (keyword == "if") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            // FIXME: Implement #if logic
            // We currently always take #if branches.
            m_depths_of_taken_branches.append(m_current_depth - 1);
        }
        return;
    }
    if (keyword == "elif") {
        // A stray #elif outside any conditional: ignore or hard-fail.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        // FIXME: Evaluate the elif expression
        // We currently always treat the expression in #elif as true.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }
    if (keyword == "pragma") {
        // Pragmas are accepted but ignored.
        line_lexer.consume_all();
        return;
    }
    if (keyword == "error") {
        // #error is accepted but not reported.
        line_lexer.consume_all();
        return;
    }
    if (!m_options.ignore_unsupported_keywords) {
        dbgln("Unsupported preprocessor keyword: {}", keyword);
        VERIFY_NOT_REACHED();
    }
}
  222. size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
  223. {
  224. if (defined_value.value.is_empty())
  225. return token_index;
  226. Substitution sub;
  227. sub.defined_value = defined_value;
  228. auto macro_call = parse_macro_call(tokens, token_index);
  229. if (!macro_call.has_value())
  230. return token_index;
  231. Vector<Token> original_tokens;
  232. for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
  233. original_tokens.append(tokens[i]);
  234. }
  235. VERIFY(!original_tokens.is_empty());
  236. auto processed_value = evaluate_macro_call(*macro_call, defined_value);
  237. m_substitutions.append({ original_tokens, defined_value, processed_value });
  238. Lexer lexer(processed_value);
  239. lexer.lex_iterable([&](auto token) {
  240. if (token.type() == Token::Type::Whitespace)
  241. return;
  242. token.set_start(original_tokens.first().start());
  243. token.set_end(original_tokens.first().end());
  244. m_processed_tokens.append(token);
  245. });
  246. return macro_call->end_token_index;
  247. }
  248. Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
  249. {
  250. auto name = tokens[token_index];
  251. ++token_index;
  252. if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
  253. return MacroCall { name, {}, token_index - 1 };
  254. ++token_index;
  255. Vector<MacroCall::Argument> arguments;
  256. Optional<MacroCall::Argument> current_argument;
  257. size_t paren_depth = 1;
  258. for (; token_index < tokens.size(); ++token_index) {
  259. auto& token = tokens[token_index];
  260. if (token.type() == Token::Type::LeftParen)
  261. ++paren_depth;
  262. if (token.type() == Token::Type::RightParen)
  263. --paren_depth;
  264. if (paren_depth == 0) {
  265. if (current_argument.has_value())
  266. arguments.append(*current_argument);
  267. break;
  268. }
  269. if (paren_depth == 1 && token.type() == Token::Type::Comma) {
  270. if (current_argument.has_value())
  271. arguments.append(*current_argument);
  272. current_argument = {};
  273. } else {
  274. if (!current_argument.has_value())
  275. current_argument = MacroCall::Argument {};
  276. current_argument->tokens.append(token);
  277. }
  278. }
  279. if (token_index >= tokens.size())
  280. return {};
  281. return MacroCall { name, move(arguments), token_index };
  282. }
// Parses the body of a '#define' line ("KEY", "KEY value", or
// "KEY(params) value") into a Definition. Returns an empty Optional for
// anything malformed: a non-identifier name or parameter, or an
// unterminated parameter list.
Optional<Preprocessor::Definition> Preprocessor::create_definition(StringView line)
{
    Lexer lexer { line };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();
    if (tokens.is_empty())
        return {};

    // The macro name must be an identifier.
    if (tokens.first().type() != Token::Type::Identifier)
        return {};

    Definition definition;
    definition.filename = m_filename;
    definition.line = m_current_line;

    definition.key = tokens.first().text();

    // Name only: no parameters and no replacement value.
    if (tokens.size() == 1)
        return definition;

    size_t token_index = 1;
    // Parse macro parameters (if any)
    if (tokens[token_index].type() == Token::Type::LeftParen) {
        ++token_index;
        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
            auto param = tokens[token_index];
            // Each parameter must be an identifier followed by ',' or ')'.
            if (param.type() != Token::Type::Identifier)
                return {};

            // There must be at least one token after the parameter name.
            if (token_index + 1 >= tokens.size())
                return {};
            ++token_index;

            if (tokens[token_index].type() == Token::Type::Comma)
                ++token_index;
            else if (tokens[token_index].type() != Token::Type::RightParen)
                return {};
            definition.parameters.empend(param.text());
        }

        // Unterminated parameter list.
        if (token_index >= tokens.size())
            return {};
        ++token_index; // Skip the ')'.
    }

    // Everything from the first value token onward is the replacement
    // value, with escaped newlines folded away. NOTE(review): this slices
    // `line` by the token's start column, which assumes the token starts on
    // the first line of `line` — confirm for multi-line #define bodies.
    if (token_index < tokens.size())
        definition.value = remove_escaped_newlines(line.substring_view(tokens[token_index].start().column));

    return definition;
}
  323. DeprecatedString Preprocessor::remove_escaped_newlines(StringView value)
  324. {
  325. static constexpr auto escaped_newline = "\\\n"sv;
  326. AK::StringBuilder processed_value;
  327. GenericLexer lexer { value };
  328. while (!lexer.is_eof()) {
  329. processed_value.append(lexer.consume_until(escaped_newline));
  330. lexer.ignore(escaped_newline.length());
  331. }
  332. return processed_value.to_deprecated_string();
  333. }
  334. DeprecatedString Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition)
  335. {
  336. if (macro_call.arguments.size() != definition.parameters.size()) {
  337. dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text());
  338. return {};
  339. }
  340. Lexer lexer { definition.value };
  341. StringBuilder processed_value;
  342. lexer.lex_iterable([&](auto token) {
  343. if (token.type() != Token::Type::Identifier) {
  344. processed_value.append(token.text());
  345. return;
  346. }
  347. auto param_index = definition.parameters.find_first_index(token.text());
  348. if (!param_index.has_value()) {
  349. processed_value.append(token.text());
  350. return;
  351. }
  352. auto& argument = macro_call.arguments[*param_index];
  353. for (auto& arg_token : argument.tokens) {
  354. processed_value.append(arg_token.text());
  355. }
  356. });
  357. return processed_value.to_deprecated_string();
  358. }
  359. };