Preprocessor.cpp

/*
 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
 * Copyright (c) 2023, Volodymyr V. <vvmposeydon@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Preprocessor.h"
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <LibGLSL/Lexer.h>
#include <ctype.h>

namespace GLSL {

Preprocessor::Preprocessor(String filename, String program)
    : m_filename(move(filename))
    , m_program(move(program))
{
}
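
// Usage sketch (illustrative only; the variable names below are placeholders, not part of this file):
//
//     Preprocessor preprocessor(move(filename), move(source));
//     auto tokens = TRY(preprocessor.process_and_lex());
//
// process_and_lex() lexes m_program, executes preprocessor directives, performs macro
// substitution, and returns the resulting token stream.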
ErrorOr<Vector<Token>> Preprocessor::process_and_lex()
{
    Lexer lexer { m_program };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();

    m_unprocessed_tokens = tokens;

    for (size_t token_index = 0; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        m_current_line = token.start().line;

        if (token.type() == Token::Type::PreprocessorStatement) {
            TRY(handle_preprocessor_statement(token.text()));
            m_processed_tokens.append(tokens[token_index]);
            continue;
        }

        if (m_state != State::Normal)
            continue;

        if (token.type() == Token::Type::IncludeStatement) {
            if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath)
                continue;
            handle_include_statement(tokens[token_index + 1].text());
            if (m_options.keep_include_statements) {
                m_processed_tokens.append(tokens[token_index]);
                m_processed_tokens.append(tokens[token_index + 1]);
            }
            ++token_index; // Also skip IncludePath token
            continue;
        }

        if (token.type() == Token::Type::Identifier) {
            if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
                auto last_substituted_token_index = TRY(do_substitution(tokens, token_index, defined_value->value));
                token_index = last_substituted_token_index;
                continue;
            }
        }

        m_processed_tokens.append(token);
    }

    return m_processed_tokens;
}
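
// Skips whitespace, "//" line comments, "/* */" block comments and escaped newlines, so the
// directive parsing below only sees meaningful characters.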
static void consume_whitespace(GenericLexer& lexer)
{
    auto ignore_line = [&] {
        for (;;) {
            if (lexer.consume_specific("\\\n"sv)) {
                lexer.ignore(2);
            } else {
                lexer.ignore_until('\n');
                lexer.ignore();
                break;
            }
        }
    };

    for (;;) {
        if (lexer.consume_specific("//"sv)) {
            ignore_line();
        } else if (lexer.consume_specific("/*"sv)) {
            lexer.ignore_until("*/");
            lexer.ignore(2);
        } else if (lexer.next_is("\\\n"sv)) {
            lexer.ignore(2);
        } else if (lexer.is_eof() || !lexer.next_is(isspace)) {
            break;
        } else {
            lexer.ignore();
        }
    }
}
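
// Parses a single '#...' line: strips the leading '#', extracts the directive keyword, and
// dispatches to handle_preprocessor_keyword().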
ErrorOr<void> Preprocessor::handle_preprocessor_statement(StringView line)
{
    GenericLexer lexer(line);

    consume_whitespace(lexer);
    lexer.consume_specific('#');
    consume_whitespace(lexer);
    auto keyword = lexer.consume_until(' ');
    lexer.ignore();
    if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
        return {};

    TRY(handle_preprocessor_keyword(keyword, lexer));
    return {};
}
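
// Records the included path and, if a definitions_in_header_callback is set, imports the
// definitions that the callback reports for that header.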
void Preprocessor::handle_include_statement(StringView include_path)
{
    m_included_paths.append(include_path);
    if (definitions_in_header_callback) {
        for (auto& def : definitions_in_header_callback(include_path))
            m_definitions.set(def.key, def.value);
    }
}
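
// Handles a single directive keyword (define, undef, ifdef, ifndef, if, elif, else, endif,
// pragma, error). Conditional directives update m_current_depth and the taken/not-taken
// branch bookkeeping that drives m_state.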
ErrorOr<void> Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        // Should have called 'handle_include_statement'.
        VERIFY_NOT_REACHED();
    }

    if (keyword == "else") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return {};
        VERIFY(m_current_depth > 0);
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return {};
    }

    if (keyword == "endif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return {};
        VERIFY(m_current_depth > 0);
        --m_current_depth;
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        m_state = State::Normal;
        return {};
    }

    if (keyword == "define") {
        if (m_state == State::Normal) {
            auto definition = TRY(create_definition(line_lexer.consume_all()));
            if (definition.has_value())
                m_definitions.set(definition->key, *definition);
        }
        return {};
    }

    if (keyword == "undef") {
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.consume_all();
            m_definitions.remove(key);
        }
        return {};
    }

    if (keyword == "ifdef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            if (m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return {};
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return {};
            }
        }
        return {};
    }

    if (keyword == "ifndef") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            if (!m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return {};
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return {};
            }
        }
        return {};
    }

    if (keyword == "if") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            // FIXME: Implement #if logic
            // We currently always take #if branches.
            m_depths_of_taken_branches.append(m_current_depth - 1);
        }
        return {};
    }

    if (keyword == "elif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return {};
        VERIFY(m_current_depth > 0);
        // FIXME: Evaluate the elif expression
        // We currently always treat the expression in #elif as true.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return {};
    }

    if (keyword == "pragma") {
        line_lexer.consume_all();
        return {};
    }

    if (keyword == "error") {
        line_lexer.consume_all();
        return {};
    }

    if (!m_options.ignore_unsupported_keywords) {
        dbgln("Unsupported preprocessor keyword: {}", keyword);
        VERIFY_NOT_REACHED();
    }

    return {};
}
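
// Replaces a macro invocation starting at token_index with its expansion, re-lexes the expanded
// text, and appends the resulting tokens (tagged with the original token's location) to
// m_processed_tokens. Returns the index of the last token consumed by the macro call.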
ErrorOr<size_t> Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
{
    if (defined_value.value.is_empty())
        return token_index;

    Substitution sub;
    sub.defined_value = defined_value;

    auto macro_call = parse_macro_call(tokens, token_index);
    if (!macro_call.has_value())
        return token_index;

    Vector<Token> original_tokens;
    for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
        original_tokens.append(tokens[i]);
    }
    VERIFY(!original_tokens.is_empty());

    auto processed_value = TRY(evaluate_macro_call(*macro_call, defined_value));
    m_substitutions.append({ original_tokens, defined_value, processed_value });

    Lexer lexer(processed_value);
    lexer.lex_iterable([&](auto token) {
        if (token.type() == Token::Type::Whitespace)
            return;
        token.set_start(original_tokens.first().start());
        token.set_end(original_tokens.first().end());
        m_processed_tokens.append(token);
    });

    return macro_call->end_token_index;
}
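
// Parses an optional argument list following the macro name. Returns a MacroCall with no
// arguments for object-like macros, or an empty Optional if the argument list is unterminated.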
Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
{
    auto name = tokens[token_index];
    ++token_index;

    if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
        return MacroCall { name, {}, token_index - 1 };
    ++token_index;

    Vector<MacroCall::Argument> arguments;
    Optional<MacroCall::Argument> current_argument;

    size_t paren_depth = 1;
    for (; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        if (token.type() == Token::Type::LeftParen)
            ++paren_depth;
        if (token.type() == Token::Type::RightParen)
            --paren_depth;

        if (paren_depth == 0) {
            if (current_argument.has_value())
                arguments.append(*current_argument);
            break;
        }

        if (paren_depth == 1 && token.type() == Token::Type::Comma) {
            if (current_argument.has_value())
                arguments.append(*current_argument);
            current_argument = {};
        } else {
            if (!current_argument.has_value())
                current_argument = MacroCall::Argument {};
            current_argument->tokens.append(token);
        }
    }

    if (token_index >= tokens.size())
        return {};

    return MacroCall { name, move(arguments), token_index };
}
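
// Builds a Definition from the text following '#define': the macro name, an optional parameter
// list, and the replacement value with escaped newlines removed. Returns an empty Optional if
// the text does not form a valid definition.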
ErrorOr<Optional<Preprocessor::Definition>> Preprocessor::create_definition(StringView line)
{
    Lexer lexer { line };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();
    if (tokens.is_empty())
        return Optional<Preprocessor::Definition> {};

    if (tokens.first().type() != Token::Type::Identifier)
        return Optional<Preprocessor::Definition> {};

    Definition definition;
    definition.filename = m_filename;
    definition.line = m_current_line;
    definition.key = tokens.first().text();

    if (tokens.size() == 1)
        return definition;

    size_t token_index = 1;
    // Parse macro parameters (if any)
    if (tokens[token_index].type() == Token::Type::LeftParen) {
        ++token_index;
        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
            auto param = tokens[token_index];
            if (param.type() != Token::Type::Identifier)
                return Optional<Preprocessor::Definition> {};

            if (token_index + 1 >= tokens.size())
                return Optional<Preprocessor::Definition> {};
            ++token_index;

            if (tokens[token_index].type() == Token::Type::Comma)
                ++token_index;
            else if (tokens[token_index].type() != Token::Type::RightParen)
                return Optional<Preprocessor::Definition> {};

            definition.parameters.empend(param.text());
        }
        if (token_index >= tokens.size())
            return Optional<Preprocessor::Definition> {};
        ++token_index;
    }

    if (token_index < tokens.size())
        definition.value = TRY(remove_escaped_newlines(line.substring_view(tokens[token_index].start().column))).bytes_as_string_view();

    return definition;
}
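
// Strips backslash-newline sequences so multi-line macro values become a single line.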
ErrorOr<String> Preprocessor::remove_escaped_newlines(StringView value)
{
    static constexpr auto escaped_newline = "\\\n"sv;
    AK::StringBuilder processed_value;

    GenericLexer lexer { value };
    while (!lexer.is_eof()) {
        processed_value.append(lexer.consume_until(escaped_newline));
        lexer.ignore(escaped_newline.length());
    }

    return processed_value.to_string();
}
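
// Expands a macro body: identifiers that name a parameter are replaced with the tokens of the
// corresponding call argument; everything else is copied through verbatim. Returns an empty
// string on an argument-count mismatch.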
ErrorOr<String> Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition)
{
    if (macro_call.arguments.size() != definition.parameters.size()) {
        dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text());
        return String {};
    }

    Lexer lexer { definition.value };
    StringBuilder processed_value;
    lexer.lex_iterable([&](auto token) {
        if (token.type() != Token::Type::Identifier) {
            processed_value.append(token.text());
            return;
        }

        auto param_index = definition.parameters.find_first_index(token.text());
        if (!param_index.has_value()) {
            processed_value.append(token.text());
            return;
        }

        auto& argument = macro_call.arguments[*param_index];
        for (auto& arg_token : argument.tokens) {
            processed_value.append(arg_token.text());
        }
    });

    return processed_value.to_string();
}

}