/*
 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Preprocessor.h"
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <LibCpp/Lexer.h>
#include <ctype.h>

namespace Cpp {

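// The constructor splits the program text into physical lines. Lines that end
// with a backslash are merged with their continuation line(s) into a single
// logical line; empty entries are appended so that line indices still match
// the original source.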
Preprocessor::Preprocessor(const String& filename, const StringView& program)
    : m_filename(filename)
    , m_program(program)
{
    GenericLexer program_lexer { m_program };
    for (;;) {
        if (program_lexer.is_eof())
            break;
        auto line = program_lexer.consume_until('\n');

        bool has_multiline = false;
        while (line.ends_with('\\') && !program_lexer.is_eof()) {
            auto continuation = program_lexer.consume_until('\n');
            line = StringView { line.characters_without_null_termination(), line.length() + continuation.length() + 1 };
            // Append an empty line to keep the line count correct.
            m_lines.append({});
            has_multiline = true;
        }

        if (has_multiline)
            m_lines.last() = line;
        else
            m_lines.append(line);
    }
}

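// Walks the stored lines one by one. Preprocessor directives (lines starting
// with '#') are handled by handle_preprocessor_line(); ordinary lines are only
// lexed into tokens while the preprocessor is in the Normal state, i.e. not
// inside a skipped conditional branch. #include lines are additionally kept in
// the token stream when m_options.keep_include_statements is set.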
Vector<Token> Preprocessor::process_and_lex()
{
    for (; m_line_index < m_lines.size(); ++m_line_index) {
        auto& line = m_lines[m_line_index];

        bool include_in_processed_text = false;
        if (line.starts_with("#")) {
            auto keyword = handle_preprocessor_line(line);
            if (m_options.keep_include_statements && keyword == "include")
                include_in_processed_text = true;
        } else if (m_state == State::Normal) {
            include_in_processed_text = true;
        }

        if (include_in_processed_text) {
            process_line(line);
        }
    }

    return m_tokens;
}

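// Skips over whitespace, line continuations ("\" followed by a newline), and
// both // and /* */ comments, so that directive parsing below only ever sees
// meaningful characters.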
static void consume_whitespace(GenericLexer& lexer)
{
    auto ignore_line = [&] {
        for (;;) {
            if (lexer.consume_specific("\\\n"sv)) {
                lexer.ignore(2);
            } else {
                lexer.ignore_until('\n');
                break;
            }
        }
    };

    for (;;) {
        if (lexer.consume_specific("//"sv))
            ignore_line();
        else if (lexer.consume_specific("/*"sv))
            lexer.ignore_until("*/");
        else if (lexer.next_is("\\\n"sv))
            lexer.ignore(2);
        else if (lexer.is_eof() || !lexer.next_is(isspace))
            break;
        else
            lexer.ignore();
    }
}

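// Parses a single '#' directive line: consumes the leading '#', extracts the
// directive keyword, and dispatches to handle_preprocessor_keyword(). Returns
// the keyword so the caller can special-case #include.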
Preprocessor::PreprocessorKeyword Preprocessor::handle_preprocessor_line(const StringView& line)
{
    GenericLexer lexer(line);

    consume_whitespace(lexer);
    lexer.consume_specific('#');
    consume_whitespace(lexer);
    auto keyword = lexer.consume_until(' ');
    if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
        return {};

    handle_preprocessor_keyword(keyword, lexer);
    return keyword;
}

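// Conditional compilation is tracked with a nesting depth counter
// (m_current_depth) and two lists of depths: m_depths_of_taken_branches and
// m_depths_of_not_taken_branches. They record, per nesting level, whether the
// branch that opened that level was taken or skipped, and m_state decides
// whether subsequent lines are lexed or skipped.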
void Preprocessor::handle_preprocessor_keyword(const StringView& keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        consume_whitespace(line_lexer);
        auto include_path = line_lexer.consume_all();
        m_included_paths.append(include_path);
        if (definitions_in_header_callback) {
            for (auto& def : definitions_in_header_callback(include_path))
                m_definitions.set(def.key, def.value);
        }
        return;
    }

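    // #else: if the branch at the current depth was not taken, start including
    // lines again; if it was taken, skip the #else branch instead.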
- if (keyword == "else") {
- VERIFY(m_current_depth > 0);
- if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
- m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
- m_state = State::Normal;
- }
- if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
- m_state = State::SkipElseBranch;
- }
- return;
- }
- if (keyword == "endif") {
- VERIFY(m_current_depth > 0);
- --m_current_depth;
- if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
- m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
- }
- if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
- m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
- }
- m_state = State::Normal;
- return;
- }
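    // #define: record the macro name together with its (optional) replacement
    // text and the location where it was defined. Only handled in the Normal
    // state so that definitions inside skipped branches are ignored.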
- if (keyword == "define") {
- if (m_state == State::Normal) {
- auto key = line_lexer.consume_until(' ');
- consume_whitespace(line_lexer);
- DefinedValue value;
- value.filename = m_filename;
- value.line = m_line_index;
- auto string_value = line_lexer.consume_all();
- if (!string_value.is_empty())
- value.value = string_value;
- m_definitions.set(key, value);
- }
- return;
- }
- if (keyword == "undef") {
- if (m_state == State::Normal) {
- auto key = line_lexer.consume_until(' ');
- line_lexer.consume_all();
- m_definitions.remove(key);
- }
- return;
- }
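    // #ifdef / #ifndef: open a new nesting level and decide, based on whether
    // the name is currently defined, if the branch is taken or skipped.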
- if (keyword == "ifdef") {
- ++m_current_depth;
- if (m_state == State::Normal) {
- auto key = line_lexer.consume_until(' ');
- if (m_definitions.contains(key)) {
- m_depths_of_taken_branches.append(m_current_depth - 1);
- return;
- } else {
- m_depths_of_not_taken_branches.append(m_current_depth - 1);
- m_state = State::SkipIfBranch;
- return;
- }
- }
- return;
- }
- if (keyword == "ifndef") {
- ++m_current_depth;
- if (m_state == State::Normal) {
- auto key = line_lexer.consume_until(' ');
- if (!m_definitions.contains(key)) {
- m_depths_of_taken_branches.append(m_current_depth - 1);
- return;
- } else {
- m_depths_of_not_taken_branches.append(m_current_depth - 1);
- m_state = State::SkipIfBranch;
- return;
- }
- }
- return;
- }
- if (keyword == "if") {
- ++m_current_depth;
- if (m_state == State::Normal) {
- // FIXME: Implement #if logic
- // We currently always take #if branches.
- m_depths_of_taken_branches.append(m_current_depth - 1);
- }
- return;
- }
- if (keyword == "elif") {
- VERIFY(m_current_depth > 0);
- // FIXME: Evaluate the elif expression
- // We currently always treat the expression in #elif as true.
- if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
- m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
- m_state = State::Normal;
- }
- if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
- m_state = State::SkipElseBranch;
- }
- return;
- }
- if (keyword == "pragma") {
- line_lexer.consume_all();
- return;
- }
- if (!m_options.ignore_unsupported_keywords) {
- dbgln("Unsupported preprocessor keyword: {}", keyword);
- VERIFY_NOT_REACHED();
- }
- }
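// Lexes a single line into tokens, dropping whitespace tokens and replacing
// identifiers that name a defined macro with that macro's substitution; the
// result is appended to m_tokens.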
void Preprocessor::process_line(StringView const& line)
{
    Lexer line_lexer { line, m_line_index };
    auto tokens = line_lexer.lex();

    for (auto& token : tokens) {
        if (token.type() == Token::Type::Whitespace)
            continue;
        if (token.type() == Token::Type::Identifier) {
            if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
                do_substitution(token, defined_value->value);
                continue;
            }
        }
        m_tokens.append(token);
    }
}

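// Records the substitution, then re-lexes the macro's replacement text. The
// resulting tokens are given the position of the token they replace, so that
// diagnostics and lookups still point at the original source location.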
void Preprocessor::do_substitution(Token const& replaced_token, DefinedValue const& defined_value)
{
    m_substitutions.append({ replaced_token, defined_value });

    if (defined_value.value.is_null())
        return;

    Lexer lexer(m_substitutions.last().defined_value.value);
    for (auto& token : lexer.lex()) {
        if (token.type() == Token::Type::Whitespace)
            continue;
        token.set_start(replaced_token.start());
        token.set_end(replaced_token.end());
        m_tokens.append(token);
    }
}

};
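
// A minimal usage sketch (assumes the matching declarations in Preprocessor.h;
// the file name and input string below are hypothetical):
//
//     Cpp::Preprocessor preprocessor("example.cpp", "#define FOO 1\nint x = FOO;\n"sv);
//     Vector<Cpp::Token> tokens = preprocessor.process_and_lex();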