Parser.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128
  1. /*
  2. * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #ifdef CPP_DEBUG
  27. # define DEBUG_SPAM
  28. #endif
  29. #include "Parser.h"
  30. #include "AST.h"
  31. #include <AK/Debug.h>
  32. #include <AK/ScopeGuard.h>
  33. #include <AK/ScopeLogger.h>
  34. #include <LibCpp/Lexer.h>
  35. namespace Cpp {
  36. Parser::Parser(const StringView& program, const String& filename, Preprocessor::Definitions&& definitions)
  37. : m_definitions(move(definitions))
  38. , m_filename(filename)
  39. {
  40. initialize_program_tokens(program);
  41. #if CPP_DEBUG
  42. dbgln("Tokens:");
  43. for (auto& token : m_tokens) {
  44. StringView text;
  45. if (token.start().line != token.end().line || token.start().column > token.end().column)
  46. text = {};
  47. else
  48. text = text_of_token(token);
  49. dbgln("{} {}:{}-{}:{} ({})", token.to_string(), token.start().line, token.start().column, token.end().line, token.end().column, text);
  50. }
  51. #endif
  52. }
  53. void Parser::initialize_program_tokens(const StringView& program)
  54. {
  55. Lexer lexer(program);
  56. for (auto& token : lexer.lex()) {
  57. if (token.type() == Token::Type::Whitespace)
  58. continue;
  59. if (token.type() == Token::Type::Identifier) {
  60. if (auto defined_value = m_definitions.find(text_of_token(token)); defined_value != m_definitions.end()) {
  61. add_tokens_for_preprocessor(token, defined_value->value);
  62. continue;
  63. }
  64. }
  65. m_tokens.append(move(token));
  66. }
  67. }
  68. NonnullRefPtr<TranslationUnit> Parser::parse()
  69. {
  70. SCOPE_LOGGER();
  71. auto unit = create_root_ast_node(m_tokens.first().start(), m_tokens.last().end());
  72. while (!done()) {
  73. if (match_comment()) {
  74. consume(Token::Type::Comment);
  75. continue;
  76. }
  77. if (match_preprocessor()) {
  78. consume_preprocessor();
  79. continue;
  80. }
  81. auto declaration = match_declaration();
  82. if (declaration.has_value()) {
  83. unit->append(parse_declaration(*unit, declaration.value()));
  84. continue;
  85. }
  86. error("unexpected token");
  87. consume();
  88. }
  89. return unit;
  90. }
  91. Optional<Parser::DeclarationType> Parser::match_declaration()
  92. {
  93. switch (m_state.context) {
  94. case Context::InTranslationUnit:
  95. return match_declaration_in_translation_unit();
  96. case Context::InFunctionDefinition:
  97. return match_declaration_in_function_definition();
  98. default:
  99. error("unexpected context");
  100. return {};
  101. }
  102. }
  103. NonnullRefPtr<Declaration> Parser::parse_declaration(ASTNode& parent, DeclarationType declaration_type)
  104. {
  105. switch (declaration_type) {
  106. case DeclarationType::Function:
  107. return parse_function_declaration(parent);
  108. case DeclarationType::Variable:
  109. return parse_variable_declaration(parent);
  110. case DeclarationType::Enum:
  111. return parse_enum_declaration(parent);
  112. case DeclarationType::Struct:
  113. return parse_struct_or_class_declaration(parent, StructOrClassDeclaration::Type::Struct);
  114. default:
  115. error("unexpected declaration type");
  116. return create_ast_node<InvalidDeclaration>(parent, position(), position());
  117. }
  118. }
  119. NonnullRefPtr<FunctionDeclaration> Parser::parse_function_declaration(ASTNode& parent)
  120. {
  121. auto func = create_ast_node<FunctionDeclaration>(parent, position(), {});
  122. auto return_type_token = consume(Token::Type::KnownType);
  123. auto function_name = consume(Token::Type::Identifier);
  124. consume(Token::Type::LeftParen);
  125. auto parameters = parse_parameter_list(*func);
  126. consume(Token::Type::RightParen);
  127. RefPtr<FunctionDefinition> body;
  128. Position func_end {};
  129. if (peek(Token::Type::LeftCurly).has_value()) {
  130. body = parse_function_definition(*func);
  131. func_end = body->end();
  132. } else {
  133. func_end = position();
  134. if (match_attribute_specification())
  135. consume_attribute_specification(); // we don't use the value of __attribute__
  136. consume(Token::Type::Semicolon);
  137. }
  138. func->m_name = text_of_token(function_name);
  139. func->m_return_type = create_ast_node<Type>(*func, return_type_token.start(), return_type_token.end(), text_of_token(return_type_token));
  140. if (parameters.has_value())
  141. func->m_parameters = move(parameters.value());
  142. func->m_definition = move(body);
  143. func->set_end(func_end);
  144. return func;
  145. }
  146. NonnullRefPtr<FunctionDefinition> Parser::parse_function_definition(ASTNode& parent)
  147. {
  148. SCOPE_LOGGER();
  149. auto func = create_ast_node<FunctionDefinition>(parent, position(), {});
  150. consume(Token::Type::LeftCurly);
  151. while (!eof() && peek().type() != Token::Type::RightCurly) {
  152. func->statements().append(parse_statement(func));
  153. }
  154. func->set_end(position());
  155. if (!eof())
  156. consume(Token::Type::RightCurly);
  157. return func;
  158. }
  159. NonnullRefPtr<Statement> Parser::parse_statement(ASTNode& parent)
  160. {
  161. SCOPE_LOGGER();
  162. ArmedScopeGuard consume_semicolumn([this]() {
  163. consume(Token::Type::Semicolon);
  164. });
  165. if (match_block_statement()) {
  166. consume_semicolumn.disarm();
  167. return parse_block_statement(parent);
  168. }
  169. if (match_comment()) {
  170. consume_semicolumn.disarm();
  171. return parse_comment(parent);
  172. }
  173. if (match_variable_declaration()) {
  174. return parse_variable_declaration(parent);
  175. }
  176. if (match_expression()) {
  177. return parse_expression(parent);
  178. }
  179. if (match_keyword("return")) {
  180. return parse_return_statement(parent);
  181. }
  182. if (match_keyword("for")) {
  183. consume_semicolumn.disarm();
  184. return parse_for_statement(parent);
  185. }
  186. if (match_keyword("if")) {
  187. consume_semicolumn.disarm();
  188. return parse_if_statement(parent);
  189. } else {
  190. error("unexpected statement type");
  191. consume_semicolumn.disarm();
  192. consume();
  193. return create_ast_node<InvalidStatement>(parent, position(), position());
  194. }
  195. }
  196. NonnullRefPtr<Comment> Parser::parse_comment(ASTNode& parent)
  197. {
  198. auto comment = create_ast_node<Comment>(parent, position(), {});
  199. consume(Token::Type::Comment);
  200. comment->set_end(position());
  201. return comment;
  202. }
  203. bool Parser::match_block_statement()
  204. {
  205. return peek().type() == Token::Type::LeftCurly;
  206. }
  207. NonnullRefPtr<BlockStatement> Parser::parse_block_statement(ASTNode& parent)
  208. {
  209. SCOPE_LOGGER();
  210. auto block_statement = create_ast_node<BlockStatement>(parent, position(), {});
  211. consume(Token::Type::LeftCurly);
  212. while (peek().type() != Token::Type::RightCurly) {
  213. block_statement->m_statements.append(parse_statement(*block_statement));
  214. }
  215. consume(Token::Type::RightCurly);
  216. block_statement->set_end(position());
  217. return block_statement;
  218. }
  219. bool Parser::match_variable_declaration()
  220. {
  221. SCOPE_LOGGER();
  222. save_state();
  223. ScopeGuard state_guard = [this] { load_state(); };
  224. parse_type_qualifiers();
  225. // Type
  226. if (!peek(Token::Type::KnownType).has_value() && !peek(Token::Type::Identifier).has_value())
  227. return false;
  228. consume();
  229. // Identifier
  230. if (!peek(Token::Type::Identifier).has_value())
  231. return false;
  232. consume();
  233. if (match(Token::Type::Equals)) {
  234. consume(Token::Type::Equals);
  235. if (!match_expression()) {
  236. error("initial value of variable is not an expression");
  237. return false;
  238. }
  239. return true;
  240. }
  241. return match(Token::Type::Semicolon);
  242. }
  243. NonnullRefPtr<VariableDeclaration> Parser::parse_variable_declaration(ASTNode& parent)
  244. {
  245. SCOPE_LOGGER();
  246. auto var = create_ast_node<VariableDeclaration>(parent, position(), {});
  247. if (!match_variable_declaration()) {
  248. error("unexpected token for variable type");
  249. var->set_end(position());
  250. return var;
  251. }
  252. var->m_type = parse_type(var);
  253. auto identifier_token = consume(Token::Type::Identifier);
  254. RefPtr<Expression> initial_value;
  255. if (match(Token::Type::Equals)) {
  256. consume(Token::Type::Equals);
  257. initial_value = parse_expression(var);
  258. }
  259. var->set_end(position());
  260. var->m_name = text_of_token(identifier_token);
  261. var->m_initial_value = move(initial_value);
  262. return var;
  263. }
  264. NonnullRefPtr<Expression> Parser::parse_expression(ASTNode& parent)
  265. {
  266. SCOPE_LOGGER();
  267. auto expression = parse_primary_expression(parent);
  268. // TODO: remove eof() logic, should still work without it
  269. if (eof() || match(Token::Type::Semicolon)) {
  270. return expression;
  271. }
  272. NonnullRefPtrVector<Expression> secondary_expressions;
  273. while (match_secondary_expression()) {
  274. // FIXME: Handle operator precedence
  275. expression = parse_secondary_expression(parent, expression);
  276. secondary_expressions.append(expression);
  277. }
  278. for (size_t i = 0; secondary_expressions.size() != 0 && i < secondary_expressions.size() - 1; ++i) {
  279. secondary_expressions[i].set_parent(secondary_expressions[i + 1]);
  280. }
  281. return expression;
  282. }
  283. bool Parser::match_secondary_expression()
  284. {
  285. auto type = peek().type();
  286. return type == Token::Type::Plus
  287. || type == Token::Type::PlusEquals
  288. || type == Token::Type::Minus
  289. || type == Token::Type::MinusEquals
  290. || type == Token::Type::Asterisk
  291. || type == Token::Type::AsteriskEquals
  292. || type == Token::Type::Percent
  293. || type == Token::Type::PercentEquals
  294. || type == Token::Type::Equals
  295. || type == Token::Type::Greater
  296. || type == Token::Type::Greater
  297. || type == Token::Type::Less
  298. || type == Token::Type::LessEquals
  299. || type == Token::Type::Dot
  300. || type == Token::Type::PlusPlus
  301. || type == Token::Type::MinusMinus
  302. || type == Token::Type::And
  303. || type == Token::Type::AndEquals
  304. || type == Token::Type::Pipe
  305. || type == Token::Type::PipeEquals
  306. || type == Token::Type::Caret
  307. || type == Token::Type::CaretEquals
  308. || type == Token::Type::LessLess
  309. || type == Token::Type::LessLessEquals
  310. || type == Token::Type::GreaterGreater
  311. || type == Token::Type::GreaterGreaterEquals
  312. || type == Token::Type::AndAnd
  313. || type == Token::Type::PipePipe;
  314. }
  315. NonnullRefPtr<Expression> Parser::parse_primary_expression(ASTNode& parent)
  316. {
  317. SCOPE_LOGGER();
  318. // TODO: remove eof() logic, should still work without it
  319. if (eof()) {
  320. auto node = create_ast_node<Identifier>(parent, position(), position());
  321. return node;
  322. }
  323. if (match_unary_expression())
  324. return parse_unary_expression(parent);
  325. if (match_literal()) {
  326. return parse_literal(parent);
  327. }
  328. switch (peek().type()) {
  329. case Token::Type::Identifier: {
  330. if (match_function_call())
  331. return parse_function_call(parent);
  332. auto token = consume();
  333. return create_ast_node<Identifier>(parent, token.start(), token.end(), text_of_token(token));
  334. }
  335. default: {
  336. error("could not parse primary expression");
  337. auto token = consume();
  338. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  339. }
  340. }
  341. }
  342. bool Parser::match_literal()
  343. {
  344. switch (peek().type()) {
  345. case Token::Type::Integer:
  346. return true;
  347. case Token::Type::DoubleQuotedString:
  348. return true;
  349. case Token::Type::Keyword: {
  350. return match_boolean_literal();
  351. }
  352. default:
  353. return false;
  354. }
  355. }
  356. bool Parser::match_unary_expression()
  357. {
  358. auto type = peek().type();
  359. return type == Token::Type::PlusPlus
  360. || type == Token::Type::MinusMinus
  361. || type == Token::Type::ExclamationMark
  362. || type == Token::Type::Tilde
  363. || type == Token::Type::Plus
  364. || type == Token::Type::Minus;
  365. }
  366. NonnullRefPtr<UnaryExpression> Parser::parse_unary_expression(ASTNode& parent)
  367. {
  368. auto unary_exp = create_ast_node<UnaryExpression>(parent, position(), {});
  369. auto op_token = consume();
  370. UnaryOp op { UnaryOp::Invalid };
  371. switch (op_token.type()) {
  372. case Token::Type::Minus:
  373. op = UnaryOp::Minus;
  374. break;
  375. case Token::Type::Plus:
  376. op = UnaryOp::Plus;
  377. break;
  378. case Token::Type::ExclamationMark:
  379. op = UnaryOp::Not;
  380. break;
  381. case Token::Type::Tilde:
  382. op = UnaryOp::BitwiseNot;
  383. break;
  384. case Token::Type::PlusPlus:
  385. op = UnaryOp::PlusPlus;
  386. break;
  387. default:
  388. break;
  389. }
  390. unary_exp->m_op = op;
  391. auto lhs = parse_expression(*unary_exp);
  392. unary_exp->m_lhs = lhs;
  393. unary_exp->set_end(lhs->end());
  394. return unary_exp;
  395. }
  396. NonnullRefPtr<Expression> Parser::parse_literal(ASTNode& parent)
  397. {
  398. switch (peek().type()) {
  399. case Token::Type::Integer: {
  400. auto token = consume();
  401. return create_ast_node<NumericLiteral>(parent, token.start(), token.end(), text_of_token(token));
  402. }
  403. case Token::Type::DoubleQuotedString: {
  404. return parse_string_literal(parent);
  405. }
  406. case Token::Type::Keyword: {
  407. if (match_boolean_literal())
  408. return parse_boolean_literal(parent);
  409. [[fallthrough]];
  410. }
  411. default: {
  412. error("could not parse literal");
  413. auto token = consume();
  414. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  415. }
  416. }
  417. }
  418. NonnullRefPtr<Expression> Parser::parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs)
  419. {
  420. SCOPE_LOGGER();
  421. switch (peek().type()) {
  422. case Token::Type::Plus:
  423. return parse_binary_expression(parent, lhs, BinaryOp::Addition);
  424. case Token::Type::Less:
  425. return parse_binary_expression(parent, lhs, BinaryOp::LessThan);
  426. case Token::Type::Equals:
  427. return parse_assignment_expression(parent, lhs, AssignmentOp::Assignment);
  428. case Token::Type::Dot: {
  429. consume();
  430. auto exp = create_ast_node<MemberExpression>(parent, lhs->start(), {});
  431. lhs->set_parent(*exp);
  432. exp->m_object = move(lhs);
  433. auto property_token = consume(Token::Type::Identifier);
  434. exp->m_property = create_ast_node<Identifier>(*exp, property_token.start(), property_token.end(), text_of_token(property_token));
  435. exp->set_end(property_token.end());
  436. return exp;
  437. }
  438. default: {
  439. error(String::formatted("unexpected operator for expression. operator: {}", peek().to_string()));
  440. auto token = consume();
  441. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  442. }
  443. }
  444. }
  445. NonnullRefPtr<BinaryExpression> Parser::parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp op)
  446. {
  447. consume(); // Operator
  448. auto exp = create_ast_node<BinaryExpression>(parent, lhs->start(), {});
  449. lhs->set_parent(*exp);
  450. exp->m_op = op;
  451. exp->m_lhs = move(lhs);
  452. auto rhs = parse_expression(exp);
  453. exp->set_end(rhs->end());
  454. exp->m_rhs = move(rhs);
  455. return exp;
  456. }
  457. NonnullRefPtr<AssignmentExpression> Parser::parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp op)
  458. {
  459. consume(); // Operator
  460. auto exp = create_ast_node<AssignmentExpression>(parent, lhs->start(), {});
  461. lhs->set_parent(*exp);
  462. exp->m_op = op;
  463. exp->m_lhs = move(lhs);
  464. auto rhs = parse_expression(exp);
  465. exp->set_end(rhs->end());
  466. exp->m_rhs = move(rhs);
  467. return exp;
  468. }
  469. Optional<Parser::DeclarationType> Parser::match_declaration_in_translation_unit()
  470. {
  471. if (match_function_declaration())
  472. return DeclarationType::Function;
  473. if (match_enum_declaration())
  474. return DeclarationType::Enum;
  475. if (match_struct_declaration())
  476. return DeclarationType::Struct;
  477. return {};
  478. }
  479. bool Parser::match_enum_declaration()
  480. {
  481. return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "enum";
  482. }
  483. bool Parser::match_struct_declaration()
  484. {
  485. return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "struct";
  486. }
  487. bool Parser::match_function_declaration()
  488. {
  489. save_state();
  490. ScopeGuard state_guard = [this] { load_state(); };
  491. if (!peek(Token::Type::KnownType).has_value())
  492. return false;
  493. consume();
  494. if (!peek(Token::Type::Identifier).has_value())
  495. return false;
  496. consume();
  497. if (!peek(Token::Type::LeftParen).has_value())
  498. return false;
  499. consume();
  500. while (consume().type() != Token::Type::RightParen && !eof()) { };
  501. if (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value())
  502. return true;
  503. if (match_attribute_specification()) {
  504. consume_attribute_specification();
  505. return peek(Token::Type::Semicolon).has_value();
  506. }
  507. return false;
  508. }
  509. Optional<NonnullRefPtrVector<Parameter>> Parser::parse_parameter_list(ASTNode& parent)
  510. {
  511. SCOPE_LOGGER();
  512. NonnullRefPtrVector<Parameter> parameters;
  513. while (peek().type() != Token::Type::RightParen && !eof()) {
  514. if (match_ellipsis()) {
  515. auto last_dot = consume();
  516. while (peek().type() == Token::Type::Dot)
  517. last_dot = consume();
  518. auto param = create_ast_node<Parameter>(parent, position(), last_dot.end(), StringView {});
  519. param->m_is_ellipsis = true;
  520. parameters.append(move(param));
  521. } else {
  522. auto type = parse_type(parent);
  523. auto name_identifier = peek(Token::Type::Identifier);
  524. if (name_identifier.has_value())
  525. consume(Token::Type::Identifier);
  526. StringView name;
  527. if (name_identifier.has_value())
  528. name = text_of_token(name_identifier.value());
  529. auto param = create_ast_node<Parameter>(parent, type->start(), name_identifier.has_value() ? name_identifier.value().end() : type->end(), name);
  530. param->m_type = move(type);
  531. parameters.append(move(param));
  532. }
  533. if (peek(Token::Type::Comma).has_value())
  534. consume(Token::Type::Comma);
  535. }
  536. return parameters;
  537. }
  538. bool Parser::match_comment()
  539. {
  540. return match(Token::Type::Comment);
  541. }
  542. bool Parser::match_whitespace()
  543. {
  544. return match(Token::Type::Whitespace);
  545. }
  546. bool Parser::match_preprocessor()
  547. {
  548. return match(Token::Type::PreprocessorStatement) || match(Token::Type::IncludeStatement);
  549. }
  550. void Parser::consume_preprocessor()
  551. {
  552. SCOPE_LOGGER();
  553. switch (peek().type()) {
  554. case Token::Type::PreprocessorStatement:
  555. consume();
  556. break;
  557. case Token::Type::IncludeStatement:
  558. consume();
  559. consume(Token::Type::IncludePath);
  560. break;
  561. default:
  562. error("unexpected token while parsing preprocessor statement");
  563. consume();
  564. }
  565. }
  566. Optional<Token> Parser::consume_whitespace()
  567. {
  568. SCOPE_LOGGER();
  569. return consume(Token::Type::Whitespace);
  570. }
  571. Token Parser::consume(Token::Type type)
  572. {
  573. auto token = consume();
  574. if (token.type() != type)
  575. error(String::formatted("expected {} at {}:{}, found: {}", Token::type_to_string(type), token.start().line, token.start().column, Token::type_to_string(token.type())));
  576. return token;
  577. }
  578. bool Parser::match(Token::Type type)
  579. {
  580. return peek().type() == type;
  581. }
  582. Token Parser::consume()
  583. {
  584. if (eof()) {
  585. error("C++ Parser: out of tokens");
  586. return { Token::Type::EOF_TOKEN, position(), position(), {} };
  587. }
  588. return m_tokens[m_state.token_index++];
  589. }
  590. Token Parser::peek(size_t offset) const
  591. {
  592. if (m_state.token_index + offset >= m_tokens.size())
  593. return { Token::Type::EOF_TOKEN, position(), position(), {} };
  594. return m_tokens[m_state.token_index + offset];
  595. }
  596. Optional<Token> Parser::peek(Token::Type type) const
  597. {
  598. auto token = peek();
  599. if (token.type() == type)
  600. return token;
  601. return {};
  602. }
  603. void Parser::save_state()
  604. {
  605. m_saved_states.append(m_state);
  606. }
  607. void Parser::load_state()
  608. {
  609. m_state = m_saved_states.take_last();
  610. }
  611. Optional<Parser::DeclarationType> Parser::match_declaration_in_function_definition()
  612. {
  613. VERIFY_NOT_REACHED();
  614. }
  615. bool Parser::done()
  616. {
  617. return m_state.token_index == m_tokens.size();
  618. }
  619. StringView Parser::text_of_token(const Cpp::Token& token) const
  620. {
  621. return token.text();
  622. }
  623. String Parser::text_of_node(const ASTNode& node) const
  624. {
  625. return text_in_range(node.start(), node.end());
  626. }
  627. String Parser::text_in_range(Position start, Position end) const
  628. {
  629. auto start_token_index = index_of_token_at(start);
  630. auto end_node_index = index_of_token_at(end);
  631. VERIFY(start_token_index.has_value());
  632. VERIFY(end_node_index.has_value());
  633. StringBuilder text;
  634. for (size_t i = start_token_index.value(); i <= end_node_index.value(); ++i) {
  635. text.append(m_tokens[i].text());
  636. }
  637. return text.build();
  638. }
  639. void Parser::error(StringView message)
  640. {
  641. SCOPE_LOGGER();
  642. if (message.is_null() || message.is_empty())
  643. message = "<empty>";
  644. String formatted_message;
  645. if (m_state.token_index >= m_tokens.size()) {
  646. formatted_message = String::formatted("C++ Parsed error on EOF.{}", message);
  647. } else {
  648. formatted_message = String::formatted("C++ Parser error: {}. token: {} ({}:{})",
  649. message,
  650. m_state.token_index < m_tokens.size() ? text_of_token(m_tokens[m_state.token_index]) : "EOF",
  651. m_tokens[m_state.token_index].start().line,
  652. m_tokens[m_state.token_index].start().column);
  653. }
  654. m_errors.append(formatted_message);
  655. dbgln_if(CPP_DEBUG, "{}", formatted_message);
  656. }
  657. bool Parser::match_expression()
  658. {
  659. auto token_type = peek().type();
  660. return token_type == Token::Type::Integer
  661. || token_type == Token::Type::Float
  662. || token_type == Token::Type::Identifier
  663. || token_type == Token::Type::DoubleQuotedString
  664. || match_unary_expression();
  665. }
  666. bool Parser::eof() const
  667. {
  668. return m_state.token_index >= m_tokens.size();
  669. }
  670. Position Parser::position() const
  671. {
  672. if (eof())
  673. return m_tokens.last().end();
  674. return peek().start();
  675. }
  676. RefPtr<ASTNode> Parser::eof_node() const
  677. {
  678. VERIFY(m_tokens.size());
  679. return node_at(m_tokens.last().end());
  680. }
  681. RefPtr<ASTNode> Parser::node_at(Position pos) const
  682. {
  683. auto index = index_of_node_at(pos);
  684. if (!index.has_value())
  685. return nullptr;
  686. return m_nodes[index.value()];
  687. }
  688. Optional<size_t> Parser::index_of_node_at(Position pos) const
  689. {
  690. VERIFY(!m_tokens.is_empty());
  691. Optional<size_t> match_node_index;
  692. auto node_span = [](const ASTNode& node) {
  693. VERIFY(node.end().line >= node.start().line);
  694. VERIFY((node.end().line > node.start().line) || (node.end().column >= node.start().column));
  695. return Position { node.end().line - node.start().line, node.start().line != node.end().line ? 0 : node.end().column - node.start().column };
  696. };
  697. for (size_t node_index = 0; node_index < m_nodes.size(); ++node_index) {
  698. auto& node = m_nodes[node_index];
  699. if (node.start() > pos || node.end() < pos)
  700. continue;
  701. if (!match_node_index.has_value() || (node_span(node) < node_span(m_nodes[match_node_index.value()])))
  702. match_node_index = node_index;
  703. }
  704. return match_node_index;
  705. }
  706. Optional<Token> Parser::token_at(Position pos) const
  707. {
  708. auto index = index_of_token_at(pos);
  709. if (!index.has_value())
  710. return {};
  711. return m_tokens[index.value()];
  712. }
  713. Optional<size_t> Parser::index_of_token_at(Position pos) const
  714. {
  715. for (size_t token_index = 0; token_index < m_tokens.size(); ++token_index) {
  716. auto token = m_tokens[token_index];
  717. if (token.start() > pos || token.end() < pos)
  718. continue;
  719. return token_index;
  720. }
  721. return {};
  722. }
  723. void Parser::print_tokens() const
  724. {
  725. for (auto& token : m_tokens) {
  726. dbgln("{}", token.to_string());
  727. }
  728. }
  729. bool Parser::match_function_call()
  730. {
  731. save_state();
  732. ScopeGuard state_guard = [this] { load_state(); };
  733. if (!match(Token::Type::Identifier))
  734. return false;
  735. consume();
  736. return match(Token::Type::LeftParen);
  737. }
  738. NonnullRefPtr<FunctionCall> Parser::parse_function_call(ASTNode& parent)
  739. {
  740. SCOPE_LOGGER();
  741. auto call = create_ast_node<FunctionCall>(parent, position(), {});
  742. auto name_identifier = consume(Token::Type::Identifier);
  743. call->m_name = text_of_token(name_identifier);
  744. NonnullRefPtrVector<Expression> args;
  745. consume(Token::Type::LeftParen);
  746. while (peek().type() != Token::Type::RightParen && !eof()) {
  747. args.append(parse_expression(*call));
  748. if (peek().type() == Token::Type::Comma)
  749. consume(Token::Type::Comma);
  750. }
  751. consume(Token::Type::RightParen);
  752. call->m_arguments = move(args);
  753. call->set_end(position());
  754. return call;
  755. }
  756. NonnullRefPtr<StringLiteral> Parser::parse_string_literal(ASTNode& parent)
  757. {
  758. SCOPE_LOGGER();
  759. Optional<size_t> start_token_index;
  760. Optional<size_t> end_token_index;
  761. while (!eof()) {
  762. auto token = peek();
  763. if (token.type() != Token::Type::DoubleQuotedString && token.type() != Token::Type::EscapeSequence) {
  764. VERIFY(start_token_index.has_value());
  765. end_token_index = m_state.token_index - 1;
  766. break;
  767. }
  768. if (!start_token_index.has_value())
  769. start_token_index = m_state.token_index;
  770. consume();
  771. }
  772. // String was not terminated
  773. if (!end_token_index.has_value()) {
  774. end_token_index = m_tokens.size() - 1;
  775. }
  776. VERIFY(start_token_index.has_value());
  777. VERIFY(end_token_index.has_value());
  778. Token start_token = m_tokens[start_token_index.value()];
  779. Token end_token = m_tokens[end_token_index.value()];
  780. auto text = text_in_range(start_token.start(), end_token.end());
  781. auto string_literal = create_ast_node<StringLiteral>(parent, start_token.start(), end_token.end());
  782. string_literal->m_value = text;
  783. return string_literal;
  784. }
  785. NonnullRefPtr<ReturnStatement> Parser::parse_return_statement(ASTNode& parent)
  786. {
  787. SCOPE_LOGGER();
  788. auto return_statement = create_ast_node<ReturnStatement>(parent, position(), {});
  789. consume(Token::Type::Keyword);
  790. auto expression = parse_expression(*return_statement);
  791. return_statement->m_value = expression;
  792. return_statement->set_end(expression->end());
  793. return return_statement;
  794. }
  795. NonnullRefPtr<EnumDeclaration> Parser::parse_enum_declaration(ASTNode& parent)
  796. {
  797. SCOPE_LOGGER();
  798. auto enum_decl = create_ast_node<EnumDeclaration>(parent, position(), {});
  799. consume_keyword("enum");
  800. auto name_token = consume(Token::Type::Identifier);
  801. enum_decl->m_name = text_of_token(name_token);
  802. consume(Token::Type::LeftCurly);
  803. while (peek().type() != Token::Type::RightCurly && !eof()) {
  804. enum_decl->m_entries.append(text_of_token(consume(Token::Type::Identifier)));
  805. if (peek().type() != Token::Type::Comma) {
  806. break;
  807. }
  808. consume(Token::Type::Comma);
  809. }
  810. consume(Token::Type::RightCurly);
  811. consume(Token::Type::Semicolon);
  812. enum_decl->set_end(position());
  813. return enum_decl;
  814. }
  815. Token Parser::consume_keyword(const String& keyword)
  816. {
  817. auto token = consume();
  818. if (token.type() != Token::Type::Keyword) {
  819. error(String::formatted("unexpected token: {}, expected Keyword", token.to_string()));
  820. return token;
  821. }
  822. if (text_of_token(token) != keyword) {
  823. error(String::formatted("unexpected keyword: {}, expected {}", text_of_token(token), keyword));
  824. return token;
  825. }
  826. return token;
  827. }
  828. bool Parser::match_keyword(const String& keyword)
  829. {
  830. auto token = peek();
  831. if (token.type() != Token::Type::Keyword) {
  832. return false;
  833. }
  834. if (text_of_token(token) != keyword) {
  835. return false;
  836. }
  837. return true;
  838. }
  839. NonnullRefPtr<StructOrClassDeclaration> Parser::parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type type)
  840. {
  841. SCOPE_LOGGER();
  842. auto decl = create_ast_node<StructOrClassDeclaration>(parent, position(), {}, type);
  843. switch (type) {
  844. case StructOrClassDeclaration::Type::Struct:
  845. consume_keyword("struct");
  846. break;
  847. case StructOrClassDeclaration::Type::Class:
  848. consume_keyword("class");
  849. break;
  850. }
  851. auto name_token = consume(Token::Type::Identifier);
  852. decl->m_name = text_of_token(name_token);
  853. consume(Token::Type::LeftCurly);
  854. while (peek().type() != Token::Type::RightCurly && !eof()) {
  855. decl->m_members.append(parse_member_declaration(*decl));
  856. }
  857. consume(Token::Type::RightCurly);
  858. consume(Token::Type::Semicolon);
  859. decl->set_end(position());
  860. return decl;
  861. }
  862. NonnullRefPtr<MemberDeclaration> Parser::parse_member_declaration(ASTNode& parent)
  863. {
  864. SCOPE_LOGGER();
  865. auto member_decl = create_ast_node<MemberDeclaration>(parent, position(), {});
  866. auto type_token = consume();
  867. auto identifier_token = consume(Token::Type::Identifier);
  868. RefPtr<Expression> initial_value;
  869. if (match(Token::Type::LeftCurly)) {
  870. consume(Token::Type::LeftCurly);
  871. initial_value = parse_expression(*member_decl);
  872. consume(Token::Type::RightCurly);
  873. }
  874. member_decl->m_type = create_ast_node<Type>(*member_decl, type_token.start(), type_token.end(), text_of_token(type_token));
  875. member_decl->m_name = text_of_token(identifier_token);
  876. member_decl->m_initial_value = move(initial_value);
  877. consume(Token::Type::Semicolon);
  878. member_decl->set_end(position());
  879. return member_decl;
  880. }
  881. NonnullRefPtr<BooleanLiteral> Parser::parse_boolean_literal(ASTNode& parent)
  882. {
  883. SCOPE_LOGGER();
  884. auto token = consume(Token::Type::Keyword);
  885. auto text = text_of_token(token);
  886. // text == "true" || text == "false";
  887. bool value = (text == "true");
  888. return create_ast_node<BooleanLiteral>(parent, token.start(), token.end(), value);
  889. }
  890. bool Parser::match_boolean_literal()
  891. {
  892. auto token = peek();
  893. if (token.type() != Token::Type::Keyword)
  894. return false;
  895. auto text = text_of_token(token);
  896. return text == "true" || text == "false";
  897. }
  898. NonnullRefPtr<Type> Parser::parse_type(ASTNode& parent)
  899. {
  900. SCOPE_LOGGER();
  901. auto qualifiers = parse_type_qualifiers();
  902. auto token = consume();
  903. auto type = create_ast_node<Type>(parent, token.start(), token.end(), text_of_token(token));
  904. type->m_qualifiers = move(qualifiers);
  905. if (token.type() != Token::Type::KnownType && token.type() != Token::Type::Identifier) {
  906. error(String::formatted("unexpected token for type: {}", token.to_string()));
  907. return type;
  908. }
  909. while (peek().type() == Token::Type::Asterisk) {
  910. auto asterisk = consume();
  911. auto ptr = create_ast_node<Pointer>(type, asterisk.start(), asterisk.end());
  912. ptr->m_pointee = type;
  913. type = ptr;
  914. }
  915. return type;
  916. }
  917. NonnullRefPtr<ForStatement> Parser::parse_for_statement(ASTNode& parent)
  918. {
  919. SCOPE_LOGGER();
  920. auto for_statement = create_ast_node<ForStatement>(parent, position(), {});
  921. consume(Token::Type::Keyword);
  922. consume(Token::Type::LeftParen);
  923. for_statement->m_init = parse_variable_declaration(*for_statement);
  924. consume(Token::Type::Semicolon);
  925. for_statement->m_test = parse_expression(*for_statement);
  926. consume(Token::Type::Semicolon);
  927. for_statement->m_update = parse_expression(*for_statement);
  928. consume(Token::Type::RightParen);
  929. for_statement->m_body = parse_statement(*for_statement);
  930. for_statement->set_end(for_statement->m_body->end());
  931. return for_statement;
  932. }
  933. NonnullRefPtr<IfStatement> Parser::parse_if_statement(ASTNode& parent)
  934. {
  935. SCOPE_LOGGER();
  936. auto if_statement = create_ast_node<IfStatement>(parent, position(), {});
  937. consume(Token::Type::Keyword);
  938. consume(Token::Type::LeftParen);
  939. if_statement->m_predicate = parse_expression(*if_statement);
  940. consume(Token::Type::RightParen);
  941. if_statement->m_then = parse_statement(*if_statement);
  942. if (match_keyword("else")) {
  943. consume(Token::Type::Keyword);
  944. if_statement->m_else = parse_statement(*if_statement);
  945. if_statement->set_end(if_statement->m_else->end());
  946. } else {
  947. if_statement->set_end(if_statement->m_then->end());
  948. }
  949. return if_statement;
  950. }
  951. Vector<StringView> Parser::parse_type_qualifiers()
  952. {
  953. SCOPE_LOGGER();
  954. Vector<StringView> qualifiers;
  955. while (!done()) {
  956. auto token = peek();
  957. if (token.type() != Token::Type::Keyword)
  958. break;
  959. auto text = text_of_token(token);
  960. if (text == "static" || text == "const") {
  961. qualifiers.append(text);
  962. consume();
  963. } else {
  964. break;
  965. }
  966. }
  967. return qualifiers;
  968. }
  969. bool Parser::match_attribute_specification()
  970. {
  971. return text_of_token(peek()) == "__attribute__";
  972. }
  973. void Parser::consume_attribute_specification()
  974. {
  975. consume(); // __attribute__
  976. consume(Token::Type::LeftParen);
  977. size_t left_count = 1;
  978. while (!done()) {
  979. auto token = consume();
  980. if (token.type() == Token::Type::LeftParen) {
  981. ++left_count;
  982. }
  983. if (token.type() == Token::Type::RightParen) {
  984. --left_count;
  985. }
  986. if (left_count == 0)
  987. return;
  988. }
  989. }
  990. bool Parser::match_ellipsis()
  991. {
  992. if (m_state.token_index > m_tokens.size() - 3)
  993. return false;
  994. return peek().type() == Token::Type::Dot && peek().type() == Token::Type::Dot && peek().type() == Token::Type::Dot;
  995. }
  996. void Parser::add_tokens_for_preprocessor(Token& replaced_token, Preprocessor::DefinedValue& definition)
  997. {
  998. if (!definition.value.has_value())
  999. return;
  1000. Lexer lexer(definition.value.value());
  1001. for (auto token : lexer.lex()) {
  1002. if (token.type() == Token::Type::Whitespace)
  1003. continue;
  1004. token.set_start(replaced_token.start());
  1005. token.set_end(replaced_token.end());
  1006. m_tokens.append(move(token));
  1007. }
  1008. }
  1009. }