Parser.cpp 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213
  1. /*
  2. * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #ifdef CPP_DEBUG
  27. # define DEBUG_SPAM
  28. #endif
  29. #include "Parser.h"
  30. #include "AST.h"
  31. #include <AK/Debug.h>
  32. #include <AK/ScopeGuard.h>
  33. #include <AK/ScopeLogger.h>
  34. #include <LibCpp/Lexer.h>
  35. namespace Cpp {
  36. Parser::Parser(const StringView& program, const String& filename, Preprocessor::Definitions&& definitions)
  37. : m_definitions(move(definitions))
  38. , m_filename(filename)
  39. {
  40. initialize_program_tokens(program);
  41. #if CPP_DEBUG
  42. dbgln("Tokens:");
  43. for (auto& token : m_tokens) {
  44. StringView text;
  45. if (token.start().line != token.end().line || token.start().column > token.end().column)
  46. text = {};
  47. else
  48. text = text_of_token(token);
  49. dbgln("{} {}:{}-{}:{} ({})", token.to_string(), token.start().line, token.start().column, token.end().line, token.end().column, text);
  50. }
  51. #endif
  52. }
  53. void Parser::initialize_program_tokens(const StringView& program)
  54. {
  55. Lexer lexer(program);
  56. for (auto& token : lexer.lex()) {
  57. if (token.type() == Token::Type::Whitespace)
  58. continue;
  59. if (token.type() == Token::Type::Identifier) {
  60. if (auto defined_value = m_definitions.find(text_of_token(token)); defined_value != m_definitions.end()) {
  61. add_tokens_for_preprocessor(token, defined_value->value);
  62. m_replaced_preprocessor_tokens.append({ token, defined_value->value });
  63. continue;
  64. }
  65. }
  66. m_tokens.append(move(token));
  67. }
  68. }
  69. NonnullRefPtr<TranslationUnit> Parser::parse()
  70. {
  71. SCOPE_LOGGER();
  72. if (m_tokens.is_empty())
  73. return create_root_ast_node({}, {});
  74. auto unit = create_root_ast_node(m_tokens.first().start(), m_tokens.last().end());
  75. unit->m_declarations = parse_declarations_in_translation_unit(*unit);
  76. return unit;
  77. }
  78. NonnullRefPtrVector<Declaration> Parser::parse_declarations_in_translation_unit(ASTNode& parent)
  79. {
  80. NonnullRefPtrVector<Declaration> declarations;
  81. while (!eof()) {
  82. auto declaration = parse_single_declaration_in_translation_unit(parent);
  83. if (declaration) {
  84. declarations.append(declaration.release_nonnull());
  85. } else {
  86. error("unexpected token");
  87. consume();
  88. }
  89. }
  90. return declarations;
  91. }
  92. RefPtr<Declaration> Parser::parse_single_declaration_in_translation_unit(ASTNode& parent)
  93. {
  94. while (!eof()) {
  95. if (match_comment()) {
  96. consume(Token::Type::Comment);
  97. continue;
  98. }
  99. if (match_preprocessor()) {
  100. consume_preprocessor();
  101. continue;
  102. }
  103. auto declaration = match_declaration_in_translation_unit();
  104. if (declaration.has_value()) {
  105. return parse_declaration(parent, declaration.value());
  106. }
  107. return {};
  108. }
  109. return {};
  110. }
  111. NonnullRefPtr<Declaration> Parser::parse_declaration(ASTNode& parent, DeclarationType declaration_type)
  112. {
  113. switch (declaration_type) {
  114. case DeclarationType::Function:
  115. return parse_function_declaration(parent);
  116. case DeclarationType::Variable:
  117. return parse_variable_declaration(parent);
  118. case DeclarationType::Enum:
  119. return parse_enum_declaration(parent);
  120. case DeclarationType::Struct:
  121. return parse_struct_or_class_declaration(parent, StructOrClassDeclaration::Type::Struct);
  122. case DeclarationType::Namespace:
  123. return parse_namespace_declaration(parent);
  124. default:
  125. error("unexpected declaration type");
  126. return create_ast_node<InvalidDeclaration>(parent, position(), position());
  127. }
  128. }
  129. NonnullRefPtr<FunctionDeclaration> Parser::parse_function_declaration(ASTNode& parent)
  130. {
  131. auto func = create_ast_node<FunctionDeclaration>(parent, position(), {});
  132. func->m_qualifiers = parse_function_qualifiers();
  133. func->m_return_type = parse_type(*func);
  134. auto function_name = consume(Token::Type::Identifier);
  135. func->m_name = text_of_token(function_name);
  136. consume(Token::Type::LeftParen);
  137. auto parameters = parse_parameter_list(*func);
  138. if (parameters.has_value())
  139. func->m_parameters = move(parameters.value());
  140. consume(Token::Type::RightParen);
  141. RefPtr<FunctionDefinition> body;
  142. Position func_end {};
  143. if (peek(Token::Type::LeftCurly).has_value()) {
  144. body = parse_function_definition(*func);
  145. func_end = body->end();
  146. } else {
  147. func_end = position();
  148. if (match_attribute_specification())
  149. consume_attribute_specification(); // we don't use the value of __attribute__
  150. consume(Token::Type::Semicolon);
  151. }
  152. func->m_definition = move(body);
  153. func->set_end(func_end);
  154. return func;
  155. }
  156. NonnullRefPtr<FunctionDefinition> Parser::parse_function_definition(ASTNode& parent)
  157. {
  158. SCOPE_LOGGER();
  159. auto func = create_ast_node<FunctionDefinition>(parent, position(), {});
  160. consume(Token::Type::LeftCurly);
  161. while (!eof() && peek().type() != Token::Type::RightCurly) {
  162. func->statements().append(parse_statement(func));
  163. }
  164. func->set_end(position());
  165. if (!eof())
  166. consume(Token::Type::RightCurly);
  167. return func;
  168. }
  169. NonnullRefPtr<Statement> Parser::parse_statement(ASTNode& parent)
  170. {
  171. SCOPE_LOGGER();
  172. ArmedScopeGuard consume_semicolumn([this]() {
  173. consume(Token::Type::Semicolon);
  174. });
  175. if (match_block_statement()) {
  176. consume_semicolumn.disarm();
  177. return parse_block_statement(parent);
  178. }
  179. if (match_comment()) {
  180. consume_semicolumn.disarm();
  181. return parse_comment(parent);
  182. }
  183. if (match_variable_declaration()) {
  184. return parse_variable_declaration(parent, false);
  185. }
  186. if (match_expression()) {
  187. return parse_expression(parent);
  188. }
  189. if (match_keyword("return")) {
  190. return parse_return_statement(parent);
  191. }
  192. if (match_keyword("for")) {
  193. consume_semicolumn.disarm();
  194. return parse_for_statement(parent);
  195. }
  196. if (match_keyword("if")) {
  197. consume_semicolumn.disarm();
  198. return parse_if_statement(parent);
  199. } else {
  200. error("unexpected statement type");
  201. consume_semicolumn.disarm();
  202. consume();
  203. return create_ast_node<InvalidStatement>(parent, position(), position());
  204. }
  205. }
  206. NonnullRefPtr<Comment> Parser::parse_comment(ASTNode& parent)
  207. {
  208. auto comment = create_ast_node<Comment>(parent, position(), {});
  209. consume(Token::Type::Comment);
  210. comment->set_end(position());
  211. return comment;
  212. }
  213. bool Parser::match_block_statement()
  214. {
  215. return peek().type() == Token::Type::LeftCurly;
  216. }
  217. NonnullRefPtr<BlockStatement> Parser::parse_block_statement(ASTNode& parent)
  218. {
  219. SCOPE_LOGGER();
  220. auto block_statement = create_ast_node<BlockStatement>(parent, position(), {});
  221. consume(Token::Type::LeftCurly);
  222. while (!eof() && peek().type() != Token::Type::RightCurly) {
  223. block_statement->m_statements.append(parse_statement(*block_statement));
  224. }
  225. consume(Token::Type::RightCurly);
  226. block_statement->set_end(position());
  227. return block_statement;
  228. }
  229. bool Parser::match_type()
  230. {
  231. save_state();
  232. ScopeGuard state_guard = [this] { load_state(); };
  233. parse_type_qualifiers();
  234. // Type
  235. if (!peek(Token::Type::KnownType).has_value() && !peek(Token::Type::Identifier).has_value())
  236. return false;
  237. return true;
  238. }
  239. bool Parser::match_variable_declaration()
  240. {
  241. SCOPE_LOGGER();
  242. save_state();
  243. ScopeGuard state_guard = [this] { load_state(); };
  244. if (!match_type())
  245. return false;
  246. VERIFY(m_root_node);
  247. parse_type(*m_root_node);
  248. // Identifier
  249. if (!peek(Token::Type::Identifier).has_value())
  250. return false;
  251. consume();
  252. if (match(Token::Type::Equals)) {
  253. consume(Token::Type::Equals);
  254. if (!match_expression()) {
  255. error("initial value of variable is not an expression");
  256. return false;
  257. }
  258. return true;
  259. }
  260. return match(Token::Type::Semicolon);
  261. }
  262. NonnullRefPtr<VariableDeclaration> Parser::parse_variable_declaration(ASTNode& parent, bool expect_semicolon)
  263. {
  264. SCOPE_LOGGER();
  265. auto var = create_ast_node<VariableDeclaration>(parent, position(), {});
  266. if (!match_variable_declaration()) {
  267. error("unexpected token for variable type");
  268. var->set_end(position());
  269. return var;
  270. }
  271. var->m_type = parse_type(var);
  272. auto identifier_token = consume(Token::Type::Identifier);
  273. RefPtr<Expression> initial_value;
  274. if (match(Token::Type::Equals)) {
  275. consume(Token::Type::Equals);
  276. initial_value = parse_expression(var);
  277. }
  278. if(expect_semicolon)
  279. consume(Token::Type::Semicolon);
  280. var->set_end(position());
  281. var->m_name = text_of_token(identifier_token);
  282. var->m_initial_value = move(initial_value);
  283. return var;
  284. }
  285. NonnullRefPtr<Expression> Parser::parse_expression(ASTNode& parent)
  286. {
  287. SCOPE_LOGGER();
  288. auto expression = parse_primary_expression(parent);
  289. // TODO: remove eof() logic, should still work without it
  290. if (eof() || match(Token::Type::Semicolon)) {
  291. return expression;
  292. }
  293. NonnullRefPtrVector<Expression> secondary_expressions;
  294. while (match_secondary_expression()) {
  295. // FIXME: Handle operator precedence
  296. expression = parse_secondary_expression(parent, expression);
  297. secondary_expressions.append(expression);
  298. }
  299. for (size_t i = 0; secondary_expressions.size() != 0 && i < secondary_expressions.size() - 1; ++i) {
  300. secondary_expressions[i].set_parent(secondary_expressions[i + 1]);
  301. }
  302. return expression;
  303. }
  304. bool Parser::match_secondary_expression()
  305. {
  306. auto type = peek().type();
  307. return type == Token::Type::Plus
  308. || type == Token::Type::PlusEquals
  309. || type == Token::Type::Minus
  310. || type == Token::Type::MinusEquals
  311. || type == Token::Type::Asterisk
  312. || type == Token::Type::AsteriskEquals
  313. || type == Token::Type::Percent
  314. || type == Token::Type::PercentEquals
  315. || type == Token::Type::Equals
  316. || type == Token::Type::Greater
  317. || type == Token::Type::Greater
  318. || type == Token::Type::Less
  319. || type == Token::Type::LessEquals
  320. || type == Token::Type::Dot
  321. || type == Token::Type::PlusPlus
  322. || type == Token::Type::MinusMinus
  323. || type == Token::Type::And
  324. || type == Token::Type::AndEquals
  325. || type == Token::Type::Pipe
  326. || type == Token::Type::PipeEquals
  327. || type == Token::Type::Caret
  328. || type == Token::Type::CaretEquals
  329. || type == Token::Type::LessLess
  330. || type == Token::Type::LessLessEquals
  331. || type == Token::Type::GreaterGreater
  332. || type == Token::Type::GreaterGreaterEquals
  333. || type == Token::Type::AndAnd
  334. || type == Token::Type::PipePipe;
  335. }
  336. NonnullRefPtr<Expression> Parser::parse_primary_expression(ASTNode& parent)
  337. {
  338. SCOPE_LOGGER();
  339. // TODO: remove eof() logic, should still work without it
  340. if (eof()) {
  341. auto node = create_ast_node<Identifier>(parent, position(), position());
  342. return node;
  343. }
  344. if (match_unary_expression())
  345. return parse_unary_expression(parent);
  346. if (match_literal()) {
  347. return parse_literal(parent);
  348. }
  349. switch (peek().type()) {
  350. case Token::Type::Identifier: {
  351. if (match_function_call())
  352. return parse_function_call(parent);
  353. auto token = consume();
  354. return create_ast_node<Identifier>(parent, token.start(), token.end(), text_of_token(token));
  355. }
  356. default: {
  357. error("could not parse primary expression");
  358. auto token = consume();
  359. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  360. }
  361. }
  362. }
  363. bool Parser::match_literal()
  364. {
  365. switch (peek().type()) {
  366. case Token::Type::Integer:
  367. return true;
  368. case Token::Type::DoubleQuotedString:
  369. return true;
  370. case Token::Type::Keyword: {
  371. return match_boolean_literal();
  372. }
  373. default:
  374. return false;
  375. }
  376. }
  377. bool Parser::match_unary_expression()
  378. {
  379. auto type = peek().type();
  380. return type == Token::Type::PlusPlus
  381. || type == Token::Type::MinusMinus
  382. || type == Token::Type::ExclamationMark
  383. || type == Token::Type::Tilde
  384. || type == Token::Type::Plus
  385. || type == Token::Type::Minus;
  386. }
  387. NonnullRefPtr<UnaryExpression> Parser::parse_unary_expression(ASTNode& parent)
  388. {
  389. auto unary_exp = create_ast_node<UnaryExpression>(parent, position(), {});
  390. auto op_token = consume();
  391. UnaryOp op { UnaryOp::Invalid };
  392. switch (op_token.type()) {
  393. case Token::Type::Minus:
  394. op = UnaryOp::Minus;
  395. break;
  396. case Token::Type::Plus:
  397. op = UnaryOp::Plus;
  398. break;
  399. case Token::Type::ExclamationMark:
  400. op = UnaryOp::Not;
  401. break;
  402. case Token::Type::Tilde:
  403. op = UnaryOp::BitwiseNot;
  404. break;
  405. case Token::Type::PlusPlus:
  406. op = UnaryOp::PlusPlus;
  407. break;
  408. default:
  409. break;
  410. }
  411. unary_exp->m_op = op;
  412. auto lhs = parse_expression(*unary_exp);
  413. unary_exp->m_lhs = lhs;
  414. unary_exp->set_end(lhs->end());
  415. return unary_exp;
  416. }
  417. NonnullRefPtr<Expression> Parser::parse_literal(ASTNode& parent)
  418. {
  419. switch (peek().type()) {
  420. case Token::Type::Integer: {
  421. auto token = consume();
  422. return create_ast_node<NumericLiteral>(parent, token.start(), token.end(), text_of_token(token));
  423. }
  424. case Token::Type::DoubleQuotedString: {
  425. return parse_string_literal(parent);
  426. }
  427. case Token::Type::Keyword: {
  428. if (match_boolean_literal())
  429. return parse_boolean_literal(parent);
  430. [[fallthrough]];
  431. }
  432. default: {
  433. error("could not parse literal");
  434. auto token = consume();
  435. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  436. }
  437. }
  438. }
  439. NonnullRefPtr<Expression> Parser::parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs)
  440. {
  441. SCOPE_LOGGER();
  442. switch (peek().type()) {
  443. case Token::Type::Plus:
  444. return parse_binary_expression(parent, lhs, BinaryOp::Addition);
  445. case Token::Type::Less:
  446. return parse_binary_expression(parent, lhs, BinaryOp::LessThan);
  447. case Token::Type::Equals:
  448. return parse_assignment_expression(parent, lhs, AssignmentOp::Assignment);
  449. case Token::Type::Dot: {
  450. consume();
  451. auto exp = create_ast_node<MemberExpression>(parent, lhs->start(), {});
  452. lhs->set_parent(*exp);
  453. exp->m_object = move(lhs);
  454. auto property_token = consume(Token::Type::Identifier);
  455. exp->m_property = create_ast_node<Identifier>(*exp, property_token.start(), property_token.end(), text_of_token(property_token));
  456. exp->set_end(property_token.end());
  457. return exp;
  458. }
  459. default: {
  460. error(String::formatted("unexpected operator for expression. operator: {}", peek().to_string()));
  461. auto token = consume();
  462. return create_ast_node<InvalidExpression>(parent, token.start(), token.end());
  463. }
  464. }
  465. }
  466. NonnullRefPtr<BinaryExpression> Parser::parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp op)
  467. {
  468. consume(); // Operator
  469. auto exp = create_ast_node<BinaryExpression>(parent, lhs->start(), {});
  470. lhs->set_parent(*exp);
  471. exp->m_op = op;
  472. exp->m_lhs = move(lhs);
  473. auto rhs = parse_expression(exp);
  474. exp->set_end(rhs->end());
  475. exp->m_rhs = move(rhs);
  476. return exp;
  477. }
  478. NonnullRefPtr<AssignmentExpression> Parser::parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp op)
  479. {
  480. consume(); // Operator
  481. auto exp = create_ast_node<AssignmentExpression>(parent, lhs->start(), {});
  482. lhs->set_parent(*exp);
  483. exp->m_op = op;
  484. exp->m_lhs = move(lhs);
  485. auto rhs = parse_expression(exp);
  486. exp->set_end(rhs->end());
  487. exp->m_rhs = move(rhs);
  488. return exp;
  489. }
  490. Optional<Parser::DeclarationType> Parser::match_declaration_in_translation_unit()
  491. {
  492. if (match_function_declaration())
  493. return DeclarationType::Function;
  494. if (match_enum_declaration())
  495. return DeclarationType::Enum;
  496. if (match_struct_declaration())
  497. return DeclarationType::Struct;
  498. if (match_namespace_declaration())
  499. return DeclarationType::Namespace;
  500. if (match_variable_declaration())
  501. return DeclarationType::Variable;
  502. return {};
  503. }
  504. bool Parser::match_enum_declaration()
  505. {
  506. return match_keyword("enum");
  507. }
  508. bool Parser::match_struct_declaration()
  509. {
  510. return match_keyword("struct");
  511. }
  512. bool Parser::match_namespace_declaration()
  513. {
  514. return match_keyword("namespace");
  515. }
  516. bool Parser::match_function_declaration()
  517. {
  518. save_state();
  519. ScopeGuard state_guard = [this] { load_state(); };
  520. parse_function_qualifiers();
  521. if (!match_type())
  522. return false;
  523. VERIFY(m_root_node);
  524. parse_type(*m_root_node);
  525. if (!peek(Token::Type::Identifier).has_value())
  526. return false;
  527. consume();
  528. if (!peek(Token::Type::LeftParen).has_value())
  529. return false;
  530. consume();
  531. while (consume().type() != Token::Type::RightParen && !eof()) { };
  532. if (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value())
  533. return true;
  534. if (match_attribute_specification()) {
  535. consume_attribute_specification();
  536. return peek(Token::Type::Semicolon).has_value();
  537. }
  538. return false;
  539. }
  540. Optional<NonnullRefPtrVector<Parameter>> Parser::parse_parameter_list(ASTNode& parent)
  541. {
  542. SCOPE_LOGGER();
  543. NonnullRefPtrVector<Parameter> parameters;
  544. while (peek().type() != Token::Type::RightParen && !eof()) {
  545. if (match_ellipsis()) {
  546. auto last_dot = consume();
  547. while (peek().type() == Token::Type::Dot)
  548. last_dot = consume();
  549. auto param = create_ast_node<Parameter>(parent, position(), last_dot.end(), StringView {});
  550. param->m_is_ellipsis = true;
  551. parameters.append(move(param));
  552. } else {
  553. auto type = parse_type(parent);
  554. auto name_identifier = peek(Token::Type::Identifier);
  555. if (name_identifier.has_value())
  556. consume(Token::Type::Identifier);
  557. StringView name;
  558. if (name_identifier.has_value())
  559. name = text_of_token(name_identifier.value());
  560. auto param = create_ast_node<Parameter>(parent, type->start(), name_identifier.has_value() ? name_identifier.value().end() : type->end(), name);
  561. param->m_type = move(type);
  562. parameters.append(move(param));
  563. }
  564. if (peek(Token::Type::Comma).has_value())
  565. consume(Token::Type::Comma);
  566. }
  567. return parameters;
  568. }
  569. bool Parser::match_comment()
  570. {
  571. return match(Token::Type::Comment);
  572. }
  573. bool Parser::match_whitespace()
  574. {
  575. return match(Token::Type::Whitespace);
  576. }
  577. bool Parser::match_preprocessor()
  578. {
  579. return match(Token::Type::PreprocessorStatement) || match(Token::Type::IncludeStatement);
  580. }
  581. void Parser::consume_preprocessor()
  582. {
  583. SCOPE_LOGGER();
  584. switch (peek().type()) {
  585. case Token::Type::PreprocessorStatement:
  586. consume();
  587. break;
  588. case Token::Type::IncludeStatement:
  589. consume();
  590. consume(Token::Type::IncludePath);
  591. break;
  592. default:
  593. error("unexpected token while parsing preprocessor statement");
  594. consume();
  595. }
  596. }
  597. Optional<Token> Parser::consume_whitespace()
  598. {
  599. SCOPE_LOGGER();
  600. return consume(Token::Type::Whitespace);
  601. }
  602. Token Parser::consume(Token::Type type)
  603. {
  604. auto token = consume();
  605. if (token.type() != type)
  606. error(String::formatted("expected {} at {}:{}, found: {}", Token::type_to_string(type), token.start().line, token.start().column, Token::type_to_string(token.type())));
  607. return token;
  608. }
  609. bool Parser::match(Token::Type type)
  610. {
  611. return peek().type() == type;
  612. }
  613. Token Parser::consume()
  614. {
  615. if (eof()) {
  616. error("C++ Parser: out of tokens");
  617. return { Token::Type::EOF_TOKEN, position(), position(), {} };
  618. }
  619. return m_tokens[m_state.token_index++];
  620. }
  621. Token Parser::peek(size_t offset) const
  622. {
  623. if (m_state.token_index + offset >= m_tokens.size())
  624. return { Token::Type::EOF_TOKEN, position(), position(), {} };
  625. return m_tokens[m_state.token_index + offset];
  626. }
  627. Optional<Token> Parser::peek(Token::Type type) const
  628. {
  629. auto token = peek();
  630. if (token.type() == type)
  631. return token;
  632. return {};
  633. }
  634. void Parser::save_state()
  635. {
  636. m_saved_states.append(m_state);
  637. }
  638. void Parser::load_state()
  639. {
  640. m_state = m_saved_states.take_last();
  641. }
  642. StringView Parser::text_of_token(const Cpp::Token& token) const
  643. {
  644. return token.text();
  645. }
  646. String Parser::text_of_node(const ASTNode& node) const
  647. {
  648. return text_in_range(node.start(), node.end());
  649. }
  650. String Parser::text_in_range(Position start, Position end) const
  651. {
  652. auto start_token_index = index_of_token_at(start);
  653. auto end_node_index = index_of_token_at(end);
  654. VERIFY(start_token_index.has_value());
  655. VERIFY(end_node_index.has_value());
  656. StringBuilder text;
  657. for (size_t i = start_token_index.value(); i <= end_node_index.value(); ++i) {
  658. text.append(m_tokens[i].text());
  659. }
  660. return text.build();
  661. }
  662. void Parser::error(StringView message)
  663. {
  664. SCOPE_LOGGER();
  665. if (message.is_null() || message.is_empty())
  666. message = "<empty>";
  667. String formatted_message;
  668. if (m_state.token_index >= m_tokens.size()) {
  669. formatted_message = String::formatted("C++ Parsed error on EOF.{}", message);
  670. } else {
  671. formatted_message = String::formatted("C++ Parser error: {}. token: {} ({}:{})",
  672. message,
  673. m_state.token_index < m_tokens.size() ? text_of_token(m_tokens[m_state.token_index]) : "EOF",
  674. m_tokens[m_state.token_index].start().line,
  675. m_tokens[m_state.token_index].start().column);
  676. }
  677. m_errors.append(formatted_message);
  678. dbgln_if(CPP_DEBUG, "{}", formatted_message);
  679. }
  680. bool Parser::match_expression()
  681. {
  682. auto token_type = peek().type();
  683. return token_type == Token::Type::Integer
  684. || token_type == Token::Type::Float
  685. || token_type == Token::Type::Identifier
  686. || token_type == Token::Type::DoubleQuotedString
  687. || match_unary_expression();
  688. }
  689. bool Parser::eof() const
  690. {
  691. return m_state.token_index >= m_tokens.size();
  692. }
  693. Position Parser::position() const
  694. {
  695. if (eof())
  696. return m_tokens.last().end();
  697. return peek().start();
  698. }
  699. RefPtr<ASTNode> Parser::eof_node() const
  700. {
  701. VERIFY(m_tokens.size());
  702. return node_at(m_tokens.last().end());
  703. }
  704. RefPtr<ASTNode> Parser::node_at(Position pos) const
  705. {
  706. auto index = index_of_node_at(pos);
  707. if (!index.has_value())
  708. return nullptr;
  709. return m_nodes[index.value()];
  710. }
  711. Optional<size_t> Parser::index_of_node_at(Position pos) const
  712. {
  713. VERIFY(!m_tokens.is_empty());
  714. Optional<size_t> match_node_index;
  715. auto node_span = [](const ASTNode& node) {
  716. VERIFY(node.end().line >= node.start().line);
  717. VERIFY((node.end().line > node.start().line) || (node.end().column >= node.start().column));
  718. return Position { node.end().line - node.start().line, node.start().line != node.end().line ? 0 : node.end().column - node.start().column };
  719. };
  720. for (size_t node_index = 0; node_index < m_nodes.size(); ++node_index) {
  721. auto& node = m_nodes[node_index];
  722. if (node.start() > pos || node.end() < pos)
  723. continue;
  724. if (!match_node_index.has_value() || (node_span(node) < node_span(m_nodes[match_node_index.value()])))
  725. match_node_index = node_index;
  726. }
  727. return match_node_index;
  728. }
  729. Optional<Token> Parser::token_at(Position pos) const
  730. {
  731. auto index = index_of_token_at(pos);
  732. if (!index.has_value())
  733. return {};
  734. return m_tokens[index.value()];
  735. }
  736. Optional<size_t> Parser::index_of_token_at(Position pos) const
  737. {
  738. for (size_t token_index = 0; token_index < m_tokens.size(); ++token_index) {
  739. auto token = m_tokens[token_index];
  740. if (token.start() > pos || token.end() < pos)
  741. continue;
  742. return token_index;
  743. }
  744. return {};
  745. }
  746. void Parser::print_tokens() const
  747. {
  748. for (auto& token : m_tokens) {
  749. dbgln("{}", token.to_string());
  750. }
  751. }
  752. bool Parser::match_function_call()
  753. {
  754. save_state();
  755. ScopeGuard state_guard = [this] { load_state(); };
  756. if (!match(Token::Type::Identifier))
  757. return false;
  758. consume();
  759. return match(Token::Type::LeftParen);
  760. }
  761. NonnullRefPtr<FunctionCall> Parser::parse_function_call(ASTNode& parent)
  762. {
  763. SCOPE_LOGGER();
  764. auto call = create_ast_node<FunctionCall>(parent, position(), {});
  765. auto name_identifier = consume(Token::Type::Identifier);
  766. call->m_name = text_of_token(name_identifier);
  767. NonnullRefPtrVector<Expression> args;
  768. consume(Token::Type::LeftParen);
  769. while (peek().type() != Token::Type::RightParen && !eof()) {
  770. args.append(parse_expression(*call));
  771. if (peek().type() == Token::Type::Comma)
  772. consume(Token::Type::Comma);
  773. }
  774. consume(Token::Type::RightParen);
  775. call->m_arguments = move(args);
  776. call->set_end(position());
  777. return call;
  778. }
  779. NonnullRefPtr<StringLiteral> Parser::parse_string_literal(ASTNode& parent)
  780. {
  781. SCOPE_LOGGER();
  782. Optional<size_t> start_token_index;
  783. Optional<size_t> end_token_index;
  784. while (!eof()) {
  785. auto token = peek();
  786. if (token.type() != Token::Type::DoubleQuotedString && token.type() != Token::Type::EscapeSequence) {
  787. VERIFY(start_token_index.has_value());
  788. end_token_index = m_state.token_index - 1;
  789. break;
  790. }
  791. if (!start_token_index.has_value())
  792. start_token_index = m_state.token_index;
  793. consume();
  794. }
  795. // String was not terminated
  796. if (!end_token_index.has_value()) {
  797. end_token_index = m_tokens.size() - 1;
  798. }
  799. VERIFY(start_token_index.has_value());
  800. VERIFY(end_token_index.has_value());
  801. Token start_token = m_tokens[start_token_index.value()];
  802. Token end_token = m_tokens[end_token_index.value()];
  803. auto text = text_in_range(start_token.start(), end_token.end());
  804. auto string_literal = create_ast_node<StringLiteral>(parent, start_token.start(), end_token.end());
  805. string_literal->m_value = text;
  806. return string_literal;
  807. }
  808. NonnullRefPtr<ReturnStatement> Parser::parse_return_statement(ASTNode& parent)
  809. {
  810. SCOPE_LOGGER();
  811. auto return_statement = create_ast_node<ReturnStatement>(parent, position(), {});
  812. consume(Token::Type::Keyword);
  813. if(!peek(Token::Type::Semicolon).has_value()) {
  814. auto expression = parse_expression(*return_statement);
  815. return_statement->m_value = expression;
  816. }
  817. return_statement->set_end(position());
  818. return return_statement;
  819. }
  820. NonnullRefPtr<EnumDeclaration> Parser::parse_enum_declaration(ASTNode& parent)
  821. {
  822. SCOPE_LOGGER();
  823. auto enum_decl = create_ast_node<EnumDeclaration>(parent, position(), {});
  824. consume_keyword("enum");
  825. auto name_token = consume(Token::Type::Identifier);
  826. enum_decl->m_name = text_of_token(name_token);
  827. consume(Token::Type::LeftCurly);
  828. while (!eof() && peek().type() != Token::Type::RightCurly) {
  829. enum_decl->m_entries.append(text_of_token(consume(Token::Type::Identifier)));
  830. if (peek().type() != Token::Type::Comma) {
  831. break;
  832. }
  833. consume(Token::Type::Comma);
  834. }
  835. consume(Token::Type::RightCurly);
  836. consume(Token::Type::Semicolon);
  837. enum_decl->set_end(position());
  838. return enum_decl;
  839. }
  840. Token Parser::consume_keyword(const String& keyword)
  841. {
  842. auto token = consume();
  843. if (token.type() != Token::Type::Keyword) {
  844. error(String::formatted("unexpected token: {}, expected Keyword", token.to_string()));
  845. return token;
  846. }
  847. if (text_of_token(token) != keyword) {
  848. error(String::formatted("unexpected keyword: {}, expected {}", text_of_token(token), keyword));
  849. return token;
  850. }
  851. return token;
  852. }
  853. bool Parser::match_keyword(const String& keyword)
  854. {
  855. auto token = peek();
  856. if (token.type() != Token::Type::Keyword) {
  857. return false;
  858. }
  859. if (text_of_token(token) != keyword) {
  860. return false;
  861. }
  862. return true;
  863. }
  864. NonnullRefPtr<StructOrClassDeclaration> Parser::parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type type)
  865. {
  866. SCOPE_LOGGER();
  867. auto decl = create_ast_node<StructOrClassDeclaration>(parent, position(), {}, type);
  868. switch (type) {
  869. case StructOrClassDeclaration::Type::Struct:
  870. consume_keyword("struct");
  871. break;
  872. case StructOrClassDeclaration::Type::Class:
  873. consume_keyword("class");
  874. break;
  875. }
  876. auto name_token = consume(Token::Type::Identifier);
  877. decl->m_name = text_of_token(name_token);
  878. consume(Token::Type::LeftCurly);
  879. while (!eof() && peek().type() != Token::Type::RightCurly) {
  880. decl->m_members.append(parse_member_declaration(*decl));
  881. }
  882. consume(Token::Type::RightCurly);
  883. consume(Token::Type::Semicolon);
  884. decl->set_end(position());
  885. return decl;
  886. }
  887. NonnullRefPtr<MemberDeclaration> Parser::parse_member_declaration(ASTNode& parent)
  888. {
  889. SCOPE_LOGGER();
  890. auto member_decl = create_ast_node<MemberDeclaration>(parent, position(), {});
  891. auto type_token = consume();
  892. auto identifier_token = consume(Token::Type::Identifier);
  893. RefPtr<Expression> initial_value;
  894. if (match(Token::Type::LeftCurly)) {
  895. consume(Token::Type::LeftCurly);
  896. initial_value = parse_expression(*member_decl);
  897. consume(Token::Type::RightCurly);
  898. }
  899. member_decl->m_type = create_ast_node<Type>(*member_decl, type_token.start(), type_token.end(), text_of_token(type_token));
  900. member_decl->m_name = text_of_token(identifier_token);
  901. member_decl->m_initial_value = move(initial_value);
  902. consume(Token::Type::Semicolon);
  903. member_decl->set_end(position());
  904. return member_decl;
  905. }
  906. NonnullRefPtr<BooleanLiteral> Parser::parse_boolean_literal(ASTNode& parent)
  907. {
  908. SCOPE_LOGGER();
  909. auto token = consume(Token::Type::Keyword);
  910. auto text = text_of_token(token);
  911. // text == "true" || text == "false";
  912. bool value = (text == "true");
  913. return create_ast_node<BooleanLiteral>(parent, token.start(), token.end(), value);
  914. }
  915. bool Parser::match_boolean_literal()
  916. {
  917. auto token = peek();
  918. if (token.type() != Token::Type::Keyword)
  919. return false;
  920. auto text = text_of_token(token);
  921. return text == "true" || text == "false";
  922. }
  923. NonnullRefPtr<Type> Parser::parse_type(ASTNode& parent)
  924. {
  925. SCOPE_LOGGER();
  926. auto qualifiers = parse_type_qualifiers();
  927. auto token = consume();
  928. auto type = create_ast_node<Type>(parent, token.start(), token.end(), text_of_token(token));
  929. type->m_qualifiers = move(qualifiers);
  930. if (token.type() != Token::Type::KnownType && token.type() != Token::Type::Identifier) {
  931. error(String::formatted("unexpected token for type: {}", token.to_string()));
  932. return type;
  933. }
  934. while (peek().type() == Token::Type::Asterisk) {
  935. auto asterisk = consume();
  936. auto ptr = create_ast_node<Pointer>(type, asterisk.start(), asterisk.end());
  937. ptr->m_pointee = type;
  938. type = ptr;
  939. }
  940. return type;
  941. }
  942. NonnullRefPtr<ForStatement> Parser::parse_for_statement(ASTNode& parent)
  943. {
  944. SCOPE_LOGGER();
  945. auto for_statement = create_ast_node<ForStatement>(parent, position(), {});
  946. consume(Token::Type::Keyword);
  947. consume(Token::Type::LeftParen);
  948. for_statement->m_init = parse_variable_declaration(*for_statement);
  949. consume(Token::Type::Semicolon);
  950. for_statement->m_test = parse_expression(*for_statement);
  951. consume(Token::Type::Semicolon);
  952. for_statement->m_update = parse_expression(*for_statement);
  953. consume(Token::Type::RightParen);
  954. for_statement->m_body = parse_statement(*for_statement);
  955. for_statement->set_end(for_statement->m_body->end());
  956. return for_statement;
  957. }
  958. NonnullRefPtr<IfStatement> Parser::parse_if_statement(ASTNode& parent)
  959. {
  960. SCOPE_LOGGER();
  961. auto if_statement = create_ast_node<IfStatement>(parent, position(), {});
  962. consume(Token::Type::Keyword);
  963. consume(Token::Type::LeftParen);
  964. if_statement->m_predicate = parse_expression(*if_statement);
  965. consume(Token::Type::RightParen);
  966. if_statement->m_then = parse_statement(*if_statement);
  967. if (match_keyword("else")) {
  968. consume(Token::Type::Keyword);
  969. if_statement->m_else = parse_statement(*if_statement);
  970. if_statement->set_end(if_statement->m_else->end());
  971. } else {
  972. if_statement->set_end(if_statement->m_then->end());
  973. }
  974. return if_statement;
  975. }
  976. Vector<StringView> Parser::parse_type_qualifiers()
  977. {
  978. SCOPE_LOGGER();
  979. Vector<StringView> qualifiers;
  980. while (!eof()) {
  981. auto token = peek();
  982. if (token.type() != Token::Type::Keyword)
  983. break;
  984. auto text = text_of_token(token);
  985. if (text == "static" || text == "const") {
  986. qualifiers.append(text);
  987. consume();
  988. } else {
  989. break;
  990. }
  991. }
  992. return qualifiers;
  993. }
  994. Vector<StringView> Parser::parse_function_qualifiers()
  995. {
  996. SCOPE_LOGGER();
  997. Vector<StringView> qualifiers;
  998. while (!eof()) {
  999. auto token = peek();
  1000. if (token.type() != Token::Type::Keyword)
  1001. break;
  1002. auto text = text_of_token(token);
  1003. if (text == "static" || text == "inline") {
  1004. qualifiers.append(text);
  1005. consume();
  1006. } else {
  1007. break;
  1008. }
  1009. }
  1010. return qualifiers;
  1011. }
  1012. bool Parser::match_attribute_specification()
  1013. {
  1014. return text_of_token(peek()) == "__attribute__";
  1015. }
  1016. void Parser::consume_attribute_specification()
  1017. {
  1018. consume(); // __attribute__
  1019. consume(Token::Type::LeftParen);
  1020. size_t left_count = 1;
  1021. while (!eof()) {
  1022. auto token = consume();
  1023. if (token.type() == Token::Type::LeftParen) {
  1024. ++left_count;
  1025. }
  1026. if (token.type() == Token::Type::RightParen) {
  1027. --left_count;
  1028. }
  1029. if (left_count == 0)
  1030. return;
  1031. }
  1032. }
  1033. bool Parser::match_ellipsis()
  1034. {
  1035. if (m_state.token_index > m_tokens.size() - 3)
  1036. return false;
  1037. return peek().type() == Token::Type::Dot && peek().type() == Token::Type::Dot && peek().type() == Token::Type::Dot;
  1038. }
  1039. void Parser::add_tokens_for_preprocessor(Token& replaced_token, Preprocessor::DefinedValue& definition)
  1040. {
  1041. if (!definition.value.has_value())
  1042. return;
  1043. Lexer lexer(definition.value.value());
  1044. for (auto token : lexer.lex()) {
  1045. if (token.type() == Token::Type::Whitespace)
  1046. continue;
  1047. token.set_start(replaced_token.start());
  1048. token.set_end(replaced_token.end());
  1049. m_tokens.append(move(token));
  1050. }
  1051. }
  1052. NonnullRefPtr<NamespaceDeclaration> Parser::parse_namespace_declaration(ASTNode& parent, bool is_nested_namespace)
  1053. {
  1054. auto namespace_decl = create_ast_node<NamespaceDeclaration>(parent, position(), {});
  1055. if (!is_nested_namespace)
  1056. consume(Token::Type::Keyword);
  1057. auto name_token = consume(Token::Type::Identifier);
  1058. namespace_decl->m_name = name_token.text();
  1059. if (peek().type() == Token::Type::ColonColon) {
  1060. consume(Token::Type::ColonColon);
  1061. namespace_decl->m_declarations.append(parse_namespace_declaration(*namespace_decl, true));
  1062. namespace_decl->set_end(position());
  1063. return namespace_decl;
  1064. }
  1065. consume(Token::Type::LeftCurly);
  1066. while (!eof() && peek().type() != Token::Type::RightCurly) {
  1067. auto declaration = parse_single_declaration_in_translation_unit(*namespace_decl);
  1068. if (declaration) {
  1069. namespace_decl->m_declarations.append(declaration.release_nonnull());
  1070. } else {
  1071. error("unexpected token");
  1072. consume();
  1073. }
  1074. }
  1075. consume(Token::Type::RightCurly);
  1076. namespace_decl->set_end(position());
  1077. return namespace_decl;
  1078. }
  1079. }