TextParser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include "Parser/TextParser.h"
  8. namespace JSSpecCompiler {
  9. void TextParser::retreat()
  10. {
  11. --m_next_token_index;
  12. }
  13. auto TextParser::rollback_point()
  14. {
  15. return ArmedScopeGuard {
  16. [this, index = this->m_next_token_index] {
  17. m_next_token_index = index;
  18. }
  19. };
  20. }
  21. ParseErrorOr<Token const*> TextParser::peek_token()
  22. {
  23. if (m_next_token_index == m_tokens.size())
  24. return ParseError::create("Expected token but found EOF"sv, m_node);
  25. return &m_tokens[m_next_token_index];
  26. }
  27. ParseErrorOr<Token const*> TextParser::consume_token()
  28. {
  29. auto result = peek_token();
  30. if (!result.is_error())
  31. ++m_next_token_index;
  32. return result;
  33. }
  34. ParseErrorOr<Token const*> TextParser::consume_token_with_one_of_types(std::initializer_list<TokenType> types)
  35. {
  36. auto token = TRY(consume_token());
  37. for (TokenType type : types)
  38. if (token->type == type)
  39. return token;
  40. retreat();
  41. return ParseError::create(String::formatted("Unexpected token type {}", token->name()), token->node);
  42. }
  43. ParseErrorOr<Token const*> TextParser::consume_token_with_type(TokenType type)
  44. {
  45. return consume_token_with_one_of_types({ type });
  46. }
  47. ParseErrorOr<void> TextParser::consume_word(StringView word)
  48. {
  49. auto token = TRY(consume_token_with_type(TokenType::Word));
  50. if (!token->data.equals_ignoring_ascii_case(word)) {
  51. retreat();
  52. return ParseError::create("Unexpected word"sv, token->node);
  53. }
  54. return {};
  55. }
  56. ParseErrorOr<void> TextParser::consume_words(std::initializer_list<StringView> words)
  57. {
  58. for (auto word : words)
  59. TRY(consume_word(word));
  60. return {};
  61. }
  62. bool TextParser::is_eof() const
  63. {
  64. return m_next_token_index == m_tokens.size();
  65. }
  66. ParseErrorOr<void> TextParser::expect_eof() const
  67. {
  68. if (!is_eof())
  69. return ParseError::create("Expected EOF"sv, m_node);
  70. return {};
  71. }
  72. ParseErrorOr<Tree> TextParser::parse_record_direct_list_initialization()
  73. {
  74. auto rollback = rollback_point();
  75. (void)consume_word("the"sv);
  76. auto identifier = TRY(consume_token_with_type(TokenType::Identifier));
  77. TRY(consume_token_with_type(TokenType::BraceOpen));
  78. Vector<RecordDirectListInitialization::Argument> arguments;
  79. while (true) {
  80. auto name = TRY(consume_token_with_one_of_types({ TokenType::Identifier, TokenType::BraceClose }));
  81. if (name->is_bracket()) {
  82. break;
  83. } else {
  84. TRY(consume_token_with_type(TokenType::Colon));
  85. auto value = TRY(parse_expression());
  86. (void)consume_token_with_type(TokenType::Comma);
  87. arguments.append({ make_ref_counted<UnresolvedReference>(name->data), value });
  88. }
  89. }
  90. rollback.disarm();
  91. return make_ref_counted<RecordDirectListInitialization>(
  92. make_ref_counted<UnresolvedReference>(identifier->data), move(arguments));
  93. }
  94. // <expr>
  95. ParseErrorOr<Tree> TextParser::parse_expression()
  96. {
  97. auto rollback = rollback_point();
  98. // (the)? <record_name> { (<name>: <value>,)* }
  99. if (auto record_init = parse_record_direct_list_initialization(); !record_init.is_error()) {
  100. rollback.disarm();
  101. return record_init.release_value();
  102. }
  103. #define THROW_PARSE_ERROR_IF(expr) \
  104. do { \
  105. if (expr) \
  106. return ParseError::create("Expected expression"sv, m_node); \
  107. } while (false)
  108. #define THROW_PARSE_ERROR THROW_PARSE_ERROR_IF(true)
  109. Vector<Variant<Tree, Token>> stack;
  110. auto merge_stack = [&](i32 precedence) {
  111. if (!stack.last().has<Tree>())
  112. return;
  113. while (stack.size() >= 2) {
  114. auto const& maybe_operator = stack[stack.size() - 2];
  115. if (!maybe_operator.has<Token>())
  116. break;
  117. auto last_operator = maybe_operator.get<Token>();
  118. auto right = stack.last().get<Tree>();
  119. if (last_operator.is_unary_operator()) {
  120. auto operation = make_ref_counted<UnaryOperation>(last_operator.as_unary_operator(), right);
  121. stack.shrink(stack.size() - 2);
  122. stack.empend(operation);
  123. } else if (last_operator.is_binary_operator() && last_operator.precedence() < precedence) {
  124. auto left = stack[stack.size() - 3].get<Tree>();
  125. auto operation = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), left, right);
  126. stack.shrink(stack.size() - 3);
  127. stack.empend(operation);
  128. } else {
  129. break;
  130. }
  131. }
  132. };
  133. auto merge_pre_merged = [&] {
  134. if (stack.size() < 3)
  135. return;
  136. auto const& maybe_left = stack[stack.size() - 3];
  137. auto const& maybe_operator = stack[stack.size() - 2];
  138. auto const& maybe_right = stack.last();
  139. if (!maybe_left.has<Tree>() || !maybe_operator.has<Token>() || !maybe_right.has<Tree>())
  140. return;
  141. auto last_operator = maybe_operator.get<Token>();
  142. if (!last_operator.is_pre_merged_binary_operator())
  143. return;
  144. auto expression = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), maybe_left.get<Tree>(), maybe_right.get<Tree>());
  145. stack.shrink(stack.size() - 3);
  146. stack.empend(expression);
  147. };
  148. i32 bracket_balance = 0;
  149. while (true) {
  150. auto token_or_error = peek_token();
  151. if (token_or_error.is_error())
  152. break;
  153. auto token = *token_or_error.release_value();
  154. enum {
  155. NoneType,
  156. ExpressionType,
  157. PreMergedBinaryOperatorType,
  158. UnaryOperatorType,
  159. BinaryOperatorType,
  160. BracketType,
  161. } last_element_type;
  162. if (stack.is_empty())
  163. last_element_type = NoneType;
  164. else if (stack.last().has<Tree>())
  165. last_element_type = ExpressionType;
  166. else if (stack.last().get<Token>().is_pre_merged_binary_operator())
  167. last_element_type = PreMergedBinaryOperatorType;
  168. else if (stack.last().get<Token>().is_unary_operator())
  169. last_element_type = UnaryOperatorType;
  170. else if (stack.last().get<Token>().is_binary_operator())
  171. last_element_type = BinaryOperatorType;
  172. else if (stack.last().get<Token>().is_bracket())
  173. last_element_type = BracketType;
  174. else
  175. VERIFY_NOT_REACHED();
  176. if (token.is_ambiguous_operator()) {
  177. if (token.type == TokenType::AmbiguousMinus)
  178. token.type = last_element_type == ExpressionType ? TokenType::BinaryMinus : TokenType::UnaryMinus;
  179. else
  180. VERIFY_NOT_REACHED();
  181. }
  182. bracket_balance += token.is_opening_bracket();
  183. bracket_balance -= token.is_closing_bracket();
  184. if (bracket_balance < 0)
  185. break;
  186. if (token.type == TokenType::ParenOpen) {
  187. if (last_element_type == ExpressionType)
  188. stack.append(Token { TokenType::FunctionCall, ""sv, m_node });
  189. stack.append(token);
  190. if (m_next_token_index + 1 < m_tokens.size()
  191. && m_tokens[m_next_token_index + 1].type == TokenType::ParenClose) {
  192. // This is a call to function which does not take parameters. We cannot handle it
  193. // normally since we need text between parenthesis to be a valid expression. As a
  194. // workaround, we push an artificial tree to stack to act as an argument (it'll be
  195. // removed later during function call canonicalization).
  196. stack.append(zero_argument_function_call);
  197. }
  198. } else if (token.is_pre_merged_binary_operator()) {
  199. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  200. stack.append(token);
  201. } else if (token.is_unary_operator()) {
  202. THROW_PARSE_ERROR_IF(last_element_type == PreMergedBinaryOperatorType);
  203. stack.append(token);
  204. } else if (token.is_binary_operator() || token.is_closing_bracket()) {
  205. if (bracket_balance == 0 && token.type == TokenType::Comma)
  206. break;
  207. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  208. merge_stack(token.precedence());
  209. if (token.is_closing_bracket()) {
  210. THROW_PARSE_ERROR_IF(stack.size() == 1);
  211. THROW_PARSE_ERROR_IF(!stack[stack.size() - 2].get<Token>().matches_with(token));
  212. stack.remove(stack.size() - 2);
  213. merge_pre_merged();
  214. } else {
  215. stack.append(token);
  216. }
  217. } else {
  218. NullableTree expression;
  219. if (token.type == TokenType::Identifier) {
  220. expression = make_ref_counted<UnresolvedReference>(token.data);
  221. } else if (token.type == TokenType::Number) {
  222. expression = make_ref_counted<MathematicalConstant>(token.data.to_number<i64>().value());
  223. } else if (token.type == TokenType::String) {
  224. expression = make_ref_counted<StringLiteral>(token.data);
  225. } else {
  226. break;
  227. }
  228. THROW_PARSE_ERROR_IF(last_element_type == ExpressionType);
  229. stack.append(expression.release_nonnull());
  230. merge_pre_merged();
  231. }
  232. MUST(consume_token());
  233. }
  234. THROW_PARSE_ERROR_IF(stack.is_empty());
  235. merge_stack(closing_bracket_precedence);
  236. THROW_PARSE_ERROR_IF(stack.size() != 1 || !stack[0].has<Tree>());
  237. rollback.disarm();
  238. return stack[0].get<Tree>();
  239. #undef THROW_PARSE_ERROR
  240. #undef THROW_PARSE_ERROR_IF
  241. }
  242. // <condition> :== <expr> | (<expr> is <expr> (or <expr>)?)
  243. ParseErrorOr<Tree> TextParser::parse_condition()
  244. {
  245. auto rollback = rollback_point();
  246. auto expression = TRY(parse_expression());
  247. if (!consume_token_with_type(TokenType::Is).is_error()) {
  248. Vector compare_values { TRY(parse_expression()) };
  249. if (!consume_word("or"sv).is_error())
  250. compare_values.append(TRY(parse_expression()));
  251. rollback.disarm();
  252. return make_ref_counted<IsOneOfOperation>(expression, move(compare_values));
  253. }
  254. rollback.disarm();
  255. return expression;
  256. }
  257. // return <expr>
  258. ParseErrorOr<Tree> TextParser::parse_return_statement()
  259. {
  260. auto rollback = rollback_point();
  261. TRY(consume_word("return"sv));
  262. auto return_value = TRY(parse_expression());
  263. rollback.disarm();
  264. return make_ref_counted<ReturnNode>(return_value);
  265. }
  266. // assert: <condition>
  267. ParseErrorOr<Tree> TextParser::parse_assert()
  268. {
  269. auto rollback = rollback_point();
  270. auto identifier = TRY(consume_token_with_type(TokenType::Identifier))->data;
  271. if (!identifier.equals_ignoring_ascii_case("assert"sv)) {
  272. return ParseError::create("Expected identifier \"Assert\""sv, m_node);
  273. }
  274. TRY(consume_token_with_type(TokenType::Colon));
  275. auto condition = TRY(parse_condition());
  276. rollback.disarm();
  277. return make_ref_counted<AssertExpression>(condition);
  278. }
  279. // (let <expr> be <expr>) | (set <expr> to <expr>)
  280. ParseErrorOr<Tree> TextParser::parse_assignment()
  281. {
  282. auto rollback = rollback_point();
  283. bool is_let = !consume_word("let"sv).is_error();
  284. if (!is_let)
  285. TRY(consume_word("set"sv));
  286. auto lvalue = TRY(parse_expression());
  287. TRY(consume_word(is_let ? "be"sv : "to"sv));
  288. auto rvalue = TRY(parse_expression());
  289. rollback.disarm();
  290. auto op = is_let ? BinaryOperator::Declaration : BinaryOperator::Assignment;
  291. return make_ref_counted<BinaryOperation>(op, lvalue, rvalue);
  292. }
  293. // <simple_step>
  294. ParseErrorOr<Tree> TextParser::parse_simple_step_or_inline_if_branch()
  295. {
  296. auto rollback = rollback_point();
  297. // Return <expr>.$
  298. if (auto result = parse_return_statement(); !result.is_error()) {
  299. TRY(consume_token_with_type(TokenType::Dot));
  300. TRY(expect_eof());
  301. rollback.disarm();
  302. return result.release_value();
  303. }
  304. // Assert: <expr>.$
  305. if (auto result = parse_assert(); !result.is_error()) {
  306. TRY(consume_token_with_type(TokenType::Dot));
  307. TRY(expect_eof());
  308. rollback.disarm();
  309. return result.release_value();
  310. }
  311. // Let <expr> be <expr>.$
  312. // Set <expr> to <expr>.$
  313. if (auto result = parse_assignment(); !result.is_error()) {
  314. TRY(consume_token_with_type(TokenType::Dot));
  315. TRY(expect_eof());
  316. rollback.disarm();
  317. return result.release_value();
  318. }
  319. return ParseError::create("Unable to parse simple step or inline if branch"sv, m_node);
  320. }
  321. // <if_condition> :== (If <condition>) | (Else) | (Else if <condition>),
  322. ParseErrorOr<TextParser::IfConditionParseResult> TextParser::parse_if_beginning()
  323. {
  324. auto rollback = rollback_point();
  325. bool is_if_branch = !consume_word("if"sv).is_error();
  326. NullableTree condition = nullptr;
  327. if (is_if_branch) {
  328. condition = TRY(parse_condition());
  329. } else {
  330. TRY(consume_word("else"sv));
  331. if (!consume_word("if"sv).is_error())
  332. condition = TRY(parse_condition());
  333. }
  334. TRY(consume_token_with_type(TokenType::Comma));
  335. rollback.disarm();
  336. return IfConditionParseResult { is_if_branch, condition };
  337. }
  338. // <inline_if> :== <if_condition> <simple_step>.$
  339. ParseErrorOr<Tree> TextParser::parse_inline_if_else()
  340. {
  341. auto rollback = rollback_point();
  342. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  343. auto then_branch = TRY(parse_simple_step_or_inline_if_branch());
  344. rollback.disarm();
  345. if (is_if_branch)
  346. return make_ref_counted<IfBranch>(condition.release_nonnull(), then_branch);
  347. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  348. }
  349. // <if> :== <if_condition> then$ <substeps>
  350. ParseErrorOr<Tree> TextParser::parse_if(Tree then_branch)
  351. {
  352. auto rollback = rollback_point();
  353. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  354. TRY(consume_word("then"sv));
  355. TRY(expect_eof());
  356. rollback.disarm();
  357. if (is_if_branch)
  358. return make_ref_counted<IfBranch>(*condition, then_branch);
  359. else
  360. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  361. }
  362. // <else> :== Else,$ <substeps>
  363. ParseErrorOr<Tree> TextParser::parse_else(Tree else_branch)
  364. {
  365. auto rollback = rollback_point();
  366. TRY(consume_word("else"sv));
  367. TRY(consume_token_with_type(TokenType::Comma));
  368. TRY(expect_eof());
  369. rollback.disarm();
  370. return make_ref_counted<ElseIfBranch>(nullptr, else_branch);
  371. }
  372. // <simple_step> | <inline_if>
  373. ParseErrorOr<Tree> TextParser::parse_step_without_substeps()
  374. {
  375. auto rollback = rollback_point();
  376. // <simple_step>
  377. if (auto result = parse_simple_step_or_inline_if_branch(); !result.is_error()) {
  378. rollback.disarm();
  379. return result.release_value();
  380. }
  381. // <inline_if>
  382. if (auto result = parse_inline_if_else(); !result.is_error()) {
  383. rollback.disarm();
  384. return result.release_value();
  385. }
  386. return ParseError::create("Unable to parse step without substeps"sv, m_node);
  387. }
  388. // <if> | <else>
  389. ParseErrorOr<Tree> TextParser::parse_step_with_substeps(Tree substeps)
  390. {
  391. auto rollback = rollback_point();
  392. // <if>
  393. if (auto result = parse_if(substeps); !result.is_error()) {
  394. rollback.disarm();
  395. return result.release_value();
  396. }
  397. // <else>
  398. if (auto result = parse_else(substeps); !result.is_error()) {
  399. rollback.disarm();
  400. return result.release_value();
  401. }
  402. return ParseError::create("Unable to parse step with substeps"sv, m_node);
  403. }
  404. ParseErrorOr<ClauseHeader> TextParser::parse_clause_header()
  405. {
  406. ClauseHeader result;
  407. auto section_number_token = TRY(consume_token_with_type(TokenType::SectionNumber));
  408. result.section_number = section_number_token->data;
  409. ClauseHeader::FunctionDefinition function_definition;
  410. function_definition.name = TRY(consume_token())->data;
  411. TRY(consume_token_with_type(TokenType::ParenOpen));
  412. while (true) {
  413. if (function_definition.arguments.is_empty()) {
  414. auto argument = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Identifier }));
  415. if (argument->type == TokenType::ParenClose)
  416. break;
  417. function_definition.arguments.append({ argument->data });
  418. } else {
  419. function_definition.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier))->data });
  420. }
  421. auto next_token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
  422. if (next_token->type == TokenType::ParenClose)
  423. break;
  424. }
  425. TRY(expect_eof());
  426. result.header = function_definition;
  427. return result;
  428. }
  429. }