TextParser.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include "Parser/SpecParser.h"
  8. #include "Parser/TextParser.h"
  9. namespace JSSpecCompiler {
  10. void TextParser::save_error(Variant<TokenType, StringView, CustomMessage>&& expected)
  11. {
  12. if (m_max_parsed_tokens > m_next_token_index)
  13. return;
  14. if (m_max_parsed_tokens < m_next_token_index)
  15. m_suitable_continuations.clear();
  16. m_max_parsed_tokens = m_next_token_index;
  17. m_suitable_continuations.append(move(expected));
  18. }
  19. void TextParser::retreat()
  20. {
  21. --m_next_token_index;
  22. }
  23. auto TextParser::rollback_point()
  24. {
  25. return ArmedScopeGuard {
  26. [this, index = this->m_next_token_index] {
  27. m_next_token_index = index;
  28. }
  29. };
  30. }
  31. Optional<Token> TextParser::peek_token()
  32. {
  33. if (m_next_token_index == m_tokens.size())
  34. return {};
  35. return m_tokens[m_next_token_index];
  36. }
  37. Optional<Token> TextParser::consume_token()
  38. {
  39. auto result = peek_token();
  40. if (result.has_value())
  41. ++m_next_token_index;
  42. return result;
  43. }
  44. TextParseErrorOr<Token> TextParser::consume_token_with_one_of_types(std::initializer_list<TokenType> types)
  45. {
  46. auto token = peek_token();
  47. if (token.has_value()) {
  48. for (TokenType type : types) {
  49. if (token->type == type) {
  50. (void)consume_token();
  51. return *token;
  52. } else {
  53. save_error(type);
  54. }
  55. }
  56. } else {
  57. for (TokenType type : types)
  58. save_error(type);
  59. }
  60. return TextParseError {};
  61. }
  62. TextParseErrorOr<Token> TextParser::consume_token_with_type(TokenType type)
  63. {
  64. return consume_token_with_one_of_types({ type });
  65. }
  66. TextParseErrorOr<void> TextParser::consume_token(TokenType type, StringView data)
  67. {
  68. auto token = consume_token();
  69. if (!token.has_value() || token->type != type || !token->data.equals_ignoring_ascii_case(data)) {
  70. retreat();
  71. save_error(data);
  72. return TextParseError {};
  73. }
  74. return {};
  75. }
  76. TextParseErrorOr<void> TextParser::consume_word(StringView word)
  77. {
  78. auto token = consume_token();
  79. if (!token.has_value() || token->type != TokenType::Word || !token->data.equals_ignoring_ascii_case(word)) {
  80. retreat();
  81. save_error(word);
  82. return TextParseError {};
  83. }
  84. return {};
  85. }
  86. TextParseErrorOr<void> TextParser::consume_words(std::initializer_list<StringView> words)
  87. {
  88. for (auto word : words)
  89. TRY(consume_word(word));
  90. return {};
  91. }
  92. bool TextParser::is_eof() const
  93. {
  94. return m_next_token_index == m_tokens.size();
  95. }
  96. TextParseErrorOr<void> TextParser::expect_eof()
  97. {
  98. if (!is_eof()) {
  99. save_error(CustomMessage { "EOF"sv });
  100. return TextParseError {};
  101. }
  102. return {};
  103. }
  104. // (the)? <record_name> { (<name>: <value>,)* }
  105. TextParseErrorOr<Tree> TextParser::parse_record_direct_list_initialization()
  106. {
  107. auto rollback = rollback_point();
  108. (void)consume_word("the"sv);
  109. auto identifier = TRY(consume_token_with_type(TokenType::Identifier));
  110. TRY(consume_token_with_type(TokenType::BraceOpen));
  111. Vector<RecordDirectListInitialization::Argument> arguments;
  112. while (true) {
  113. auto name = TRY(consume_token_with_one_of_types({ TokenType::Identifier, TokenType::BraceClose }));
  114. if (name.is_bracket()) {
  115. break;
  116. } else {
  117. TRY(consume_token_with_type(TokenType::Colon));
  118. auto value = TRY(parse_expression());
  119. (void)consume_token_with_type(TokenType::Comma);
  120. arguments.append({ make_ref_counted<UnresolvedReference>(name.data), value });
  121. }
  122. }
  123. rollback.disarm();
  124. return make_ref_counted<RecordDirectListInitialization>(
  125. make_ref_counted<UnresolvedReference>(identifier.data), move(arguments));
  126. }
  127. // <function_arguments> :== '(' (<expr> (, <expr>)* )? ')'
  128. TextParseErrorOr<Vector<Tree>> TextParser::parse_function_arguments()
  129. {
  130. auto rollback = rollback_point();
  131. TRY(consume_token_with_type(TokenType::ParenOpen));
  132. if (!consume_token_with_type(TokenType::ParenClose).is_error()) {
  133. rollback.disarm();
  134. return Vector<Tree> {};
  135. }
  136. Vector<Tree> arguments;
  137. while (true) {
  138. arguments.append(TRY(parse_expression()));
  139. auto token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
  140. if (token.type == TokenType::ParenClose)
  141. break;
  142. }
  143. rollback.disarm();
  144. return arguments;
  145. }
  146. // <expr>
       //
       // Parses an expression starting at the current token and returns its tree.
       // A record initialization is tried first; otherwise the tokens are processed one by one
       // using an explicit stack whose elements are either finished operands (Tree) or pending
       // operators / opening brackets (Token). Parsing stops at end of input, at a closing
       // bracket that was not opened inside this expression, at a comma outside of any bracket,
       // or at a token that cannot be part of an expression. On failure the rollback guard
       // restores the token cursor.
  147. TextParseErrorOr<Tree> TextParser::parse_expression()
  148. {
  149. auto rollback = rollback_point();
       // If a record initialization parses here, it is returned as the whole expression.
  150. if (auto record_init = parse_record_direct_list_initialization(); !record_init.is_error()) {
  151. rollback.disarm();
  152. return record_init.release_value();
  153. }
       // THROW_PARSE_ERROR_IF records a custom "expected" message and fails the parse when the
       // condition holds. The condition is stringified into the message, so the spelling of the
       // conditions passed below (including local variable names) is part of the diagnostic text.
  154. #define THROW_PARSE_ERROR_IF(expr) \
  155. do { \
  156. if (expr) { \
  157. save_error(CustomMessage { "valid expression continuation (not valid because " #expr ")"##sv }); \
  158. return TextParseError {}; \
  159. } \
  160. } while (false)
  161. #define THROW_PARSE_ERROR THROW_PARSE_ERROR_IF(true)
  162. Vector<Variant<Tree, Token>> stack;
       // Folds operators sitting directly below the top operand into trees. A unary operator is
       // always applied; a binary operator is applied only if its precedence() is less than
       // `precedence`. Stops at the first element that is not such an operator. Does nothing
       // when the top of the stack is not a Tree. Callers ensure the stack is non-empty.
  163. auto merge_stack = [&](i32 precedence) {
  164. if (!stack.last().has<Tree>())
  165. return;
  166. while (stack.size() >= 2) {
  167. auto const& maybe_operator = stack[stack.size() - 2];
  168. if (!maybe_operator.has<Token>())
  169. break;
       // Copies are taken here because the stack is shrunk below.
  170. auto last_operator = maybe_operator.get<Token>();
  171. auto right = stack.last().get<Tree>();
  172. if (last_operator.is_unary_operator()) {
  173. auto operation = make_ref_counted<UnaryOperation>(last_operator.as_unary_operator(), right);
  174. stack.shrink(stack.size() - 2);
  175. stack.empend(operation);
  176. } else if (last_operator.is_binary_operator() && last_operator.precedence() < precedence) {
       // NOTE(review): assumes a binary operator always has a Tree directly below it; the main
       // loop only pushes binary operators after an operand.
  177. auto left = stack[stack.size() - 3].get<Tree>();
  178. auto operation = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), left, right);
  179. stack.shrink(stack.size() - 3);
  180. stack.empend(operation);
  181. } else {
  182. break;
  183. }
  184. }
  185. };
       // If the top three stack elements are <Tree> <pre-merged binary operator> <Tree>, replaces
       // them with a single BinaryOperation; otherwise leaves the stack unchanged.
  186. auto merge_pre_merged = [&] {
  187. if (stack.size() < 3)
  188. return;
  189. auto const& maybe_left = stack[stack.size() - 3];
  190. auto const& maybe_operator = stack[stack.size() - 2];
  191. auto const& maybe_right = stack.last();
  192. if (!maybe_left.has<Tree>() || !maybe_operator.has<Token>() || !maybe_right.has<Tree>())
  193. return;
  194. auto last_operator = maybe_operator.get<Token>();
  195. if (!last_operator.is_pre_merged_binary_operator())
  196. return;
  197. auto expression = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), maybe_left.get<Tree>(), maybe_right.get<Tree>());
  198. stack.shrink(stack.size() - 3);
  199. stack.empend(expression);
  200. };
       // Opening brackets seen in this expression minus closing brackets seen so far.
  201. i32 bracket_balance = 0;
  202. while (true) {
       // The token is only peeked here; it is consumed at the bottom of the loop unless a
       // sub-parser already consumed it (is_consumed).
  203. auto token_or_error = peek_token();
  204. if (!token_or_error.has_value())
  205. break;
  206. auto token = token_or_error.release_value();
  207. bool is_consumed = false;
       // Classification of the element currently on top of the stack; used to decide whether
       // the incoming token is allowed at this point.
  208. enum {
  209. NoneType,
  210. ExpressionType,
  211. PreMergedBinaryOperatorType,
  212. UnaryOperatorType,
  213. BinaryOperatorType,
  214. BracketType,
  215. } last_element_type;
  216. if (stack.is_empty())
  217. last_element_type = NoneType;
  218. else if (stack.last().has<Tree>())
  219. last_element_type = ExpressionType;
  220. else if (stack.last().get<Token>().is_pre_merged_binary_operator())
  221. last_element_type = PreMergedBinaryOperatorType;
  222. else if (stack.last().get<Token>().is_unary_operator())
  223. last_element_type = UnaryOperatorType;
  224. else if (stack.last().get<Token>().is_binary_operator())
  225. last_element_type = BinaryOperatorType;
  226. else if (stack.last().get<Token>().is_bracket())
  227. last_element_type = BracketType;
  228. else
  229. VERIFY_NOT_REACHED();
       // Resolve an ambiguous minus: binary when it follows an operand, unary otherwise.
       // AmbiguousMinus is the only ambiguous operator handled here.
  230. if (token.is_ambiguous_operator()) {
  231. if (token.type == TokenType::AmbiguousMinus)
  232. token.type = last_element_type == ExpressionType ? TokenType::BinaryMinus : TokenType::UnaryMinus;
  233. else
  234. VERIFY_NOT_REACHED();
  235. }
  236. bracket_balance += token.is_opening_bracket();
  237. bracket_balance -= token.is_closing_bracket();
       // A closing bracket with no matching opener in this expression ends the expression; the
       // token is left unconsumed.
  238. if (bracket_balance < 0)
  239. break;
  240. if (token.type == TokenType::ParenOpen) {
  241. if (last_element_type == ExpressionType) {
  242. // This is a function call.
  243. auto arguments = TRY(parse_function_arguments());
  244. is_consumed = true;
  245. stack.append(Tree { make_ref_counted<FunctionCall>(stack.take_last().get<Tree>(), move(arguments)) });
       // parse_function_arguments() consumed through the closing ')', so undo the increment
       // made for the '(' above.
  246. --bracket_balance;
  247. } else {
  248. // This is just an opening '(' in expression.
  249. stack.append(token);
  250. }
  251. } else if (token.is_pre_merged_binary_operator()) {
       // A pre-merged binary operator must directly follow an operand.
  252. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  253. stack.append(token);
  254. } else if (token.is_unary_operator()) {
  255. THROW_PARSE_ERROR_IF(last_element_type == PreMergedBinaryOperatorType);
  256. stack.append(token);
  257. } else if (token.is_binary_operator() || token.is_closing_bracket()) {
       // A comma outside of any bracket ends the expression and is left unconsumed.
  258. if (bracket_balance == 0 && token.type == TokenType::Comma)
  259. break;
  260. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  261. merge_stack(token.precedence());
  262. if (token.is_closing_bracket()) {
       // After merging, the element below the top operand must be the matching opening
       // bracket; it is removed so that only the operand remains in its place.
  263. THROW_PARSE_ERROR_IF(stack.size() == 1);
  264. THROW_PARSE_ERROR_IF(!stack[stack.size() - 2].get<Token>().matches_with(token));
  265. stack.remove(stack.size() - 2);
  266. merge_pre_merged();
  267. } else {
  268. stack.append(token);
  269. }
  270. } else {
       // Anything else is either an operand or a token that cannot be part of an expression.
  271. NullableTree expression;
  272. if (token.type == TokenType::Identifier) {
  273. expression = make_ref_counted<UnresolvedReference>(token.data);
  274. } else if (token.type == TokenType::WellKnownValue) {
  275. static constexpr struct {
  276. StringView name;
  277. WellKnownNode::Type type;
  278. } translations[] = {
  279. { "false"sv, WellKnownNode::Type::False },
  280. { "null"sv, WellKnownNode::Type::Null },
  281. { "this"sv, WellKnownNode::Type::This },
  282. { "true"sv, WellKnownNode::Type::True },
  283. { "undefined"sv, WellKnownNode::Type::Undefined },
  284. };
  285. for (auto [name, type] : translations) {
  286. if (token.data == name) {
  287. expression = make_ref_counted<WellKnownNode>(type);
  288. break;
  289. }
  290. }
       // A WellKnownValue token whose text is not in the table above trips this VERIFY.
  291. VERIFY(expression);
  292. } else if (token.type == TokenType::Enumerator) {
  293. expression = m_ctx.translation_unit()->get_node_for_enumerator_value(token.data);
  294. } else if (token.type == TokenType::Number) {
  295. expression = make_ref_counted<MathematicalConstant>(MUST(Crypto::BigFraction::from_string(token.data)));
  296. } else if (token.type == TokenType::String) {
  297. expression = make_ref_counted<StringLiteral>(token.data);
  298. } else {
       // Not an expression token: stop here and leave it unconsumed.
  299. break;
  300. }
       // Two operands may not be adjacent.
  301. THROW_PARSE_ERROR_IF(last_element_type == ExpressionType);
  302. stack.append(expression.release_nonnull());
  303. merge_pre_merged();
  304. }
  305. if (!is_consumed)
  306. VERIFY(consume_token().has_value());
  307. }
       // Fold whatever is left. closing_bracket_precedence is not defined in this function.
       // NOTE(review): presumably it exceeds every binary operator's precedence so that all
       // pending operators merge -- confirm at its definition.
  308. THROW_PARSE_ERROR_IF(stack.is_empty());
  309. merge_stack(closing_bracket_precedence);
       // A well-formed expression leaves exactly one Tree on the stack.
  310. THROW_PARSE_ERROR_IF(stack.size() != 1 || !stack[0].has<Tree>());
  311. rollback.disarm();
  312. return stack[0].get<Tree>();
  313. #undef THROW_PARSE_ERROR
  314. #undef THROW_PARSE_ERROR_IF
  315. }
  316. // <condition> :== <expr> | (<expr> is <expr> (or <expr>)?)
  317. TextParseErrorOr<Tree> TextParser::parse_condition()
  318. {
  319. auto rollback = rollback_point();
  320. auto expression = TRY(parse_expression());
  321. if (!consume_token_with_type(TokenType::Is).is_error()) {
  322. Vector compare_values { TRY(parse_expression()) };
  323. if (!consume_word("or"sv).is_error())
  324. compare_values.append(TRY(parse_expression()));
  325. rollback.disarm();
  326. return make_ref_counted<IsOneOfOperation>(expression, move(compare_values));
  327. }
  328. rollback.disarm();
  329. return expression;
  330. }
  331. // return <expr>
  332. TextParseErrorOr<Tree> TextParser::parse_return_statement()
  333. {
  334. auto rollback = rollback_point();
  335. TRY(consume_word("return"sv));
  336. auto return_value = TRY(parse_expression());
  337. rollback.disarm();
  338. return make_ref_counted<ReturnNode>(return_value);
  339. }
  340. // assert: <condition>
  341. TextParseErrorOr<Tree> TextParser::parse_assert()
  342. {
  343. auto rollback = rollback_point();
  344. TRY(consume_token(TokenType::Identifier, "assert"sv));
  345. TRY(consume_token_with_type(TokenType::Colon));
  346. auto condition = TRY(parse_condition());
  347. rollback.disarm();
  348. return make_ref_counted<AssertExpression>(condition);
  349. }
  350. // (let <expr> be <expr>) | (set <expr> to <expr>)
  351. TextParseErrorOr<Tree> TextParser::parse_assignment()
  352. {
  353. auto rollback = rollback_point();
  354. bool is_let = !consume_word("let"sv).is_error();
  355. if (!is_let)
  356. TRY(consume_word("set"sv));
  357. auto lvalue = TRY(parse_expression());
  358. TRY(consume_word(is_let ? "be"sv : "to"sv));
  359. auto rvalue = TRY(parse_expression());
  360. rollback.disarm();
  361. auto op = is_let ? BinaryOperator::Declaration : BinaryOperator::Assignment;
  362. return make_ref_counted<BinaryOperation>(op, lvalue, rvalue);
  363. }
  364. // <simple_step>
  365. TextParseErrorOr<Tree> TextParser::parse_simple_step_or_inline_if_branch()
  366. {
  367. auto rollback = rollback_point();
  368. // Return <expr>.$
  369. if (auto result = parse_return_statement(); !result.is_error()) {
  370. TRY(consume_token_with_type(TokenType::Dot));
  371. TRY(expect_eof());
  372. rollback.disarm();
  373. return result.release_value();
  374. }
  375. // Assert: <expr>.$
  376. if (auto result = parse_assert(); !result.is_error()) {
  377. TRY(consume_token_with_type(TokenType::Dot));
  378. TRY(expect_eof());
  379. rollback.disarm();
  380. return result.release_value();
  381. }
  382. // Let <expr> be <expr>.$
  383. // Set <expr> to <expr>.$
  384. if (auto result = parse_assignment(); !result.is_error()) {
  385. TRY(consume_token_with_type(TokenType::Dot));
  386. TRY(expect_eof());
  387. rollback.disarm();
  388. return result.release_value();
  389. }
  390. return TextParseError {};
  391. }
  392. // <if_condition> :== (If <condition>) | (Else) | (Else if <condition>),
  393. TextParseErrorOr<TextParser::IfConditionParseResult> TextParser::parse_if_beginning()
  394. {
  395. auto rollback = rollback_point();
  396. bool is_if_branch = !consume_word("if"sv).is_error();
  397. NullableTree condition = nullptr;
  398. if (is_if_branch) {
  399. condition = TRY(parse_condition());
  400. } else {
  401. TRY(consume_word("else"sv));
  402. if (!consume_word("if"sv).is_error())
  403. condition = TRY(parse_condition());
  404. }
  405. TRY(consume_token_with_type(TokenType::Comma));
  406. rollback.disarm();
  407. return IfConditionParseResult { is_if_branch, condition };
  408. }
  409. // <inline_if> :== <if_condition> <simple_step>.$
  410. TextParseErrorOr<Tree> TextParser::parse_inline_if_else()
  411. {
  412. auto rollback = rollback_point();
  413. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  414. auto then_branch = TRY(parse_simple_step_or_inline_if_branch());
  415. rollback.disarm();
  416. if (is_if_branch)
  417. return make_ref_counted<IfBranch>(condition.release_nonnull(), then_branch);
  418. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  419. }
  420. // <if> :== <if_condition> then$ <substeps>
  421. TextParseErrorOr<Tree> TextParser::parse_if(Tree then_branch)
  422. {
  423. auto rollback = rollback_point();
  424. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  425. TRY(consume_word("then"sv));
  426. TRY(expect_eof());
  427. rollback.disarm();
  428. if (is_if_branch)
  429. return make_ref_counted<IfBranch>(*condition, then_branch);
  430. else
  431. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  432. }
  433. // <else> :== Else,$ <substeps>
  434. TextParseErrorOr<Tree> TextParser::parse_else(Tree else_branch)
  435. {
  436. auto rollback = rollback_point();
  437. TRY(consume_word("else"sv));
  438. TRY(consume_token_with_type(TokenType::Comma));
  439. TRY(expect_eof());
  440. rollback.disarm();
  441. return make_ref_counted<ElseIfBranch>(nullptr, else_branch);
  442. }
  443. // <simple_step> | <inline_if>
  444. TextParseErrorOr<Tree> TextParser::parse_step_without_substeps()
  445. {
  446. auto rollback = rollback_point();
  447. // <simple_step>
  448. if (auto result = parse_simple_step_or_inline_if_branch(); !result.is_error()) {
  449. rollback.disarm();
  450. return result.release_value();
  451. }
  452. // <inline_if>
  453. if (auto result = parse_inline_if_else(); !result.is_error()) {
  454. rollback.disarm();
  455. return result.release_value();
  456. }
  457. return TextParseError {};
  458. }
  459. // <if> | <else>
  460. TextParseErrorOr<Tree> TextParser::parse_step_with_substeps(Tree substeps)
  461. {
  462. auto rollback = rollback_point();
  463. // <if>
  464. if (auto result = parse_if(substeps); !result.is_error()) {
  465. rollback.disarm();
  466. return result.release_value();
  467. }
  468. // <else>
  469. if (auto result = parse_else(substeps); !result.is_error()) {
  470. rollback.disarm();
  471. return result.release_value();
  472. }
  473. return TextParseError {};
  474. }
  475. TextParseErrorOr<ClauseHeader> TextParser::parse_clause_header()
  476. {
  477. ClauseHeader result;
  478. auto section_number_token = TRY(consume_token_with_type(TokenType::SectionNumber));
  479. result.section_number = section_number_token.data;
  480. ClauseHeader::FunctionDefinition function_definition;
  481. function_definition.name = TRY(consume_token_with_type(TokenType::Word)).data;
  482. TRY(consume_token_with_type(TokenType::ParenOpen));
  483. while (true) {
  484. if (function_definition.arguments.is_empty()) {
  485. auto argument = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Identifier }));
  486. if (argument.type == TokenType::ParenClose)
  487. break;
  488. function_definition.arguments.append({ argument.data });
  489. } else {
  490. function_definition.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier)).data });
  491. }
  492. auto next_token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
  493. if (next_token.type == TokenType::ParenClose)
  494. break;
  495. }
  496. TRY(expect_eof());
  497. result.header = function_definition;
  498. return result;
  499. }
  500. FailedTextParseDiagnostic TextParser::get_diagnostic() const
  501. {
  502. StringBuilder message;
  503. message.append("unexpected "sv);
  504. if (m_max_parsed_tokens == m_tokens.size()) {
  505. message.append("EOF"sv);
  506. } else {
  507. auto token = m_tokens[m_max_parsed_tokens];
  508. if (token.type == TokenType::Word)
  509. message.appendff("'{}'", token.data);
  510. else if (token.type == TokenType::Identifier)
  511. message.appendff("identifier '{}'", token.data);
  512. else
  513. message.append(token.name_for_diagnostic());
  514. }
  515. message.appendff(", expected ");
  516. size_t size = m_suitable_continuations.size();
  517. VERIFY(size > 0);
  518. for (size_t i = 0; i < size; ++i) {
  519. m_suitable_continuations[i].visit(
  520. [&](TokenType type) { message.append(token_info[to_underlying(type)].name_for_diagnostic); },
  521. [&](StringView word) { message.appendff("'{}'", word); },
  522. [&](CustomMessage continuation) { message.append(continuation.message); });
  523. if (i + 1 != size) {
  524. if (size == 2)
  525. message.append(" or "sv);
  526. else if (i + 2 == size)
  527. message.append(", or "sv);
  528. else
  529. message.append(", "sv);
  530. }
  531. }
  532. Location location = Location::global_scope();
  533. if (m_max_parsed_tokens < m_tokens.size()) {
  534. location = m_tokens[m_max_parsed_tokens].location;
  535. } else {
  536. // FIXME: Would be nice to point to the closing tag not the opening one. This is also the
  537. // only place where we use m_location.
  538. location = m_ctx.location_from_xml_offset(m_node->offset);
  539. }
  540. return { location, MUST(message.to_string()) };
  541. }
  542. }