TextParser.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include "Parser/SpecParser.h"
  8. #include "Parser/TextParser.h"
  9. namespace JSSpecCompiler {
  10. void TextParser::save_error(Variant<TokenType, StringView, CustomMessage>&& expected)
  11. {
  12. if (m_max_parsed_tokens > m_next_token_index)
  13. return;
  14. if (m_max_parsed_tokens < m_next_token_index)
  15. m_suitable_continuations.clear();
  16. m_max_parsed_tokens = m_next_token_index;
  17. m_suitable_continuations.append(move(expected));
  18. }
  19. void TextParser::retreat()
  20. {
  21. --m_next_token_index;
  22. }
  23. auto TextParser::rollback_point()
  24. {
  25. return ArmedScopeGuard {
  26. [this, index = this->m_next_token_index] {
  27. m_next_token_index = index;
  28. }
  29. };
  30. }
  31. Optional<Token> TextParser::peek_token()
  32. {
  33. if (m_next_token_index == m_tokens.size())
  34. return {};
  35. return m_tokens[m_next_token_index];
  36. }
  37. Optional<Token> TextParser::consume_token()
  38. {
  39. auto result = peek_token();
  40. if (result.has_value())
  41. ++m_next_token_index;
  42. return result;
  43. }
  44. TextParseErrorOr<Token> TextParser::consume_token_with_one_of_types(std::initializer_list<TokenType> types)
  45. {
  46. auto token = peek_token();
  47. if (token.has_value()) {
  48. for (TokenType type : types) {
  49. if (token->type == type) {
  50. (void)consume_token();
  51. return *token;
  52. } else {
  53. save_error(type);
  54. }
  55. }
  56. } else {
  57. for (TokenType type : types)
  58. save_error(type);
  59. }
  60. return TextParseError {};
  61. }
  62. TextParseErrorOr<Token> TextParser::consume_token_with_type(TokenType type)
  63. {
  64. return consume_token_with_one_of_types({ type });
  65. }
  66. TextParseErrorOr<void> TextParser::consume_token(TokenType type, StringView data)
  67. {
  68. auto token = consume_token();
  69. if (!token.has_value() || token->type != type || !token->data.equals_ignoring_ascii_case(data)) {
  70. retreat();
  71. save_error(data);
  72. return TextParseError {};
  73. }
  74. return {};
  75. }
  76. TextParseErrorOr<void> TextParser::consume_word(StringView word)
  77. {
  78. auto token = consume_token();
  79. if (!token.has_value() || token->type != TokenType::Word || !token->data.equals_ignoring_ascii_case(word)) {
  80. retreat();
  81. save_error(word);
  82. return TextParseError {};
  83. }
  84. return {};
  85. }
  86. TextParseErrorOr<void> TextParser::consume_words(std::initializer_list<StringView> words)
  87. {
  88. for (auto word : words)
  89. TRY(consume_word(word));
  90. return {};
  91. }
  92. bool TextParser::is_eof() const
  93. {
  94. return m_next_token_index == m_tokens.size();
  95. }
  96. TextParseErrorOr<void> TextParser::expect_eof()
  97. {
  98. if (!is_eof()) {
  99. save_error(CustomMessage { "EOF"sv });
  100. return TextParseError {};
  101. }
  102. return {};
  103. }
  104. // (the)? <record_name> { (<name>: <value>,)* }
  105. TextParseErrorOr<Tree> TextParser::parse_record_direct_list_initialization()
  106. {
  107. auto rollback = rollback_point();
  108. (void)consume_word("the"sv);
  109. auto identifier = TRY(consume_token_with_type(TokenType::Identifier));
  110. TRY(consume_token_with_type(TokenType::BraceOpen));
  111. Vector<RecordDirectListInitialization::Argument> arguments;
  112. while (true) {
  113. auto name = TRY(consume_token_with_one_of_types({ TokenType::Identifier, TokenType::BraceClose }));
  114. if (name.is_bracket()) {
  115. break;
  116. } else {
  117. TRY(consume_token_with_type(TokenType::Colon));
  118. auto value = TRY(parse_expression());
  119. (void)consume_token_with_type(TokenType::Comma);
  120. arguments.append({ make_ref_counted<UnresolvedReference>(name.data), value });
  121. }
  122. }
  123. rollback.disarm();
  124. return make_ref_counted<RecordDirectListInitialization>(
  125. make_ref_counted<UnresolvedReference>(identifier.data), move(arguments));
  126. }
  127. // <expr>
  128. TextParseErrorOr<Tree> TextParser::parse_expression()
  129. {
  130. auto rollback = rollback_point();
  131. if (auto record_init = parse_record_direct_list_initialization(); !record_init.is_error()) {
  132. rollback.disarm();
  133. return record_init.release_value();
  134. }
  135. #define THROW_PARSE_ERROR_IF(expr) \
  136. do { \
  137. if (expr) { \
  138. save_error(CustomMessage { "valid expression continuation (not valid because " #expr ")"##sv }); \
  139. return TextParseError {}; \
  140. } \
  141. } while (false)
  142. #define THROW_PARSE_ERROR THROW_PARSE_ERROR_IF(true)
  143. Vector<Variant<Tree, Token>> stack;
  144. auto merge_stack = [&](i32 precedence) {
  145. if (!stack.last().has<Tree>())
  146. return;
  147. while (stack.size() >= 2) {
  148. auto const& maybe_operator = stack[stack.size() - 2];
  149. if (!maybe_operator.has<Token>())
  150. break;
  151. auto last_operator = maybe_operator.get<Token>();
  152. auto right = stack.last().get<Tree>();
  153. if (last_operator.is_unary_operator()) {
  154. auto operation = make_ref_counted<UnaryOperation>(last_operator.as_unary_operator(), right);
  155. stack.shrink(stack.size() - 2);
  156. stack.empend(operation);
  157. } else if (last_operator.is_binary_operator() && last_operator.precedence() < precedence) {
  158. auto left = stack[stack.size() - 3].get<Tree>();
  159. auto operation = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), left, right);
  160. stack.shrink(stack.size() - 3);
  161. stack.empend(operation);
  162. } else {
  163. break;
  164. }
  165. }
  166. };
  167. auto merge_pre_merged = [&] {
  168. if (stack.size() < 3)
  169. return;
  170. auto const& maybe_left = stack[stack.size() - 3];
  171. auto const& maybe_operator = stack[stack.size() - 2];
  172. auto const& maybe_right = stack.last();
  173. if (!maybe_left.has<Tree>() || !maybe_operator.has<Token>() || !maybe_right.has<Tree>())
  174. return;
  175. auto last_operator = maybe_operator.get<Token>();
  176. if (!last_operator.is_pre_merged_binary_operator())
  177. return;
  178. auto expression = make_ref_counted<BinaryOperation>(last_operator.as_binary_operator(), maybe_left.get<Tree>(), maybe_right.get<Tree>());
  179. stack.shrink(stack.size() - 3);
  180. stack.empend(expression);
  181. };
  182. i32 bracket_balance = 0;
  183. while (true) {
  184. auto token_or_error = peek_token();
  185. if (!token_or_error.has_value())
  186. break;
  187. auto token = token_or_error.release_value();
  188. enum {
  189. NoneType,
  190. ExpressionType,
  191. PreMergedBinaryOperatorType,
  192. UnaryOperatorType,
  193. BinaryOperatorType,
  194. BracketType,
  195. } last_element_type;
  196. if (stack.is_empty())
  197. last_element_type = NoneType;
  198. else if (stack.last().has<Tree>())
  199. last_element_type = ExpressionType;
  200. else if (stack.last().get<Token>().is_pre_merged_binary_operator())
  201. last_element_type = PreMergedBinaryOperatorType;
  202. else if (stack.last().get<Token>().is_unary_operator())
  203. last_element_type = UnaryOperatorType;
  204. else if (stack.last().get<Token>().is_binary_operator())
  205. last_element_type = BinaryOperatorType;
  206. else if (stack.last().get<Token>().is_bracket())
  207. last_element_type = BracketType;
  208. else
  209. VERIFY_NOT_REACHED();
  210. if (token.is_ambiguous_operator()) {
  211. if (token.type == TokenType::AmbiguousMinus)
  212. token.type = last_element_type == ExpressionType ? TokenType::BinaryMinus : TokenType::UnaryMinus;
  213. else
  214. VERIFY_NOT_REACHED();
  215. }
  216. bracket_balance += token.is_opening_bracket();
  217. bracket_balance -= token.is_closing_bracket();
  218. if (bracket_balance < 0)
  219. break;
  220. if (token.type == TokenType::ParenOpen) {
  221. if (last_element_type == ExpressionType)
  222. stack.append(Token { TokenType::FunctionCall, ""sv, token.node, token.location });
  223. stack.append(token);
  224. if (m_next_token_index + 1 < m_tokens.size()
  225. && m_tokens[m_next_token_index + 1].type == TokenType::ParenClose) {
  226. // This is a call to function which does not take parameters. We cannot handle it
  227. // normally since we need text between parenthesis to be a valid expression. As a
  228. // workaround, we push an artificial tree to stack to act as an argument (it'll be
  229. // removed later during function call canonicalization).
  230. stack.append(zero_argument_function_call);
  231. }
  232. } else if (token.is_pre_merged_binary_operator()) {
  233. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  234. stack.append(token);
  235. } else if (token.is_unary_operator()) {
  236. THROW_PARSE_ERROR_IF(last_element_type == PreMergedBinaryOperatorType);
  237. stack.append(token);
  238. } else if (token.is_binary_operator() || token.is_closing_bracket()) {
  239. if (bracket_balance == 0 && token.type == TokenType::Comma)
  240. break;
  241. THROW_PARSE_ERROR_IF(last_element_type != ExpressionType);
  242. merge_stack(token.precedence());
  243. if (token.is_closing_bracket()) {
  244. THROW_PARSE_ERROR_IF(stack.size() == 1);
  245. THROW_PARSE_ERROR_IF(!stack[stack.size() - 2].get<Token>().matches_with(token));
  246. stack.remove(stack.size() - 2);
  247. merge_pre_merged();
  248. } else {
  249. stack.append(token);
  250. }
  251. } else {
  252. NullableTree expression;
  253. if (token.type == TokenType::Identifier) {
  254. expression = make_ref_counted<UnresolvedReference>(token.data);
  255. } else if (token.type == TokenType::Number) {
  256. expression = make_ref_counted<MathematicalConstant>(token.data.to_number<i64>().value());
  257. } else if (token.type == TokenType::String) {
  258. expression = make_ref_counted<StringLiteral>(token.data);
  259. } else {
  260. break;
  261. }
  262. THROW_PARSE_ERROR_IF(last_element_type == ExpressionType);
  263. stack.append(expression.release_nonnull());
  264. merge_pre_merged();
  265. }
  266. VERIFY(consume_token().has_value());
  267. }
  268. THROW_PARSE_ERROR_IF(stack.is_empty());
  269. merge_stack(closing_bracket_precedence);
  270. THROW_PARSE_ERROR_IF(stack.size() != 1 || !stack[0].has<Tree>());
  271. rollback.disarm();
  272. return stack[0].get<Tree>();
  273. #undef THROW_PARSE_ERROR
  274. #undef THROW_PARSE_ERROR_IF
  275. }
  276. // <condition> :== <expr> | (<expr> is <expr> (or <expr>)?)
  277. TextParseErrorOr<Tree> TextParser::parse_condition()
  278. {
  279. auto rollback = rollback_point();
  280. auto expression = TRY(parse_expression());
  281. if (!consume_token_with_type(TokenType::Is).is_error()) {
  282. Vector compare_values { TRY(parse_expression()) };
  283. if (!consume_word("or"sv).is_error())
  284. compare_values.append(TRY(parse_expression()));
  285. rollback.disarm();
  286. return make_ref_counted<IsOneOfOperation>(expression, move(compare_values));
  287. }
  288. rollback.disarm();
  289. return expression;
  290. }
  291. // return <expr>
  292. TextParseErrorOr<Tree> TextParser::parse_return_statement()
  293. {
  294. auto rollback = rollback_point();
  295. TRY(consume_word("return"sv));
  296. auto return_value = TRY(parse_expression());
  297. rollback.disarm();
  298. return make_ref_counted<ReturnNode>(return_value);
  299. }
  300. // assert: <condition>
  301. TextParseErrorOr<Tree> TextParser::parse_assert()
  302. {
  303. auto rollback = rollback_point();
  304. TRY(consume_token(TokenType::Identifier, "assert"sv));
  305. TRY(consume_token_with_type(TokenType::Colon));
  306. auto condition = TRY(parse_condition());
  307. rollback.disarm();
  308. return make_ref_counted<AssertExpression>(condition);
  309. }
  310. // (let <expr> be <expr>) | (set <expr> to <expr>)
  311. TextParseErrorOr<Tree> TextParser::parse_assignment()
  312. {
  313. auto rollback = rollback_point();
  314. bool is_let = !consume_word("let"sv).is_error();
  315. if (!is_let)
  316. TRY(consume_word("set"sv));
  317. auto lvalue = TRY(parse_expression());
  318. TRY(consume_word(is_let ? "be"sv : "to"sv));
  319. auto rvalue = TRY(parse_expression());
  320. rollback.disarm();
  321. auto op = is_let ? BinaryOperator::Declaration : BinaryOperator::Assignment;
  322. return make_ref_counted<BinaryOperation>(op, lvalue, rvalue);
  323. }
  324. // <simple_step>
  325. TextParseErrorOr<Tree> TextParser::parse_simple_step_or_inline_if_branch()
  326. {
  327. auto rollback = rollback_point();
  328. // Return <expr>.$
  329. if (auto result = parse_return_statement(); !result.is_error()) {
  330. TRY(consume_token_with_type(TokenType::Dot));
  331. TRY(expect_eof());
  332. rollback.disarm();
  333. return result.release_value();
  334. }
  335. // Assert: <expr>.$
  336. if (auto result = parse_assert(); !result.is_error()) {
  337. TRY(consume_token_with_type(TokenType::Dot));
  338. TRY(expect_eof());
  339. rollback.disarm();
  340. return result.release_value();
  341. }
  342. // Let <expr> be <expr>.$
  343. // Set <expr> to <expr>.$
  344. if (auto result = parse_assignment(); !result.is_error()) {
  345. TRY(consume_token_with_type(TokenType::Dot));
  346. TRY(expect_eof());
  347. rollback.disarm();
  348. return result.release_value();
  349. }
  350. return TextParseError {};
  351. }
  352. // <if_condition> :== (If <condition>) | (Else) | (Else if <condition>),
  353. TextParseErrorOr<TextParser::IfConditionParseResult> TextParser::parse_if_beginning()
  354. {
  355. auto rollback = rollback_point();
  356. bool is_if_branch = !consume_word("if"sv).is_error();
  357. NullableTree condition = nullptr;
  358. if (is_if_branch) {
  359. condition = TRY(parse_condition());
  360. } else {
  361. TRY(consume_word("else"sv));
  362. if (!consume_word("if"sv).is_error())
  363. condition = TRY(parse_condition());
  364. }
  365. TRY(consume_token_with_type(TokenType::Comma));
  366. rollback.disarm();
  367. return IfConditionParseResult { is_if_branch, condition };
  368. }
  369. // <inline_if> :== <if_condition> <simple_step>.$
  370. TextParseErrorOr<Tree> TextParser::parse_inline_if_else()
  371. {
  372. auto rollback = rollback_point();
  373. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  374. auto then_branch = TRY(parse_simple_step_or_inline_if_branch());
  375. rollback.disarm();
  376. if (is_if_branch)
  377. return make_ref_counted<IfBranch>(condition.release_nonnull(), then_branch);
  378. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  379. }
  380. // <if> :== <if_condition> then$ <substeps>
  381. TextParseErrorOr<Tree> TextParser::parse_if(Tree then_branch)
  382. {
  383. auto rollback = rollback_point();
  384. auto [is_if_branch, condition] = TRY(parse_if_beginning());
  385. TRY(consume_word("then"sv));
  386. TRY(expect_eof());
  387. rollback.disarm();
  388. if (is_if_branch)
  389. return make_ref_counted<IfBranch>(*condition, then_branch);
  390. else
  391. return make_ref_counted<ElseIfBranch>(condition, then_branch);
  392. }
  393. // <else> :== Else,$ <substeps>
  394. TextParseErrorOr<Tree> TextParser::parse_else(Tree else_branch)
  395. {
  396. auto rollback = rollback_point();
  397. TRY(consume_word("else"sv));
  398. TRY(consume_token_with_type(TokenType::Comma));
  399. TRY(expect_eof());
  400. rollback.disarm();
  401. return make_ref_counted<ElseIfBranch>(nullptr, else_branch);
  402. }
  403. // <simple_step> | <inline_if>
  404. TextParseErrorOr<Tree> TextParser::parse_step_without_substeps()
  405. {
  406. auto rollback = rollback_point();
  407. // <simple_step>
  408. if (auto result = parse_simple_step_or_inline_if_branch(); !result.is_error()) {
  409. rollback.disarm();
  410. return result.release_value();
  411. }
  412. // <inline_if>
  413. if (auto result = parse_inline_if_else(); !result.is_error()) {
  414. rollback.disarm();
  415. return result.release_value();
  416. }
  417. return TextParseError {};
  418. }
  419. // <if> | <else>
  420. TextParseErrorOr<Tree> TextParser::parse_step_with_substeps(Tree substeps)
  421. {
  422. auto rollback = rollback_point();
  423. // <if>
  424. if (auto result = parse_if(substeps); !result.is_error()) {
  425. rollback.disarm();
  426. return result.release_value();
  427. }
  428. // <else>
  429. if (auto result = parse_else(substeps); !result.is_error()) {
  430. rollback.disarm();
  431. return result.release_value();
  432. }
  433. return TextParseError {};
  434. }
  435. TextParseErrorOr<ClauseHeader> TextParser::parse_clause_header()
  436. {
  437. ClauseHeader result;
  438. auto section_number_token = TRY(consume_token_with_type(TokenType::SectionNumber));
  439. result.section_number = section_number_token.data;
  440. ClauseHeader::FunctionDefinition function_definition;
  441. function_definition.name = TRY(consume_token_with_type(TokenType::Word)).data;
  442. TRY(consume_token_with_type(TokenType::ParenOpen));
  443. while (true) {
  444. if (function_definition.arguments.is_empty()) {
  445. auto argument = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Identifier }));
  446. if (argument.type == TokenType::ParenClose)
  447. break;
  448. function_definition.arguments.append({ argument.data });
  449. } else {
  450. function_definition.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier)).data });
  451. }
  452. auto next_token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
  453. if (next_token.type == TokenType::ParenClose)
  454. break;
  455. }
  456. TRY(expect_eof());
  457. result.header = function_definition;
  458. return result;
  459. }
  460. FailedTextParseDiagnostic TextParser::get_diagnostic() const
  461. {
  462. StringBuilder message;
  463. message.append("unexpected "sv);
  464. if (m_max_parsed_tokens == m_tokens.size()) {
  465. message.append("EOF"sv);
  466. } else {
  467. auto token = m_tokens[m_max_parsed_tokens];
  468. if (token.type == TokenType::Word)
  469. message.appendff("'{}'", token.data);
  470. else if (token.type == TokenType::Identifier)
  471. message.appendff("identifier '{}'", token.data);
  472. else
  473. message.append(token.name_for_diagnostic());
  474. }
  475. message.appendff(", expected ");
  476. size_t size = m_suitable_continuations.size();
  477. VERIFY(size > 0);
  478. for (size_t i = 0; i < size; ++i) {
  479. m_suitable_continuations[i].visit(
  480. [&](TokenType type) { message.append(token_info[to_underlying(type)].name_for_diagnostic); },
  481. [&](StringView word) { message.appendff("'{}'", word); },
  482. [&](CustomMessage continuation) { message.append(continuation.message); });
  483. if (i + 1 != size) {
  484. if (size == 2)
  485. message.append(" or "sv);
  486. else if (i + 2 == size)
  487. message.append(", or "sv);
  488. else
  489. message.append(", "sv);
  490. }
  491. }
  492. Location location = Location::global_scope();
  493. if (m_max_parsed_tokens < m_tokens.size()) {
  494. location = m_tokens[m_max_parsed_tokens].location;
  495. } else {
  496. // FIXME: Would be nice to point to the closing tag not the opening one. This is also the
  497. // only place where we use m_location.
  498. location = m_ctx.location_from_xml_offset(m_node->offset);
  499. }
  500. return { location, MUST(message.to_string()) };
  501. }
  502. }