// SpecParser.cpp
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/NonnullOwnPtr.h>
  7. #include <LibCore/File.h>
  8. #include <LibXML/Parser/Parser.h>
  9. #include "Function.h"
  10. #include "Parser/Lexer.h"
  11. #include "Parser/SpecParser.h"
  12. #include "Parser/TextParser.h"
  13. #include "Parser/XMLUtils.h"
  14. namespace JSSpecCompiler {
  15. TranslationUnitRef SpecificationParsingContext::translation_unit()
  16. {
  17. return m_translation_unit;
  18. }
  19. DiagnosticEngine& SpecificationParsingContext::diag()
  20. {
  21. return m_translation_unit->diag();
  22. }
  23. template<typename Func>
  24. auto SpecificationParsingContext::with_new_logical_scope(Func&& func)
  25. {
  26. TemporaryChange<RefPtr<LogicalLocation>> change(m_current_logical_scope, make_ref_counted<LogicalLocation>());
  27. return func();
  28. }
  29. LogicalLocation& SpecificationParsingContext::current_logical_scope()
  30. {
  31. return *m_current_logical_scope;
  32. }
  33. template<typename Func>
  34. auto SpecificationParsingContext::with_new_step_list_nesting_level(Func&& func)
  35. {
  36. TemporaryChange change(m_step_list_nesting_level, m_step_list_nesting_level + 1);
  37. return func();
  38. }
  39. int SpecificationParsingContext::step_list_nesting_level() const
  40. {
  41. return m_step_list_nesting_level;
  42. }
  43. Location SpecificationParsingContext::file_scope() const
  44. {
  45. return { .filename = m_translation_unit->filename() };
  46. }
  47. Location SpecificationParsingContext::location_from_xml_offset(LineTrackingLexer::Position position) const
  48. {
  49. return {
  50. .filename = m_translation_unit->filename(),
  51. .line = position.line,
  52. .column = position.column,
  53. .logical_location = m_current_logical_scope,
  54. };
  55. }
  56. Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, XML::Node const* element)
  57. {
  58. VERIFY(element->as_element().name == tag_li);
  59. auto [maybe_tokens, substeps] = tokenize_step(ctx, element);
  60. AlgorithmStep result(ctx);
  61. result.m_node = element;
  62. if (substeps) {
  63. // FIXME: Remove this once macOS Lagom CI updates to Clang >= 16.
  64. auto substeps_copy = substeps;
  65. auto step_list = ctx.with_new_step_list_nesting_level([&] {
  66. return AlgorithmStepList::create(ctx, substeps_copy);
  67. });
  68. result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
  69. }
  70. if (!maybe_tokens.has_value())
  71. return {};
  72. result.m_tokens = maybe_tokens.release_value();
  73. if (!result.parse())
  74. return {};
  75. return result;
  76. }
  77. bool AlgorithmStep::parse()
  78. {
  79. TextParser parser(m_ctx, m_tokens, m_node);
  80. TextParseErrorOr<Tree> parse_result = TextParseError {};
  81. if (m_substeps)
  82. parse_result = parser.parse_step_with_substeps(RefPtr(m_substeps).release_nonnull());
  83. else
  84. parse_result = parser.parse_step_without_substeps();
  85. if (parse_result.is_error()) {
  86. auto [location, message] = parser.get_diagnostic();
  87. m_ctx.diag().error(location, "{}", message);
  88. return false;
  89. } else {
  90. m_expression = parse_result.release_value();
  91. return true;
  92. }
  93. }
  94. Optional<AlgorithmStepList> AlgorithmStepList::create(SpecificationParsingContext& ctx, XML::Node const* element)
  95. {
  96. VERIFY(element->as_element().name == tag_ol);
  97. AlgorithmStepList result;
  98. Vector<Tree> step_expressions;
  99. bool all_steps_parsed = true;
  100. int step_number = 0;
  101. auto const& parent_scope = ctx.current_logical_scope();
  102. for (auto const& child : element->as_element().children) {
  103. child->content.visit(
  104. [&](XML::Node::Element const& element) {
  105. if (element.name == tag_li) {
  106. auto step_creation_result = ctx.with_new_logical_scope([&] {
  107. update_logical_scope_for_step(ctx, parent_scope, step_number);
  108. return AlgorithmStep::create(ctx, child);
  109. });
  110. if (!step_creation_result.has_value())
  111. all_steps_parsed = false;
  112. else
  113. step_expressions.append(step_creation_result.release_value().tree());
  114. ++step_number;
  115. return;
  116. }
  117. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  118. "<{}> should not be a child of algorithm step list"sv, element.name);
  119. },
  120. [&](XML::Node::Text const&) {
  121. if (!contains_empty_text(child)) {
  122. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  123. "non-empty text node should not be a child of algorithm step list");
  124. }
  125. },
  126. [&](auto const&) {});
  127. }
  128. if (!all_steps_parsed)
  129. return {};
  130. result.m_expression = make_ref_counted<TreeList>(move(step_expressions));
  131. return result;
  132. }
  133. void AlgorithmStepList::update_logical_scope_for_step(SpecificationParsingContext& ctx, LogicalLocation const& parent_scope, int step_number)
  134. {
  135. int nesting_level = ctx.step_list_nesting_level();
  136. String list_step_number;
  137. if (nesting_level == 0 || nesting_level == 3) {
  138. list_step_number = MUST(String::formatted("{}", step_number + 1));
  139. } else if (nesting_level == 1 || nesting_level == 4) {
  140. if (step_number < 26)
  141. list_step_number = String::from_code_point('a' + step_number);
  142. else
  143. list_step_number = MUST(String::formatted("{}", step_number + 1));
  144. } else {
  145. list_step_number = MUST(String::from_byte_string(ByteString::roman_number_from(step_number + 1).to_lowercase()));
  146. }
  147. auto& scope = ctx.current_logical_scope();
  148. scope.section = parent_scope.section;
  149. if (parent_scope.step.is_empty())
  150. scope.step = list_step_number;
  151. else
  152. scope.step = MUST(String::formatted("{}.{}", parent_scope.step, list_step_number));
  153. }
  154. Optional<Algorithm> Algorithm::create(SpecificationParsingContext& ctx, XML::Node const* element)
  155. {
  156. VERIFY(element->as_element().name == tag_emu_alg);
  157. Vector<XML::Node const*> steps_list;
  158. for (auto const& child : element->as_element().children) {
  159. child->content.visit(
  160. [&](XML::Node::Element const& element) {
  161. if (element.name == tag_ol) {
  162. steps_list.append(child);
  163. return;
  164. }
  165. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  166. "<{}> should not be a child of <emu-alg>"sv, element.name);
  167. },
  168. [&](XML::Node::Text const&) {
  169. if (!contains_empty_text(child)) {
  170. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  171. "non-empty text node should not be a child of <emu-alg>");
  172. }
  173. },
  174. [&](auto const&) {});
  175. }
  176. if (steps_list.size() != 1) {
  177. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  178. "<emu-alg> should have exactly one <ol> child"sv);
  179. return {};
  180. }
  181. auto steps_creation_result = AlgorithmStepList::create(ctx, steps_list[0]);
  182. if (steps_creation_result.has_value()) {
  183. Algorithm algorithm;
  184. algorithm.m_tree = steps_creation_result.release_value().tree();
  185. return algorithm;
  186. }
  187. return {};
  188. }
  189. NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
  190. {
  191. return ctx.with_new_logical_scope([&] {
  192. VERIFY(element->as_element().name == tag_emu_clause);
  193. SpecificationClause specification_clause(ctx);
  194. specification_clause.parse(element);
  195. OwnPtr<SpecificationClause> result;
  196. specification_clause.m_header.header.visit(
  197. [&](AK::Empty const&) {
  198. result = make<SpecificationClause>(move(specification_clause));
  199. },
  200. [&](OneOf<ClauseHeader::AbstractOperation, ClauseHeader::Accessor> auto const&) {
  201. result = make<SpecFunction>(move(specification_clause));
  202. });
  203. if (!result->post_initialize(element))
  204. result = make<SpecificationClause>(move(*result));
  205. return result.release_nonnull();
  206. });
  207. }
  208. void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
  209. {
  210. do_collect(translation_unit);
  211. for (auto& subclause : m_subclauses)
  212. subclause->collect_into(translation_unit);
  213. }
  214. Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node const* element)
  215. {
  216. auto& ctx = *m_ctx_pointer;
  217. VERIFY(element->as_element().name == tag_h1);
  218. auto maybe_tokens = tokenize_header(ctx, element);
  219. if (!maybe_tokens.has_value())
  220. return {};
  221. auto const& tokens = maybe_tokens.release_value();
  222. TextParser parser(ctx, tokens, element);
  223. auto parse_result = parser.parse_clause_header();
  224. if (parse_result.is_error()) {
  225. // Still try to at least scavenge section number.
  226. if (tokens.size() && tokens[0].type == TokenType::SectionNumber)
  227. ctx.current_logical_scope().section = MUST(String::from_utf8(tokens[0].data));
  228. return parser.get_diagnostic();
  229. }
  230. m_header = parse_result.release_value();
  231. ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
  232. return {};
  233. }
  234. void SpecificationClause::parse(XML::Node const* element)
  235. {
  236. auto& ctx = context();
  237. u32 child_index = 0;
  238. bool node_ignored_warning_issued = false;
  239. Optional<FailedTextParseDiagnostic> header_parse_error;
  240. for (auto const& child : element->as_element().children) {
  241. child->content.visit(
  242. [&](XML::Node::Element const& element) {
  243. if (child_index == 0) {
  244. if (element.name != tag_h1) {
  245. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  246. "<h1> must be the first child of <emu-clause>");
  247. return;
  248. }
  249. header_parse_error = parse_header(child);
  250. } else {
  251. if (element.name == tag_h1) {
  252. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  253. "<h1> can only be the first child of <emu-clause>");
  254. return;
  255. }
  256. if (element.name == tag_emu_clause) {
  257. m_subclauses.append(create(ctx, child));
  258. return;
  259. }
  260. if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) {
  261. node_ignored_warning_issued = true;
  262. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  263. "node content will be ignored since section header was not parsed successfully");
  264. if (header_parse_error.has_value())
  265. ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
  266. }
  267. }
  268. ++child_index;
  269. },
  270. [&](XML::Node::Text const&) {
  271. if (!contains_empty_text(child)) {
  272. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  273. "non-empty text node should not be a child of <emu-clause>");
  274. }
  275. },
  276. [&](auto) {});
  277. }
  278. }
  279. bool SpecFunction::post_initialize(XML::Node const* element)
  280. {
  281. VERIFY(element->as_element().name == tag_emu_clause);
  282. auto& ctx = context();
  283. auto maybe_id = get_attribute_by_name(element, attribute_id);
  284. if (!maybe_id.has_value()) {
  285. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  286. "no id attribute");
  287. } else {
  288. m_id = maybe_id.value();
  289. }
  290. m_header.header.visit(
  291. [&](ClauseHeader::AbstractOperation const& abstract_operation) {
  292. auto maybe_abstract_operation_id = get_attribute_by_name(element, attribute_aoid);
  293. if (maybe_abstract_operation_id.has_value())
  294. m_name = MUST(String::from_utf8(maybe_abstract_operation_id.value()));
  295. auto const& [function_name, arguments] = abstract_operation;
  296. m_arguments = arguments;
  297. if (m_name != function_name) {
  298. ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
  299. "function name in header and <emu-clause>[aoid] do not match");
  300. }
  301. },
  302. [&](ClauseHeader::Accessor const& accessor) {
  303. m_name = MUST(String::formatted("%get {}%", MUST(String::join("."sv, accessor.qualified_name))));
  304. },
  305. [&](auto const&) {
  306. VERIFY_NOT_REACHED();
  307. });
  308. Vector<XML::Node const*> algorithm_nodes;
  309. for (auto const& child : element->as_element().children) {
  310. child->content.visit(
  311. [&](XML::Node::Element const& element) {
  312. if (element.name == tag_h1) {
  313. // Processed in SpecificationClause
  314. } else if (element.name == tag_p) {
  315. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  316. "prose is ignored");
  317. } else if (element.name == tag_emu_alg) {
  318. algorithm_nodes.append(child);
  319. } else {
  320. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  321. "<{}> should not be a child of <emu-clause> specifing function"sv, element.name);
  322. }
  323. },
  324. [&](auto const&) {});
  325. }
  326. if (algorithm_nodes.size() != 1) {
  327. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  328. "<emu-clause> specifing function should have exactly one <emu-alg> child"sv);
  329. return false;
  330. }
  331. auto maybe_algorithm = Algorithm::create(ctx, algorithm_nodes[0]);
  332. if (maybe_algorithm.has_value()) {
  333. m_algorithm = maybe_algorithm.release_value();
  334. return true;
  335. } else {
  336. return false;
  337. }
  338. }
  339. void SpecFunction::do_collect(TranslationUnitRef translation_unit)
  340. {
  341. translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.tree(), move(m_arguments)));
  342. }
  343. NonnullOwnPtr<Specification> Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
  344. {
  345. VERIFY(element->as_element().name == tag_specification);
  346. auto specification = make<Specification>();
  347. specification->parse(ctx, element);
  348. return specification;
  349. }
  350. void Specification::collect_into(TranslationUnitRef translation_unit)
  351. {
  352. for (auto& clause : m_clauses)
  353. clause->collect_into(translation_unit);
  354. }
  355. void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  356. {
  357. for (auto const& child : element->as_element().children) {
  358. child->content.visit(
  359. [&](XML::Node::Element const& element) {
  360. if (element.name == tag_emu_intro) {
  361. // Introductory comments are ignored.
  362. } else if (element.name == tag_emu_clause) {
  363. m_clauses.append(SpecificationClause::create(ctx, child));
  364. } else if (element.name == tag_emu_import) {
  365. parse(ctx, child);
  366. } else {
  367. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  368. "<{}> should not be a child of <specification>", element.name);
  369. }
  370. },
  371. [&](XML::Node::Text const&) {
  372. if (!contains_empty_text(child)) {
  373. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  374. "non-empty text node should not be a child of <specification>");
  375. }
  376. },
  377. [&](auto) {});
  378. }
  379. }
  380. SpecParsingStep::SpecParsingStep()
  381. : CompilationStep("parser"sv)
  382. {
  383. }
  384. SpecParsingStep::~SpecParsingStep() = default;
  385. void SpecParsingStep::run(TranslationUnitRef translation_unit)
  386. {
  387. SpecificationParsingContext ctx(translation_unit);
  388. auto filename = translation_unit->filename();
  389. auto file_or_error = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
  390. if (file_or_error.is_error()) {
  391. ctx.diag().fatal_error(Location::global_scope(),
  392. "unable to open '{}': {}", filename, file_or_error.error());
  393. return;
  394. }
  395. auto input_or_error = file_or_error.value()->read_until_eof();
  396. if (input_or_error.is_error()) {
  397. ctx.diag().fatal_error(Location::global_scope(),
  398. "unable to read '{}': {}", filename, input_or_error.error());
  399. return;
  400. }
  401. m_input = input_or_error.release_value();
  402. XML::Parser parser { m_input };
  403. auto document_or_error = parser.parse();
  404. if (document_or_error.is_error()) {
  405. ctx.diag().fatal_error(ctx.file_scope(),
  406. "XML::Parser failed to parse input: {}", document_or_error.error());
  407. ctx.diag().note(ctx.file_scope(),
  408. "since XML::Parser backtracks on error, the message above is likely to point to the "
  409. "first tag in the input - use external XML verifier to find out the exact cause of error");
  410. return;
  411. }
  412. m_document = make<XML::Document>(document_or_error.release_value());
  413. auto const& root = m_document->root();
  414. if (!root.is_element() || root.as_element().name != tag_specification) {
  415. ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
  416. "document root must be <specification> tag");
  417. return;
  418. }
  419. m_specification = Specification::create(ctx, &root);
  420. m_specification->collect_into(translation_unit);
  421. }
  422. }