SpecParser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/NonnullOwnPtr.h>
  7. #include <LibCore/File.h>
  8. #include <LibXML/Parser/Parser.h>
  9. #include "Function.h"
  10. #include "Parser/Lexer.h"
  11. #include "Parser/SpecParser.h"
  12. #include "Parser/TextParser.h"
  13. #include "Parser/XMLUtils.h"
  14. namespace JSSpecCompiler {
  15. DiagnosticEngine& SpecificationParsingContext::diag()
  16. {
  17. return m_translation_unit->diag();
  18. }
  19. template<typename Func>
  20. auto SpecificationParsingContext::with_new_logical_scope(Func&& func)
  21. {
  22. TemporaryChange<RefPtr<LogicalLocation>> change(m_current_logical_scope, make_ref_counted<LogicalLocation>());
  23. return func();
  24. }
  25. LogicalLocation& SpecificationParsingContext::current_logical_scope()
  26. {
  27. return *m_current_logical_scope;
  28. }
  29. template<typename Func>
  30. auto SpecificationParsingContext::with_new_step_list_nesting_level(Func&& func)
  31. {
  32. TemporaryChange change(m_step_list_nesting_level, m_step_list_nesting_level + 1);
  33. return func();
  34. }
  35. int SpecificationParsingContext::step_list_nesting_level() const
  36. {
  37. return m_step_list_nesting_level;
  38. }
  39. Location SpecificationParsingContext::file_scope() const
  40. {
  41. return { .filename = m_translation_unit->filename() };
  42. }
  43. Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const
  44. {
  45. return {
  46. .filename = m_translation_unit->filename(),
  47. .line = offset.line,
  48. .column = offset.column,
  49. .logical_location = m_current_logical_scope,
  50. };
  51. }
  52. Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, XML::Node const* element)
  53. {
  54. VERIFY(element->as_element().name == tag_li);
  55. auto tokenization_result = tokenize_tree(element, true);
  56. if (tokenization_result.is_error()) {
  57. ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
  58. "{}", tokenization_result.error()->to_string());
  59. return {};
  60. }
  61. auto [tokens, substeps] = tokenization_result.release_value();
  62. AlgorithmStep result { .m_tokens = move(tokens), .m_node = element };
  63. if (substeps) {
  64. // FIXME: Remove this once macOS Lagom CI updates to Clang >= 16.
  65. auto substeps_copy = substeps;
  66. auto step_list = ctx.with_new_step_list_nesting_level([&] {
  67. return AlgorithmStepList::create(ctx, substeps_copy);
  68. });
  69. if (step_list.has_value())
  70. result.m_substeps = step_list->m_expression;
  71. }
  72. auto parse_result = result.parse();
  73. if (parse_result.is_error()) {
  74. ctx.diag().error(ctx.location_from_xml_offset(parse_result.error()->offset()),
  75. "{}", parse_result.error()->to_string());
  76. return {};
  77. }
  78. result.m_expression = parse_result.release_value();
  79. return result;
  80. }
  81. ParseErrorOr<Tree> AlgorithmStep::parse()
  82. {
  83. TextParser parser(m_tokens, m_node);
  84. if (m_substeps)
  85. return parser.parse_step_with_substeps(RefPtr(m_substeps).release_nonnull());
  86. else
  87. return parser.parse_step_without_substeps();
  88. }
  89. Optional<AlgorithmStepList> AlgorithmStepList::create(SpecificationParsingContext& ctx, XML::Node const* element)
  90. {
  91. VERIFY(element->as_element().name == tag_ol);
  92. AlgorithmStepList result;
  93. auto& steps = result.m_steps;
  94. Vector<Tree> step_expressions;
  95. bool all_steps_parsed = true;
  96. int step_number = 0;
  97. auto const& parent_scope = ctx.current_logical_scope();
  98. for (auto const& child : element->as_element().children) {
  99. child->content.visit(
  100. [&](XML::Node::Element const& element) {
  101. if (element.name == tag_li) {
  102. auto step_creation_result = ctx.with_new_logical_scope([&] {
  103. update_logical_scope_for_step(ctx, parent_scope, step_number);
  104. return AlgorithmStep::create(ctx, child);
  105. });
  106. if (!step_creation_result.has_value()) {
  107. all_steps_parsed = false;
  108. } else {
  109. steps.append(step_creation_result.release_value());
  110. step_expressions.append(steps.last().m_expression);
  111. }
  112. ++step_number;
  113. return;
  114. }
  115. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  116. "<{}> should not be a child of algorithm step list"sv, element.name);
  117. },
  118. [&](XML::Node::Text const&) {
  119. if (!contains_empty_text(child)) {
  120. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  121. "non-empty text node should not be a child of algorithm step list");
  122. }
  123. },
  124. [&](auto const&) {});
  125. }
  126. if (!all_steps_parsed)
  127. return {};
  128. result.m_expression = make_ref_counted<TreeList>(move(step_expressions));
  129. return result;
  130. }
  131. void AlgorithmStepList::update_logical_scope_for_step(SpecificationParsingContext& ctx, LogicalLocation const& parent_scope, int step_number)
  132. {
  133. int nesting_level = ctx.step_list_nesting_level();
  134. String list_step_number;
  135. if (nesting_level == 0 || nesting_level == 3) {
  136. list_step_number = MUST(String::formatted("{}", step_number + 1));
  137. } else if (nesting_level == 1 || nesting_level == 4) {
  138. if (step_number < 26)
  139. list_step_number = String::from_code_point('a' + step_number);
  140. else
  141. list_step_number = MUST(String::formatted("{}", step_number + 1));
  142. } else {
  143. list_step_number = MUST(String::from_byte_string(ByteString::roman_number_from(step_number + 1).to_lowercase()));
  144. }
  145. auto& scope = ctx.current_logical_scope();
  146. scope.section = parent_scope.section;
  147. if (parent_scope.step.is_empty())
  148. scope.step = list_step_number;
  149. else
  150. scope.step = MUST(String::formatted("{}.{}", parent_scope.step, list_step_number));
  151. }
  152. Optional<Algorithm> Algorithm::create(SpecificationParsingContext& ctx, XML::Node const* element)
  153. {
  154. VERIFY(element->as_element().name == tag_emu_alg);
  155. Vector<XML::Node const*> steps_list;
  156. for (auto const& child : element->as_element().children) {
  157. child->content.visit(
  158. [&](XML::Node::Element const& element) {
  159. if (element.name == tag_ol) {
  160. steps_list.append(child);
  161. return;
  162. }
  163. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  164. "<{}> should not be a child of <emu-alg>"sv, element.name);
  165. },
  166. [&](XML::Node::Text const&) {
  167. if (!contains_empty_text(child)) {
  168. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  169. "non-empty text node should not be a child of <emu-alg>");
  170. }
  171. },
  172. [&](auto const&) {});
  173. }
  174. if (steps_list.size() != 1) {
  175. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  176. "<emu-alg> should have exactly one <ol> child"sv);
  177. return {};
  178. }
  179. auto steps_creation_result = AlgorithmStepList::create(ctx, steps_list[0]);
  180. if (steps_creation_result.has_value()) {
  181. Algorithm algorithm;
  182. algorithm.m_steps = steps_creation_result.release_value();
  183. algorithm.m_tree = algorithm.m_steps.m_expression;
  184. return algorithm;
  185. }
  186. return {};
  187. }
  188. NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
  189. {
  190. return ctx.with_new_logical_scope([&] {
  191. VERIFY(element->as_element().name == tag_emu_clause);
  192. SpecificationClause specification_clause;
  193. specification_clause.parse(ctx, element);
  194. OwnPtr<SpecificationClause> result;
  195. specification_clause.m_header.header.visit(
  196. [&](AK::Empty const&) {
  197. result = make<SpecificationClause>(move(specification_clause));
  198. },
  199. [&](ClauseHeader::FunctionDefinition const&) {
  200. result = make<SpecFunction>(move(specification_clause));
  201. });
  202. if (!result->post_initialize(ctx, element))
  203. result = make<SpecificationClause>(move(*result));
  204. return result.release_nonnull();
  205. });
  206. }
  207. void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
  208. {
  209. do_collect(translation_unit);
  210. for (auto& subclause : m_subclauses)
  211. subclause->collect_into(translation_unit);
  212. }
  213. ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
  214. {
  215. VERIFY(element->as_element().name == tag_h1);
  216. auto tokens = TRY(tokenize_tree(element));
  217. TextParser parser(tokens.tokens, element);
  218. m_header = TRY(parser.parse_clause_header());
  219. return {};
  220. }
  221. void SpecificationClause::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  222. {
  223. u32 child_index = 0;
  224. Optional<NonnullRefPtr<ParseError>> header_parse_error;
  225. for (auto const& child : element->as_element().children) {
  226. child->content.visit(
  227. [&](XML::Node::Element const& element) {
  228. if (child_index == 0) {
  229. if (element.name != tag_h1) {
  230. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  231. "<h1> must be the first child of <emu-clause>");
  232. return;
  233. }
  234. if (auto error = parse_header(child); error.is_error())
  235. header_parse_error = error.release_error();
  236. else
  237. ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
  238. } else {
  239. if (element.name == tag_h1) {
  240. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  241. "<h1> can only be the first child of <emu-clause>");
  242. return;
  243. }
  244. if (element.name == tag_emu_clause) {
  245. m_subclauses.append(create(ctx, child));
  246. return;
  247. }
  248. if (header_parse_error.has_value()) {
  249. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  250. "node content will be ignored since section header was not parsed successfully");
  251. // TODO: Integrate backtracing parser errors better
  252. ctx.diag().note(ctx.location_from_xml_offset(header_parse_error.value()->offset()),
  253. "{}", header_parse_error.value()->to_string());
  254. header_parse_error.clear();
  255. }
  256. }
  257. ++child_index;
  258. },
  259. [&](XML::Node::Text const&) {
  260. if (!contains_empty_text(child)) {
  261. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  262. "non-empty text node should not be a child of <emu-clause>");
  263. }
  264. },
  265. [&](auto) {});
  266. }
  267. }
  268. bool SpecFunction::post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
  269. {
  270. VERIFY(element->as_element().name == tag_emu_clause);
  271. auto maybe_id = get_attribute_by_name(element, attribute_id);
  272. if (!maybe_id.has_value()) {
  273. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  274. "no id attribute");
  275. } else {
  276. m_id = maybe_id.value();
  277. }
  278. auto maybe_abstract_operation_id = get_attribute_by_name(element, attribute_aoid);
  279. if (maybe_abstract_operation_id.has_value())
  280. m_name = maybe_abstract_operation_id.value();
  281. m_section_number = m_header.section_number;
  282. auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
  283. m_arguments = arguments;
  284. if (m_name != function_name) {
  285. ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
  286. "function name in header and <emu-clause>[aoid] do not match");
  287. }
  288. Vector<XML::Node const*> algorithm_nodes;
  289. for (auto const& child : element->as_element().children) {
  290. child->content.visit(
  291. [&](XML::Node::Element const& element) {
  292. if (element.name == tag_h1) {
  293. // Processed in SpecificationClause
  294. } else if (element.name == tag_p) {
  295. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  296. "prose is ignored");
  297. } else if (element.name == tag_emu_alg) {
  298. algorithm_nodes.append(child);
  299. } else {
  300. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  301. "<{}> should not be a child of <emu-clause> specifing function"sv, element.name);
  302. }
  303. },
  304. [&](auto const&) {});
  305. }
  306. if (algorithm_nodes.size() != 1) {
  307. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  308. "<emu-clause> specifing function should have exactly one <emu-alg> child"sv);
  309. return false;
  310. }
  311. auto maybe_algorithm = Algorithm::create(ctx, algorithm_nodes[0]);
  312. if (maybe_algorithm.has_value()) {
  313. m_algorithm = maybe_algorithm.release_value();
  314. return true;
  315. } else {
  316. return false;
  317. }
  318. }
  319. void SpecFunction::do_collect(TranslationUnitRef translation_unit)
  320. {
  321. translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.m_tree, move(m_arguments)));
  322. }
  323. Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
  324. {
  325. VERIFY(element->as_element().name == tag_specification);
  326. Specification specification;
  327. specification.parse(ctx, element);
  328. return specification;
  329. }
  330. void Specification::collect_into(TranslationUnitRef translation_unit)
  331. {
  332. for (auto& clause : m_clauses)
  333. clause->collect_into(translation_unit);
  334. }
  335. void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  336. {
  337. for (auto const& child : element->as_element().children) {
  338. child->content.visit(
  339. [&](XML::Node::Element const& element) {
  340. if (element.name == tag_emu_intro) {
  341. // Introductory comments are ignored.
  342. } else if (element.name == tag_emu_clause) {
  343. m_clauses.append(SpecificationClause::create(ctx, child));
  344. } else if (element.name == tag_emu_import) {
  345. parse(ctx, child);
  346. } else {
  347. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  348. "<{}> should not be a child of <specification>", element.name);
  349. }
  350. },
  351. [&](XML::Node::Text const&) {
  352. if (!contains_empty_text(child)) {
  353. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  354. "non-empty text node should not be a child of <specification>");
  355. }
  356. },
  357. [&](auto) {});
  358. }
  359. }
  360. SpecParsingStep::SpecParsingStep()
  361. : CompilationStep("parser"sv)
  362. {
  363. }
  364. SpecParsingStep::~SpecParsingStep() = default;
  365. void SpecParsingStep::run(TranslationUnitRef translation_unit)
  366. {
  367. SpecificationParsingContext ctx(translation_unit);
  368. auto filename = translation_unit->filename();
  369. auto file_or_error = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
  370. if (file_or_error.is_error()) {
  371. ctx.diag().fatal_error(Location::global_scope(),
  372. "unable to open '{}': {}", filename, file_or_error.error());
  373. return;
  374. }
  375. auto input_or_error = file_or_error.value()->read_until_eof();
  376. if (input_or_error.is_error()) {
  377. ctx.diag().fatal_error(Location::global_scope(),
  378. "unable to read '{}': {}", filename, input_or_error.error());
  379. return;
  380. }
  381. m_input = input_or_error.release_value();
  382. XML::Parser parser { m_input };
  383. auto document_or_error = parser.parse();
  384. if (document_or_error.is_error()) {
  385. ctx.diag().fatal_error(ctx.file_scope(),
  386. "XML::Parser failed to parse input: {}", document_or_error.error());
  387. ctx.diag().note(ctx.file_scope(),
  388. "since XML::Parser backtracks on error, the message above is likely to point to the "
  389. "first tag in the input - use external XML verifier to find out the exact cause of error");
  390. return;
  391. }
  392. m_document = make<XML::Document>(document_or_error.release_value());
  393. auto const& root = m_document->root();
  394. if (!root.is_element() || root.as_element().name != tag_specification) {
  395. ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
  396. "document root must be <specification> tag");
  397. return;
  398. }
  399. auto specification = Specification::create(ctx, &root);
  400. specification.collect_into(translation_unit);
  401. }
  402. }