// SpecParser.cpp
  1. /*
  2. * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/NonnullOwnPtr.h>
  7. #include <LibCore/File.h>
  8. #include <LibXML/Parser/Parser.h>
  9. #include "Function.h"
  10. #include "Parser/Lexer.h"
  11. #include "Parser/SpecParser.h"
  12. #include "Parser/TextParser.h"
  13. #include "Parser/XMLUtils.h"
  14. namespace JSSpecCompiler {
  15. DiagnosticEngine& SpecificationParsingContext::diag()
  16. {
  17. return m_translation_unit->diag();
  18. }
  19. template<typename Func>
  20. auto SpecificationParsingContext::with_new_logical_scope(Func&& func)
  21. {
  22. TemporaryChange<RefPtr<LogicalLocation>> change(m_current_logical_scope, make_ref_counted<LogicalLocation>());
  23. return func();
  24. }
  25. LogicalLocation& SpecificationParsingContext::current_logical_scope()
  26. {
  27. return *m_current_logical_scope;
  28. }
  29. Location SpecificationParsingContext::file_scope() const
  30. {
  31. return { .filename = m_translation_unit->filename() };
  32. }
  33. Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const
  34. {
  35. return {
  36. .filename = m_translation_unit->filename(),
  37. .line = offset.line,
  38. .column = offset.column,
  39. .logical_location = m_current_logical_scope,
  40. };
  41. }
  42. ParseErrorOr<AlgorithmStep> AlgorithmStep::create(XML::Node const* node)
  43. {
  44. VERIFY(node->as_element().name == tag_li);
  45. auto [tokens, substeps] = TRY(tokenize_tree(node, true));
  46. AlgorithmStep result { .m_tokens = move(tokens), .m_node = node };
  47. if (substeps)
  48. result.m_substeps = TRY(AlgorithmStepList::create(substeps->as_element())).m_expression;
  49. result.m_expression = TRY(result.parse());
  50. return result;
  51. }
  52. ParseErrorOr<Tree> AlgorithmStep::parse()
  53. {
  54. TextParser parser(m_tokens, m_node);
  55. if (m_substeps)
  56. return parser.parse_step_with_substeps(RefPtr(m_substeps).release_nonnull());
  57. else
  58. return parser.parse_step_without_substeps();
  59. }
  60. ParseErrorOr<AlgorithmStepList> AlgorithmStepList::create(XML::Node::Element const& element)
  61. {
  62. VERIFY(element.name == tag_ol);
  63. AlgorithmStepList result;
  64. auto& steps = result.m_steps;
  65. Vector<Tree> step_expressions;
  66. for (auto const& child : element.children) {
  67. TRY(child->content.visit(
  68. [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
  69. if (element.name != tag_li)
  70. return ParseError::create("<emu-alg> <ol> > :not(<li>) should not match any elements"sv, child);
  71. steps.append(TRY(AlgorithmStep::create(child)));
  72. step_expressions.append(steps.last().m_expression);
  73. return {};
  74. },
  75. [&](XML::Node::Text const&) -> ParseErrorOr<void> {
  76. if (!contains_empty_text(child))
  77. return ParseError::create("<emu-alg> <ol> should not have non-empty child text nodes"sv, child);
  78. return {};
  79. },
  80. move(ignore_comments)));
  81. }
  82. result.m_expression = make_ref_counted<TreeList>(move(step_expressions));
  83. return result;
  84. }
  85. Optional<Algorithm> Algorithm::create(SpecificationParsingContext& ctx, XML::Node const* element)
  86. {
  87. VERIFY(element->as_element().name == tag_emu_alg);
  88. Vector<XML::Node const*> steps_list;
  89. for (auto const& child : element->as_element().children) {
  90. child->content.visit(
  91. [&](XML::Node::Element const& element) {
  92. if (element.name == tag_ol) {
  93. steps_list.append(child);
  94. return;
  95. }
  96. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  97. "<{}> should not be a child of <emu-alg>"sv, element.name);
  98. },
  99. [&](XML::Node::Text const&) {
  100. if (!contains_empty_text(child)) {
  101. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  102. "non-empty text node should not be a child of <emu-alg>");
  103. }
  104. },
  105. [&](auto const&) {});
  106. }
  107. if (steps_list.size() != 1) {
  108. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  109. "<emu-alg> should have exactly one <ol> child"sv);
  110. return {};
  111. }
  112. auto steps_creation_result = AlgorithmStepList::create(steps_list[0]->as_element());
  113. if (steps_creation_result.is_error()) {
  114. // TODO: Integrate backtracing parser errors better
  115. ctx.diag().error(ctx.location_from_xml_offset(steps_creation_result.error()->offset()),
  116. "{}", steps_creation_result.error()->to_string());
  117. return {};
  118. }
  119. Algorithm algorithm;
  120. algorithm.m_steps = steps_creation_result.release_value();
  121. algorithm.m_tree = algorithm.m_steps.m_expression;
  122. return algorithm;
  123. }
  124. NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
  125. {
  126. return ctx.with_new_logical_scope([&] {
  127. VERIFY(element->as_element().name == tag_emu_clause);
  128. SpecificationClause specification_clause;
  129. specification_clause.parse(ctx, element);
  130. OwnPtr<SpecificationClause> result;
  131. specification_clause.m_header.header.visit(
  132. [&](AK::Empty const&) {
  133. result = make<SpecificationClause>(move(specification_clause));
  134. },
  135. [&](ClauseHeader::FunctionDefinition const&) {
  136. result = make<SpecFunction>(move(specification_clause));
  137. });
  138. if (!result->post_initialize(ctx, element))
  139. result = make<SpecificationClause>(move(*result));
  140. return result.release_nonnull();
  141. });
  142. }
  143. void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
  144. {
  145. do_collect(translation_unit);
  146. for (auto& subclause : m_subclauses)
  147. subclause->collect_into(translation_unit);
  148. }
  149. ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
  150. {
  151. VERIFY(element->as_element().name == tag_h1);
  152. auto tokens = TRY(tokenize_tree(element));
  153. TextParser parser(tokens.tokens, element);
  154. m_header = TRY(parser.parse_clause_header());
  155. return {};
  156. }
  157. void SpecificationClause::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  158. {
  159. u32 child_index = 0;
  160. Optional<NonnullRefPtr<ParseError>> header_parse_error;
  161. for (auto const& child : element->as_element().children) {
  162. child->content.visit(
  163. [&](XML::Node::Element const& element) {
  164. if (child_index == 0) {
  165. if (element.name != tag_h1) {
  166. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  167. "<h1> must be the first child of <emu-clause>");
  168. return;
  169. }
  170. if (auto error = parse_header(child); error.is_error())
  171. header_parse_error = error.release_error();
  172. else
  173. ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
  174. } else {
  175. if (element.name == tag_h1) {
  176. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  177. "<h1> can only be the first child of <emu-clause>");
  178. return;
  179. }
  180. if (element.name == tag_emu_clause) {
  181. m_subclauses.append(create(ctx, child));
  182. return;
  183. }
  184. if (header_parse_error.has_value()) {
  185. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  186. "node content will be ignored since section header was not parsed successfully");
  187. // TODO: Integrate backtracing parser errors better
  188. ctx.diag().note(ctx.location_from_xml_offset(header_parse_error.value()->offset()),
  189. "{}", header_parse_error.value()->to_string());
  190. header_parse_error.clear();
  191. }
  192. }
  193. ++child_index;
  194. },
  195. [&](XML::Node::Text const&) {
  196. if (!contains_empty_text(child)) {
  197. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  198. "non-empty text node should not be a child of <emu-clause>");
  199. }
  200. },
  201. [&](auto) {});
  202. }
  203. }
  204. bool SpecFunction::post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
  205. {
  206. VERIFY(element->as_element().name == tag_emu_clause);
  207. auto maybe_id = get_attribute_by_name(element, attribute_id);
  208. if (!maybe_id.has_value()) {
  209. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  210. "no id attribute");
  211. } else {
  212. m_id = maybe_id.value();
  213. }
  214. auto maybe_abstract_operation_id = get_attribute_by_name(element, attribute_aoid);
  215. if (maybe_abstract_operation_id.has_value())
  216. m_name = maybe_abstract_operation_id.value();
  217. m_section_number = m_header.section_number;
  218. auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
  219. m_arguments = arguments;
  220. if (m_name != function_name) {
  221. ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
  222. "function name in header and <emu-clause>[aoid] do not match");
  223. }
  224. Vector<XML::Node const*> algorithm_nodes;
  225. for (auto const& child : element->as_element().children) {
  226. child->content.visit(
  227. [&](XML::Node::Element const& element) {
  228. if (element.name == tag_h1) {
  229. // Processed in SpecificationClause
  230. } else if (element.name == tag_p) {
  231. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  232. "prose is ignored");
  233. } else if (element.name == tag_emu_alg) {
  234. algorithm_nodes.append(child);
  235. } else {
  236. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  237. "<{}> should not be a child of <emu-clause> specifing function"sv, element.name);
  238. }
  239. },
  240. [&](auto const&) {});
  241. }
  242. if (algorithm_nodes.size() != 1) {
  243. ctx.diag().error(ctx.location_from_xml_offset(element->offset),
  244. "<emu-clause> specifing function should have exactly one <emu-alg> child"sv);
  245. return false;
  246. }
  247. auto maybe_algorithm = Algorithm::create(ctx, algorithm_nodes[0]);
  248. if (maybe_algorithm.has_value()) {
  249. m_algorithm = maybe_algorithm.release_value();
  250. return true;
  251. } else {
  252. return false;
  253. }
  254. }
  255. void SpecFunction::do_collect(TranslationUnitRef translation_unit)
  256. {
  257. translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.m_tree, move(m_arguments)));
  258. }
  259. Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
  260. {
  261. VERIFY(element->as_element().name == tag_specification);
  262. Specification specification;
  263. specification.parse(ctx, element);
  264. return specification;
  265. }
  266. void Specification::collect_into(TranslationUnitRef translation_unit)
  267. {
  268. for (auto& clause : m_clauses)
  269. clause->collect_into(translation_unit);
  270. }
  271. void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  272. {
  273. for (auto const& child : element->as_element().children) {
  274. child->content.visit(
  275. [&](XML::Node::Element const& element) {
  276. if (element.name == tag_emu_intro) {
  277. // Introductory comments are ignored.
  278. } else if (element.name == tag_emu_clause) {
  279. m_clauses.append(SpecificationClause::create(ctx, child));
  280. } else if (element.name == tag_emu_import) {
  281. parse(ctx, child);
  282. } else {
  283. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  284. "<{}> should not be a child of <specification>", element.name);
  285. }
  286. },
  287. [&](XML::Node::Text const&) {
  288. if (!contains_empty_text(child)) {
  289. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  290. "non-empty text node should not be a child of <specification>");
  291. }
  292. },
  293. [&](auto) {});
  294. }
  295. }
  296. SpecParsingStep::SpecParsingStep()
  297. : CompilationStep("parser"sv)
  298. {
  299. }
  300. SpecParsingStep::~SpecParsingStep() = default;
  301. void SpecParsingStep::run(TranslationUnitRef translation_unit)
  302. {
  303. SpecificationParsingContext ctx(translation_unit);
  304. auto filename = translation_unit->filename();
  305. auto file_or_error = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
  306. if (file_or_error.is_error()) {
  307. ctx.diag().fatal_error(Location::global_scope(),
  308. "unable to open '{}': {}", filename, file_or_error.error());
  309. return;
  310. }
  311. auto input_or_error = file_or_error.value()->read_until_eof();
  312. if (input_or_error.is_error()) {
  313. ctx.diag().fatal_error(Location::global_scope(),
  314. "unable to read '{}': {}", filename, input_or_error.error());
  315. return;
  316. }
  317. m_input = input_or_error.release_value();
  318. XML::Parser parser { m_input };
  319. auto document_or_error = parser.parse();
  320. if (document_or_error.is_error()) {
  321. ctx.diag().fatal_error(ctx.file_scope(),
  322. "XML::Parser failed to parse input: {}", document_or_error.error());
  323. ctx.diag().note(ctx.file_scope(),
  324. "since XML::Parser backtracks on error, the message above is likely to point to the "
  325. "first tag in the input - use external XML verifier to find out the exact cause of error");
  326. return;
  327. }
  328. m_document = make<XML::Document>(document_or_error.release_value());
  329. auto const& root = m_document->root();
  330. if (!root.is_element() || root.as_element().name != tag_specification) {
  331. ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
  332. "document root must be <specification> tag");
  333. return;
  334. }
  335. auto specification = Specification::create(ctx, &root);
  336. specification.collect_into(translation_unit);
  337. }
  338. }