SpecParser.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
/*
 * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #include <AK/NonnullOwnPtr.h>
  7. #include <LibCore/File.h>
  8. #include <LibXML/Parser/Parser.h>
  9. #include "Function.h"
  10. #include "Parser/Lexer.h"
  11. #include "Parser/SpecParser.h"
  12. #include "Parser/TextParser.h"
  13. #include "Parser/XMLUtils.h"
  14. namespace JSSpecCompiler {
  15. DiagnosticEngine& SpecificationParsingContext::diag()
  16. {
  17. return m_translation_unit->diag();
  18. }
  19. template<typename Func>
  20. auto SpecificationParsingContext::with_new_logical_scope(Func&& func)
  21. {
  22. TemporaryChange<RefPtr<LogicalLocation>> change(m_current_logical_scope, make_ref_counted<LogicalLocation>());
  23. return func();
  24. }
  25. LogicalLocation& SpecificationParsingContext::current_logical_scope()
  26. {
  27. return *m_current_logical_scope;
  28. }
  29. Location SpecificationParsingContext::file_scope() const
  30. {
  31. return { .filename = m_translation_unit->filename() };
  32. }
  33. Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const
  34. {
  35. return {
  36. .filename = m_translation_unit->filename(),
  37. .line = offset.line,
  38. .column = offset.column,
  39. .logical_location = m_current_logical_scope,
  40. };
  41. }
  42. ParseErrorOr<AlgorithmStep> AlgorithmStep::create(XML::Node const* node)
  43. {
  44. VERIFY(node->as_element().name == tag_li);
  45. auto [tokens, substeps] = TRY(tokenize_tree(node, true));
  46. AlgorithmStep result { .m_tokens = move(tokens), .m_node = node };
  47. if (substeps)
  48. result.m_substeps = TRY(AlgorithmStepList::create(substeps->as_element())).m_expression;
  49. result.m_expression = TRY(result.parse());
  50. return result;
  51. }
  52. ParseErrorOr<Tree> AlgorithmStep::parse()
  53. {
  54. TextParser parser(m_tokens, m_node);
  55. if (m_substeps)
  56. return parser.parse_step_with_substeps(RefPtr(m_substeps).release_nonnull());
  57. else
  58. return parser.parse_step_without_substeps();
  59. }
  60. ParseErrorOr<AlgorithmStepList> AlgorithmStepList::create(XML::Node::Element const& element)
  61. {
  62. VERIFY(element.name == tag_ol);
  63. AlgorithmStepList result;
  64. auto& steps = result.m_steps;
  65. Vector<Tree> step_expressions;
  66. for (auto const& child : element.children) {
  67. TRY(child->content.visit(
  68. [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
  69. if (element.name != tag_li)
  70. return ParseError::create("<emu-alg> <ol> > :not(<li>) should not match any elements"sv, child);
  71. steps.append(TRY(AlgorithmStep::create(child)));
  72. step_expressions.append(steps.last().m_expression);
  73. return {};
  74. },
  75. [&](XML::Node::Text const&) -> ParseErrorOr<void> {
  76. if (!contains_empty_text(child))
  77. return ParseError::create("<emu-alg> <ol> should not have non-empty child text nodes"sv, child);
  78. return {};
  79. },
  80. move(ignore_comments)));
  81. }
  82. result.m_expression = make_ref_counted<TreeList>(move(step_expressions));
  83. return result;
  84. }
  85. ParseErrorOr<Algorithm> Algorithm::create(XML::Node const* node)
  86. {
  87. VERIFY(node->as_element().name == tag_emu_alg);
  88. XML::Node::Element const* steps_list = nullptr;
  89. for (auto const& child : node->as_element().children) {
  90. TRY(child->content.visit(
  91. [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
  92. if (element.name == tag_ol) {
  93. if (steps_list != nullptr)
  94. return ParseError::create("<emu-alg> should have exactly one <ol> child"sv, child);
  95. steps_list = &element;
  96. return {};
  97. } else {
  98. return ParseError::create("<emu-alg> should not have children other than <ol>"sv, child);
  99. }
  100. },
  101. [&](XML::Node::Text const&) -> ParseErrorOr<void> {
  102. if (!contains_empty_text(child))
  103. return ParseError::create("<emu-alg> should not have non-empty child text nodes"sv, child);
  104. return {};
  105. },
  106. move(ignore_comments)));
  107. }
  108. if (steps_list == nullptr)
  109. return ParseError::create("<emu-alg> should have exactly one <ol> child"sv, node);
  110. Algorithm algorithm;
  111. algorithm.m_steps = TRY(AlgorithmStepList::create(*steps_list));
  112. algorithm.m_tree = algorithm.m_steps.m_expression;
  113. return algorithm;
  114. }
  115. NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
  116. {
  117. return ctx.with_new_logical_scope([&] {
  118. VERIFY(element->as_element().name == tag_emu_clause);
  119. SpecificationClause specification_clause;
  120. specification_clause.parse(ctx, element);
  121. OwnPtr<SpecificationClause> result;
  122. specification_clause.m_header.header.visit(
  123. [&](AK::Empty const&) {
  124. result = make<SpecificationClause>(move(specification_clause));
  125. },
  126. [&](ClauseHeader::FunctionDefinition const&) {
  127. result = make<SpecFunction>(move(specification_clause));
  128. });
  129. if (!result->post_initialize(ctx, element))
  130. result = make<SpecificationClause>(move(*result));
  131. return result.release_nonnull();
  132. });
  133. }
  134. void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
  135. {
  136. do_collect(translation_unit);
  137. for (auto& subclause : m_subclauses)
  138. subclause->collect_into(translation_unit);
  139. }
  140. ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
  141. {
  142. VERIFY(element->as_element().name == tag_h1);
  143. auto tokens = TRY(tokenize_tree(element));
  144. TextParser parser(tokens.tokens, element);
  145. m_header = TRY(parser.parse_clause_header());
  146. return {};
  147. }
  148. void SpecificationClause::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  149. {
  150. u32 child_index = 0;
  151. Optional<NonnullRefPtr<ParseError>> header_parse_error;
  152. for (auto const& child : element->as_element().children) {
  153. child->content.visit(
  154. [&](XML::Node::Element const& element) {
  155. if (child_index == 0) {
  156. if (element.name != tag_h1) {
  157. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  158. "<h1> must be the first child of <emu-clause>");
  159. return;
  160. }
  161. if (auto error = parse_header(child); error.is_error())
  162. header_parse_error = error.release_error();
  163. else
  164. ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
  165. } else {
  166. if (element.name == tag_emu_clause) {
  167. m_subclauses.append(create(ctx, child));
  168. return;
  169. }
  170. if (header_parse_error.has_value()) {
  171. ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
  172. "node content will be ignored since section header was not parsed successfully");
  173. // TODO: Integrate backtracing parser errors better
  174. ctx.diag().note(ctx.location_from_xml_offset(header_parse_error.value()->offset()),
  175. "{}", header_parse_error.value()->to_string());
  176. header_parse_error.clear();
  177. }
  178. }
  179. ++child_index;
  180. },
  181. [&](XML::Node::Text const&) {
  182. if (!contains_empty_text(child)) {
  183. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  184. "non-empty text node should not be a child of <emu-clause>");
  185. }
  186. },
  187. [&](auto) {});
  188. }
  189. }
  190. bool SpecFunction::post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
  191. {
  192. auto initialization_result = do_post_initialize(ctx, element);
  193. if (initialization_result.is_error()) {
  194. // TODO: Integrate backtracing parser errors better
  195. ctx.diag().error(ctx.location_from_xml_offset(initialization_result.error()->offset()),
  196. "{}", initialization_result.error()->to_string());
  197. return false;
  198. }
  199. return true;
  200. }
  201. void SpecFunction::do_collect(TranslationUnitRef translation_unit)
  202. {
  203. translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.m_tree, move(m_arguments)));
  204. }
  205. ParseErrorOr<void> SpecFunction::do_post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
  206. {
  207. VERIFY(element->as_element().name == tag_emu_clause);
  208. m_id = TRY(get_attribute_by_name(element, attribute_id));
  209. m_name = TRY(get_attribute_by_name(element, attribute_aoid));
  210. m_section_number = m_header.section_number;
  211. auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
  212. if (m_name != function_name) {
  213. ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
  214. "function name in header and <emu-clause>[aoid] do not match");
  215. }
  216. m_arguments = arguments;
  217. u32 children_count = 0;
  218. XML::Node const* algorithm_node = nullptr;
  219. XML::Node const* prose_node = nullptr;
  220. for (auto const& child : element->as_element().children) {
  221. TRY(child->content.visit(
  222. [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
  223. if (element.name == tag_h1) {
  224. if (children_count != 0)
  225. return ParseError::create("<h1> can only be the first child of <emu-clause>"sv, child);
  226. } else if (element.name == tag_p) {
  227. if (prose_node == nullptr)
  228. prose_node = child;
  229. } else if (element.name == tag_emu_alg) {
  230. algorithm_node = child;
  231. } else {
  232. return ParseError::create("Unknown child of <emu-clause>"sv, child);
  233. }
  234. ++children_count;
  235. return {};
  236. },
  237. [&](XML::Node::Text const&) -> ParseErrorOr<void> {
  238. if (!contains_empty_text(child)) {
  239. return ParseError::create("<emu-clause> should not have non-empty child text nodes"sv, child);
  240. }
  241. return {};
  242. },
  243. move(ignore_comments)));
  244. }
  245. if (algorithm_node == nullptr)
  246. return ParseError::create("No <emu-alg>"sv, element);
  247. if (prose_node) {
  248. ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
  249. "prose is ignored");
  250. }
  251. m_algorithm = TRY(Algorithm::create(algorithm_node));
  252. return {};
  253. }
  254. Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
  255. {
  256. VERIFY(element->as_element().name == tag_specification);
  257. Specification specification;
  258. specification.parse(ctx, element);
  259. return specification;
  260. }
  261. void Specification::collect_into(TranslationUnitRef translation_unit)
  262. {
  263. for (auto& clause : m_clauses)
  264. clause->collect_into(translation_unit);
  265. }
  266. void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
  267. {
  268. for (auto const& child : element->as_element().children) {
  269. child->content.visit(
  270. [&](XML::Node::Element const& element) {
  271. if (element.name == tag_emu_intro) {
  272. // Introductory comments are ignored.
  273. } else if (element.name == tag_emu_clause) {
  274. m_clauses.append(SpecificationClause::create(ctx, child));
  275. } else if (element.name == tag_emu_import) {
  276. parse(ctx, child);
  277. } else {
  278. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  279. "<{}> should not be a child of <specification>", element.name);
  280. }
  281. },
  282. [&](XML::Node::Text const&) {
  283. if (!contains_empty_text(child)) {
  284. ctx.diag().error(ctx.location_from_xml_offset(child->offset),
  285. "non-empty text node should not be a child of <specification>");
  286. }
  287. },
  288. [&](auto) {});
  289. }
  290. }
  291. SpecParsingStep::SpecParsingStep()
  292. : CompilationStep("parser"sv)
  293. {
  294. }
  295. SpecParsingStep::~SpecParsingStep() = default;
  296. void SpecParsingStep::run(TranslationUnitRef translation_unit)
  297. {
  298. SpecificationParsingContext ctx(translation_unit);
  299. auto filename = translation_unit->filename();
  300. auto file_or_error = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
  301. if (file_or_error.is_error()) {
  302. ctx.diag().fatal_error(Location::global_scope(),
  303. "unable to open '{}': {}", filename, file_or_error.error());
  304. return;
  305. }
  306. auto input_or_error = file_or_error.value()->read_until_eof();
  307. if (input_or_error.is_error()) {
  308. ctx.diag().fatal_error(Location::global_scope(),
  309. "unable to read '{}': {}", filename, input_or_error.error());
  310. return;
  311. }
  312. m_input = input_or_error.release_value();
  313. XML::Parser parser { m_input };
  314. auto document_or_error = parser.parse();
  315. if (document_or_error.is_error()) {
  316. ctx.diag().fatal_error(ctx.file_scope(),
  317. "XML::Parser failed to parse input: {}", document_or_error.error());
  318. ctx.diag().note(ctx.file_scope(),
  319. "since XML::Parser backtracks on error, the message above is likely to point to the "
  320. "first tag in the input - use external XML verifier to find out the exact cause of error");
  321. return;
  322. }
  323. m_document = make<XML::Document>(document_or_error.release_value());
  324. auto const& root = m_document->root();
  325. if (!root.is_element() || root.as_element().name != tag_specification) {
  326. ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
  327. "document root must be <specification> tag");
  328. return;
  329. }
  330. auto specification = Specification::create(ctx, &root);
  331. specification.collect_into(translation_unit);
  332. }
  333. }