123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490 |
- /*
- * Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/NonnullOwnPtr.h>
- #include <LibCore/File.h>
- #include <LibXML/Parser/Parser.h>
- #include "Function.h"
- #include "Parser/Lexer.h"
- #include "Parser/SpecParser.h"
- #include "Parser/TextParser.h"
- #include "Parser/XMLUtils.h"
- namespace JSSpecCompiler {
- DiagnosticEngine& SpecificationParsingContext::diag()
- {
- return m_translation_unit->diag();
- }
- template<typename Func>
- auto SpecificationParsingContext::with_new_logical_scope(Func&& func)
- {
- TemporaryChange<RefPtr<LogicalLocation>> change(m_current_logical_scope, make_ref_counted<LogicalLocation>());
- return func();
- }
- LogicalLocation& SpecificationParsingContext::current_logical_scope()
- {
- return *m_current_logical_scope;
- }
- template<typename Func>
- auto SpecificationParsingContext::with_new_step_list_nesting_level(Func&& func)
- {
- TemporaryChange change(m_step_list_nesting_level, m_step_list_nesting_level + 1);
- return func();
- }
- int SpecificationParsingContext::step_list_nesting_level() const
- {
- return m_step_list_nesting_level;
- }
- Location SpecificationParsingContext::file_scope() const
- {
- return { .filename = m_translation_unit->filename() };
- }
- Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const
- {
- return {
- .filename = m_translation_unit->filename(),
- .line = offset.line,
- .column = offset.column,
- .logical_location = m_current_logical_scope,
- };
- }
- Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- VERIFY(element->as_element().name == tag_li);
- auto tokenization_result = tokenize_tree(ctx, element, true);
- if (tokenization_result.is_error()) {
- ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
- "{}", tokenization_result.error()->to_string());
- return {};
- }
- auto [tokens, substeps] = tokenization_result.release_value();
- AlgorithmStep result(ctx);
- result.m_tokens = move(tokens);
- result.m_node = element;
- if (substeps) {
- // FIXME: Remove this once macOS Lagom CI updates to Clang >= 16.
- auto substeps_copy = substeps;
- auto step_list = ctx.with_new_step_list_nesting_level([&] {
- return AlgorithmStepList::create(ctx, substeps_copy);
- });
- result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
- }
- if (!result.parse())
- return {};
- return result;
- }
- bool AlgorithmStep::parse()
- {
- TextParser parser(m_ctx, m_tokens, m_node);
- TextParseErrorOr<Tree> parse_result = TextParseError {};
- if (m_substeps)
- parse_result = parser.parse_step_with_substeps(RefPtr(m_substeps).release_nonnull());
- else
- parse_result = parser.parse_step_without_substeps();
- if (parse_result.is_error()) {
- auto [location, message] = parser.get_diagnostic();
- m_ctx.diag().error(location, "{}", message);
- return false;
- } else {
- m_expression = parse_result.release_value();
- return true;
- }
- }
- Optional<AlgorithmStepList> AlgorithmStepList::create(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- VERIFY(element->as_element().name == tag_ol);
- AlgorithmStepList result;
- Vector<Tree> step_expressions;
- bool all_steps_parsed = true;
- int step_number = 0;
- auto const& parent_scope = ctx.current_logical_scope();
- for (auto const& child : element->as_element().children) {
- child->content.visit(
- [&](XML::Node::Element const& element) {
- if (element.name == tag_li) {
- auto step_creation_result = ctx.with_new_logical_scope([&] {
- update_logical_scope_for_step(ctx, parent_scope, step_number);
- return AlgorithmStep::create(ctx, child);
- });
- if (!step_creation_result.has_value())
- all_steps_parsed = false;
- else
- step_expressions.append(step_creation_result.release_value().tree());
- ++step_number;
- return;
- }
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<{}> should not be a child of algorithm step list"sv, element.name);
- },
- [&](XML::Node::Text const&) {
- if (!contains_empty_text(child)) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "non-empty text node should not be a child of algorithm step list");
- }
- },
- [&](auto const&) {});
- }
- if (!all_steps_parsed)
- return {};
- result.m_expression = make_ref_counted<TreeList>(move(step_expressions));
- return result;
- }
- void AlgorithmStepList::update_logical_scope_for_step(SpecificationParsingContext& ctx, LogicalLocation const& parent_scope, int step_number)
- {
- int nesting_level = ctx.step_list_nesting_level();
- String list_step_number;
- if (nesting_level == 0 || nesting_level == 3) {
- list_step_number = MUST(String::formatted("{}", step_number + 1));
- } else if (nesting_level == 1 || nesting_level == 4) {
- if (step_number < 26)
- list_step_number = String::from_code_point('a' + step_number);
- else
- list_step_number = MUST(String::formatted("{}", step_number + 1));
- } else {
- list_step_number = MUST(String::from_byte_string(ByteString::roman_number_from(step_number + 1).to_lowercase()));
- }
- auto& scope = ctx.current_logical_scope();
- scope.section = parent_scope.section;
- if (parent_scope.step.is_empty())
- scope.step = list_step_number;
- else
- scope.step = MUST(String::formatted("{}.{}", parent_scope.step, list_step_number));
- }
- Optional<Algorithm> Algorithm::create(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- VERIFY(element->as_element().name == tag_emu_alg);
- Vector<XML::Node const*> steps_list;
- for (auto const& child : element->as_element().children) {
- child->content.visit(
- [&](XML::Node::Element const& element) {
- if (element.name == tag_ol) {
- steps_list.append(child);
- return;
- }
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<{}> should not be a child of <emu-alg>"sv, element.name);
- },
- [&](XML::Node::Text const&) {
- if (!contains_empty_text(child)) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "non-empty text node should not be a child of <emu-alg>");
- }
- },
- [&](auto const&) {});
- }
- if (steps_list.size() != 1) {
- ctx.diag().error(ctx.location_from_xml_offset(element->offset),
- "<emu-alg> should have exactly one <ol> child"sv);
- return {};
- }
- auto steps_creation_result = AlgorithmStepList::create(ctx, steps_list[0]);
- if (steps_creation_result.has_value()) {
- Algorithm algorithm;
- algorithm.m_tree = steps_creation_result.release_value().tree();
- return algorithm;
- }
- return {};
- }
- NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- return ctx.with_new_logical_scope([&] {
- VERIFY(element->as_element().name == tag_emu_clause);
- SpecificationClause specification_clause(ctx);
- specification_clause.parse(element);
- OwnPtr<SpecificationClause> result;
- specification_clause.m_header.header.visit(
- [&](AK::Empty const&) {
- result = make<SpecificationClause>(move(specification_clause));
- },
- [&](ClauseHeader::FunctionDefinition const&) {
- result = make<SpecFunction>(move(specification_clause));
- });
- if (!result->post_initialize(element))
- result = make<SpecificationClause>(move(*result));
- return result.release_nonnull();
- });
- }
- void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
- {
- do_collect(translation_unit);
- for (auto& subclause : m_subclauses)
- subclause->collect_into(translation_unit);
- }
- Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node const* element)
- {
- auto& ctx = *m_ctx_pointer;
- VERIFY(element->as_element().name == tag_h1);
- auto tokenization_result = tokenize_tree(ctx, element, false);
- if (tokenization_result.is_error()) {
- return FailedTextParseDiagnostic {
- ctx.location_from_xml_offset(tokenization_result.error()->offset()),
- tokenization_result.error()->to_string()
- };
- }
- auto const& tokens = tokenization_result.release_value().tokens;
- TextParser parser(ctx, tokens, element);
- auto parse_result = parser.parse_clause_header();
- if (parse_result.is_error()) {
- // Still try to at least scavenge section number.
- if (tokens.size() && tokens[0].type == TokenType::SectionNumber)
- ctx.current_logical_scope().section = MUST(String::from_utf8(tokens[0].data));
- return parser.get_diagnostic();
- }
- m_header = parse_result.release_value();
- ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number));
- return {};
- }
- void SpecificationClause::parse(XML::Node const* element)
- {
- auto& ctx = context();
- u32 child_index = 0;
- Optional<FailedTextParseDiagnostic> header_parse_error;
- for (auto const& child : element->as_element().children) {
- child->content.visit(
- [&](XML::Node::Element const& element) {
- if (child_index == 0) {
- if (element.name != tag_h1) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<h1> must be the first child of <emu-clause>");
- return;
- }
- header_parse_error = parse_header(child);
- } else {
- if (element.name == tag_h1) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<h1> can only be the first child of <emu-clause>");
- return;
- }
- if (element.name == tag_emu_clause) {
- m_subclauses.append(create(ctx, child));
- return;
- }
- if (header_parse_error.has_value()) {
- ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
- "node content will be ignored since section header was not parsed successfully");
- ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
- }
- }
- ++child_index;
- },
- [&](XML::Node::Text const&) {
- if (!contains_empty_text(child)) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "non-empty text node should not be a child of <emu-clause>");
- }
- },
- [&](auto) {});
- }
- }
- bool SpecFunction::post_initialize(XML::Node const* element)
- {
- VERIFY(element->as_element().name == tag_emu_clause);
- auto& ctx = context();
- auto maybe_id = get_attribute_by_name(element, attribute_id);
- if (!maybe_id.has_value()) {
- ctx.diag().error(ctx.location_from_xml_offset(element->offset),
- "no id attribute");
- } else {
- m_id = maybe_id.value();
- }
- auto maybe_abstract_operation_id = get_attribute_by_name(element, attribute_aoid);
- if (maybe_abstract_operation_id.has_value())
- m_name = maybe_abstract_operation_id.value();
- m_section_number = m_header.section_number;
- auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
- m_arguments = arguments;
- if (m_name != function_name) {
- ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
- "function name in header and <emu-clause>[aoid] do not match");
- }
- Vector<XML::Node const*> algorithm_nodes;
- for (auto const& child : element->as_element().children) {
- child->content.visit(
- [&](XML::Node::Element const& element) {
- if (element.name == tag_h1) {
- // Processed in SpecificationClause
- } else if (element.name == tag_p) {
- ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
- "prose is ignored");
- } else if (element.name == tag_emu_alg) {
- algorithm_nodes.append(child);
- } else {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<{}> should not be a child of <emu-clause> specifing function"sv, element.name);
- }
- },
- [&](auto const&) {});
- }
- if (algorithm_nodes.size() != 1) {
- ctx.diag().error(ctx.location_from_xml_offset(element->offset),
- "<emu-clause> specifing function should have exactly one <emu-alg> child"sv);
- return false;
- }
- auto maybe_algorithm = Algorithm::create(ctx, algorithm_nodes[0]);
- if (maybe_algorithm.has_value()) {
- m_algorithm = maybe_algorithm.release_value();
- return true;
- } else {
- return false;
- }
- }
- void SpecFunction::do_collect(TranslationUnitRef translation_unit)
- {
- translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.tree(), move(m_arguments)));
- }
- Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- VERIFY(element->as_element().name == tag_specification);
- Specification specification;
- specification.parse(ctx, element);
- return specification;
- }
- void Specification::collect_into(TranslationUnitRef translation_unit)
- {
- for (auto& clause : m_clauses)
- clause->collect_into(translation_unit);
- }
- void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element)
- {
- for (auto const& child : element->as_element().children) {
- child->content.visit(
- [&](XML::Node::Element const& element) {
- if (element.name == tag_emu_intro) {
- // Introductory comments are ignored.
- } else if (element.name == tag_emu_clause) {
- m_clauses.append(SpecificationClause::create(ctx, child));
- } else if (element.name == tag_emu_import) {
- parse(ctx, child);
- } else {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "<{}> should not be a child of <specification>", element.name);
- }
- },
- [&](XML::Node::Text const&) {
- if (!contains_empty_text(child)) {
- ctx.diag().error(ctx.location_from_xml_offset(child->offset),
- "non-empty text node should not be a child of <specification>");
- }
- },
- [&](auto) {});
- }
- }
- SpecParsingStep::SpecParsingStep()
- : CompilationStep("parser"sv)
- {
- }
- SpecParsingStep::~SpecParsingStep() = default;
- void SpecParsingStep::run(TranslationUnitRef translation_unit)
- {
- SpecificationParsingContext ctx(translation_unit);
- auto filename = translation_unit->filename();
- auto file_or_error = Core::File::open_file_or_standard_stream(filename, Core::File::OpenMode::Read);
- if (file_or_error.is_error()) {
- ctx.diag().fatal_error(Location::global_scope(),
- "unable to open '{}': {}", filename, file_or_error.error());
- return;
- }
- auto input_or_error = file_or_error.value()->read_until_eof();
- if (input_or_error.is_error()) {
- ctx.diag().fatal_error(Location::global_scope(),
- "unable to read '{}': {}", filename, input_or_error.error());
- return;
- }
- m_input = input_or_error.release_value();
- XML::Parser parser { m_input };
- auto document_or_error = parser.parse();
- if (document_or_error.is_error()) {
- ctx.diag().fatal_error(ctx.file_scope(),
- "XML::Parser failed to parse input: {}", document_or_error.error());
- ctx.diag().note(ctx.file_scope(),
- "since XML::Parser backtracks on error, the message above is likely to point to the "
- "first tag in the input - use external XML verifier to find out the exact cause of error");
- return;
- }
- m_document = make<XML::Document>(document_or_error.release_value());
- auto const& root = m_document->root();
- if (!root.is_element() || root.as_element().name != tag_specification) {
- ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
- "document root must be <specification> tag");
- return;
- }
- auto specification = Specification::create(ctx, &root);
- specification.collect_into(translation_unit);
- }
- }
|