Ver Fonte

JSSpecCompiler: Make -xspec capable of parsing the whole specification

... in theory. In practice, we fail to parse all of the functions.
Dan Klishch há 1 ano atrás
pai
commit
14ee25b8ba

+ 2 - 0
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h

@@ -63,6 +63,8 @@ class AlgorithmStep;
 class AlgorithmStepList;
 class Algorithm;
 class SpecFunction;
+class SpecificationClause;
+class Specification;
 
 // DiagnosticEngine.h
 struct LogicalLocation;

+ 3 - 0
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h

@@ -13,6 +13,8 @@ namespace JSSpecCompiler {
 
 inline constexpr StringView tag_emu_alg = "emu-alg"sv;
 inline constexpr StringView tag_emu_clause = "emu-clause"sv;
+inline constexpr StringView tag_emu_import = "emu-import"sv;
+inline constexpr StringView tag_emu_intro = "emu-intro"sv;
 inline constexpr StringView tag_emu_val = "emu-val"sv;
 inline constexpr StringView tag_emu_xref = "emu-xref"sv;
 inline constexpr StringView tag_h1 = "h1"sv;
@@ -20,6 +22,7 @@ inline constexpr StringView tag_li = "li"sv;
 inline constexpr StringView tag_ol = "ol"sv;
 inline constexpr StringView tag_p = "p"sv;
 inline constexpr StringView tag_span = "span"sv;
+inline constexpr StringView tag_specification = "specification"sv;
 inline constexpr StringView tag_var = "var"sv;
 
 inline constexpr StringView attribute_aoid = "aoid"sv;

+ 7 - 1
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/ParseError.cpp

@@ -5,6 +5,7 @@
  */
 
 #include "Parser/ParseError.h"
+#include "DiagnosticEngine.h"
 
 namespace JSSpecCompiler {
 
@@ -27,7 +28,7 @@ NonnullRefPtr<ParseError> ParseError::create(ErrorOr<String> message, XML::Node
 String ParseError::to_string() const
 {
     StringBuilder builder;
-    builder.appendff("error: {}\n", m_message);
+    builder.appendff("{}\n", m_message);
 
     XML::Node const* current = m_node;
     while (current != nullptr) {
@@ -48,4 +49,9 @@ String ParseError::to_string() const
     return MUST(builder.to_string());
 }
 
+XML::Offset ParseError::offset() const // Returns the `offset` field of the XML node stored in m_node.
+{
+    return m_node->offset; // NOTE(review): m_node is dereferenced unchecked here, while to_string() tests its node pointer against nullptr as it walks from m_node — confirm m_node can never be null.
+}
+
 }

+ 1 - 0
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/ParseError.h

@@ -24,6 +24,7 @@ public:
     static NonnullRefPtr<ParseError> create(ErrorOr<String> message, XML::Node const* node);
 
     String to_string() const;
+    XML::Offset offset() const;
 
 private:
     String m_message;

+ 170 - 35
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp

@@ -138,16 +138,127 @@ ParseErrorOr<Algorithm> Algorithm::create(XML::Node const* node)
     return algorithm;
 }
 
-ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
+NonnullOwnPtr<SpecificationClause> SpecificationClause::create(SpecificationParsingContext& ctx, XML::Node const* element) // Builds a clause object from an <emu-clause> element; the concrete type depends on the parsed header.
+{
+    return ctx.with_new_logical_scope([&] { // The whole construction runs inside the callback handed to with_new_logical_scope().
+        VERIFY(element->as_element().name == tag_emu_clause); // Callers must pass an <emu-clause> element.
+
+        SpecificationClause specification_clause; // Parse into a plain base-class object first; its header decides the final type.
+        specification_clause.parse(ctx, element);
+
+        OwnPtr<SpecificationClause> result;
+
+        specification_clause.m_header.header.visit( // Dispatch on which alternative the header variant holds.
+            [&](AK::Empty const&) {
+                result = make<SpecificationClause>(move(specification_clause)); // No function definition in the header: keep a plain clause.
+            },
+            [&](ClauseHeader::FunctionDefinition const&) {
+                result = make<SpecFunction>(move(specification_clause)); // Header holds a function definition: wrap the parsed state in a SpecFunction.
+            });
+
+        if (!result->post_initialize(ctx, element)) // post_initialize() returning false means type-specific setup failed.
+            result = make<SpecificationClause>(move(*result)); // Fall back to a plain SpecificationClause move-constructed from the failed object.
+
+        return result.release_nonnull();
+    });
+}
+
+void SpecificationClause::collect_into(TranslationUnitRef translation_unit) // Collects this clause and then, recursively, every subclause into translation_unit.
+{
+    do_collect(translation_unit); // Virtual hook: this clause contributes first.
+    for (auto& subclause : m_subclauses)
+        subclause->collect_into(translation_unit); // Then each nested clause, in stored order.
+}
+
+ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element) // Parses an <h1> element into m_header; tokenizer and parser errors are propagated to the caller.
+{
+    VERIFY(element->as_element().name == tag_h1); // Only <h1> elements may be passed in.
+    auto tokens = TRY(tokenize_tree(element));
+    TextParser parser(tokens.tokens, element);
+    m_header = TRY(parser.parse_clause_header()); // m_header is only overwritten when parsing succeeds.
+    return {};
+}
+
+void SpecificationClause::parse(SpecificationParsingContext& ctx, XML::Node const* element) // Walks the children of an <emu-clause>: parses the leading <h1>, recurses into nested clauses, and reports problems through ctx.diag().
+{
+    u32 child_index = 0; // Counts element children that reached the ++child_index at the end of the element visitor (early returns skip it).
+
+    Optional<NonnullRefPtr<ParseError>> header_parse_error; // Holds a header parse failure until it is reported next to the first ignored node.
+
+    for (auto const& child : element->as_element().children) {
+        child->content.visit(
+            [&](XML::Node::Element const& element) { // Note: this parameter shadows the outer `element`.
+                if (child_index == 0) {
+                    if (element.name != tag_h1) {
+                        ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                            "<h1> must be the first child of <emu-clause>");
+                        return; // Returns before ++child_index, so the next element child is again handled as the first one.
+                    }
+
+                    if (auto error = parse_header(child); error.is_error())
+                        header_parse_error = error.release_error(); // Reporting is deferred; see the warn/note pair below.
+                    else
+                        ctx.current_logical_scope().section = MUST(String::from_utf8(m_header.section_number)); // Record the parsed section number on the current logical scope.
+                } else {
+                    if (element.name == tag_emu_clause) {
+                        m_subclauses.append(create(ctx, child)); // Nested clauses are parsed recursively and owned by this clause.
+                        return; // Also skips ++child_index.
+                    }
+                    if (header_parse_error.has_value()) {
+                        ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
+                            "node content will be ignored since section header was not parsed successfully");
+                        // TODO: Integrate backtracing parser errors better
+                        ctx.diag().note(ctx.location_from_xml_offset(header_parse_error.value()->offset()),
+                            "{}", header_parse_error.value()->to_string());
+                        header_parse_error.clear(); // Cleared so the warning and note are emitted at most once per clause.
+                    }
+                }
+                ++child_index;
+            },
+            [&](XML::Node::Text const&) {
+                if (!contains_empty_text(child)) { // presumably true for whitespace-only text — verify against contains_empty_text()
+                    ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                        "non-empty text node should not be a child of <emu-clause>");
+                }
+            },
+            [&](auto) {}); // Any other node kind is ignored.
+    }
+}
+
+bool SpecFunction::post_initialize(SpecificationParsingContext& ctx, XML::Node const* element) // Runs do_post_initialize(); on failure reports the error through ctx.diag() and returns false, otherwise returns true.
+{
+    auto initialization_result = do_post_initialize(ctx, element);
+    if (initialization_result.is_error()) {
+        // TODO: Integrate backtracing parser errors better
+        ctx.diag().error(ctx.location_from_xml_offset(initialization_result.error()->offset()), // Located at the offset of the node attached to the ParseError.
+            "{}", initialization_result.error()->to_string());
+        return false;
+    }
+    return true;
+}
+
+void SpecFunction::do_collect(TranslationUnitRef translation_unit) // Hands the translation unit a FunctionDefinition built from this function's name, algorithm tree and arguments.
+{
+    translation_unit->adopt_function(make_ref_counted<FunctionDefinition>(m_name, m_algorithm.m_tree, move(m_arguments))); // m_arguments is passed through move(), so it should not be relied on after this call.
+}
+
+ParseErrorOr<void> SpecFunction::do_post_initialize(SpecificationParsingContext& ctx, XML::Node const* element)
 {
     VERIFY(element->as_element().name == tag_emu_clause);
 
-    SpecFunction result;
-    result.m_id = TRY(get_attribute_by_name(element, attribute_id));
-    result.m_name = TRY(get_attribute_by_name(element, attribute_aoid));
+    m_id = TRY(get_attribute_by_name(element, attribute_id));
+    m_name = TRY(get_attribute_by_name(element, attribute_aoid));
+
+    m_section_number = m_header.section_number;
+    auto const& [function_name, arguments] = m_header.header.get<ClauseHeader::FunctionDefinition>();
+
+    if (m_name != function_name) {
+        ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
+            "function name in header and <emu-clause>[aoid] do not match");
+    }
+    m_arguments = arguments;
 
     u32 children_count = 0;
-    bool has_definition = false;
 
     XML::Node const* algorithm_node = nullptr;
     XML::Node const* prose_node = nullptr;
@@ -155,12 +266,9 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
     for (auto const& child : element->as_element().children) {
         TRY(child->content.visit(
             [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
-                ++children_count;
                 if (element.name == tag_h1) {
-                    if (children_count != 1)
-                        return ParseError::create("<h1> should be the first child of a <emu-clause>"sv, child);
-                    TRY(result.parse_definition(child));
-                    has_definition = true;
+                    if (children_count != 0)
+                        return ParseError::create("<h1> can only be the first child of <emu-clause>"sv, child);
                 } else if (element.name == tag_p) {
                     if (prose_node == nullptr)
                         prose_node = child;
@@ -169,6 +277,7 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
                 } else {
                     return ParseError::create("Unknown child of <emu-clause>"sv, child);
                 }
+                ++children_count;
                 return {};
             },
             [&](XML::Node::Text const&) -> ParseErrorOr<void> {
@@ -182,30 +291,56 @@ ParseErrorOr<SpecFunction> SpecFunction::create(XML::Node const* element)
 
     if (algorithm_node == nullptr)
         return ParseError::create("No <emu-alg>"sv, element);
-    if (prose_node == nullptr)
-        return ParseError::create("No prose element"sv, element);
-    if (!has_definition)
-        return ParseError::create("Definition was not found"sv, element);
 
-    result.m_algorithm = TRY(Algorithm::create(algorithm_node));
-    return result;
+    if (prose_node) {
+        ctx.diag().warn(ctx.location_from_xml_offset(element->offset),
+            "prose is ignored");
+    }
+
+    m_algorithm = TRY(Algorithm::create(algorithm_node));
+
+    return {};
 }
 
-ParseErrorOr<void> SpecFunction::parse_definition(XML::Node const* element)
+Specification Specification::create(SpecificationParsingContext& ctx, XML::Node const* element) // Builds a Specification by parsing the children of a <specification> element.
 {
-    auto tokens = TRY(tokenize_tree(element));
-    TextParser parser(tokens.tokens, element);
-
-    auto [section_number, function_name, arguments] = TRY(parser.parse_definition());
+    VERIFY(element->as_element().name == tag_specification); // Callers must pass a <specification> element.
 
-    if (function_name != m_name)
-        return ParseError::create("Function name in definition differs from <emu-clause>[aoid]"sv, element);
+    Specification specification;
+    specification.parse(ctx, element); // parse() returns void; problems are reported through ctx.diag().
+    return specification;
+}
 
-    m_section_number = section_number;
-    for (auto const& argument : arguments)
-        m_arguments.append({ argument });
+void Specification::collect_into(TranslationUnitRef translation_unit) // Forwards collection to every top-level clause, in stored order.
+{
+    for (auto& clause : m_clauses)
+        clause->collect_into(translation_unit);
+}
 
-    return {};
+void Specification::parse(SpecificationParsingContext& ctx, XML::Node const* element) // Visits each child of `element`, collecting <emu-clause> children into m_clauses and reporting unexpected content through ctx.diag().
+{
+    for (auto const& child : element->as_element().children) {
+        child->content.visit(
+            [&](XML::Node::Element const& element) { // Note: this parameter shadows the outer `element`.
+                if (element.name == tag_emu_intro) {
+                    // Introductory comments are ignored.
+                } else if (element.name == tag_emu_clause) {
+                    m_clauses.append(SpecificationClause::create(ctx, child));
+                } else if (element.name == tag_emu_import) {
+                    parse(ctx, child); // Recurses so that clauses nested in <emu-import> land in the same m_clauses list.
+                } else {
+                    ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                        "<{}> should not be a child of <specification>", element.name); // The message names <specification> even when reached through the <emu-import> recursion.
+                }
+            },
+            [&](XML::Node::Text const&) {
+                if (!contains_empty_text(child)) { // presumably true for whitespace-only text — verify against contains_empty_text()
+                    ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                        "non-empty text node should not be a child of <specification>");
+                }
+            },
+            [&](auto) {}); // Any other node kind is ignored.
+    }
 }
 
 SpecParsingStep::SpecParsingStep()
@@ -247,14 +382,14 @@ void SpecParsingStep::run(TranslationUnitRef translation_unit)
     }
     m_document = make<XML::Document>(document_or_error.release_value());
 
-    auto spec_function = SpecFunction::create(&m_document->root()).release_value_but_fixme_should_propagate_errors();
-
-    Vector<FunctionArgument> arguments;
-    for (auto const& argument : spec_function.m_arguments)
-        arguments.append({ argument.name });
+    auto const& root = m_document->root();
+    if (!root.is_element() || root.as_element().name != tag_specification) {
+        ctx.diag().fatal_error(ctx.location_from_xml_offset(root.offset),
+            "document root must be <specification> tag");
+        return;
+    }
 
-    translation_unit->adopt_function(
-        make_ref_counted<FunctionDefinition>(spec_function.m_name, spec_function.m_algorithm.m_tree, move(arguments)));
+    auto specification = Specification::create(ctx, &root);
+    specification.collect_into(translation_unit);
 }
-
 }

+ 48 - 7
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.h

@@ -12,6 +12,7 @@
 #include "CompilationPipeline.h"
 #include "Forward.h"
 #include "Parser/ParseError.h"
+#include "Parser/TextParser.h"
 #include "Parser/Token.h"
 
 namespace JSSpecCompiler {
@@ -68,24 +69,64 @@ public:
     Tree m_tree = error_tree;
 };
 
-class SpecFunction {
+class SpecificationClause { // A parsed clause that owns its parsed header and its nested subclauses; base class of SpecFunction.
+    AK_MAKE_DEFAULT_MOVABLE(SpecificationClause);
+
 public:
-    struct Argument {
-        StringView name;
-    };
+    static NonnullOwnPtr<SpecificationClause> create(SpecificationParsingContext& ctx, XML::Node const* element); // Factory; the only public way to obtain a clause, since the default constructor is private.
+
+    virtual ~SpecificationClause() = default; // Virtual because clauses are held and destroyed through base-class owning pointers.
+
+    void collect_into(TranslationUnitRef translation_unit);
 
-    static ParseErrorOr<SpecFunction> create(XML::Node const* element);
+protected:
+    virtual bool post_initialize(SpecificationParsingContext& /*ctx*/, XML::Node const* /*element*/) { return true; } // Hook for derived classes; the base implementation always reports success.
+    virtual void do_collect(TranslationUnitRef /*translation_unit*/) { } // Hook for derived classes; the base implementation contributes nothing.
+
+    ClauseHeader m_header; // Parsed header; protected so derived classes can read it.
+
+private:
+    SpecificationClause() = default;
+    ParseErrorOr<void> parse_header(XML::Node const* element);
+    void parse(SpecificationParsingContext& ctx, XML::Node const* element);
 
-    ParseErrorOr<void> parse_definition(XML::Node const* element);
+    Vector<NonnullOwnPtr<SpecificationClause>> m_subclauses; // Nested clauses, each owned by its parent.
+};
+
+class SpecFunction : public SpecificationClause { // A clause that additionally carries a function's identifiers, arguments and algorithm.
+public:
+    SpecFunction(SpecificationClause&& clause) // Takes over the state of an already-constructed base clause by moving from it.
+        : SpecificationClause(move(clause))
+    {
+    }
+
+protected:
+    bool post_initialize(SpecificationParsingContext& ctx, XML::Node const* element) override;
+    void do_collect(TranslationUnitRef translation_unit) override;
+
+private:
+    ParseErrorOr<void> do_post_initialize(SpecificationParsingContext& ctx, XML::Node const* element); // Error-returning worker behind post_initialize().
 
     StringView m_section_number;
     StringView m_id;
     StringView m_name;
 
-    Vector<Argument> m_arguments;
+    Vector<FunctionArgument> m_arguments; // Stored as FunctionArgument values rather than the removed nested Argument struct.
     Algorithm m_algorithm;
 };
 
+class Specification { // Owns the list of top-level clauses produced while parsing.
+public:
+    static Specification create(SpecificationParsingContext& ctx, XML::Node const* element); // Factory that returns the parsed Specification by value.
+
+    void collect_into(TranslationUnitRef translation_unit);
+
+private:
+    void parse(SpecificationParsingContext& ctx, XML::Node const* element);
+
+    Vector<NonnullOwnPtr<SpecificationClause>> m_clauses; // Top-level clauses, held through base-class owning pointers.
+};
+
 class SpecParsingStep : public CompilationStep {
 public:
     SpecParsingStep();

+ 8 - 4
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp

@@ -479,24 +479,28 @@ ParseErrorOr<Tree> TextParser::parse_step_with_substeps(Tree substeps)
     return ParseError::create("Unable to parse step with substeps"sv, m_node);
 }
 
-ParseErrorOr<TextParser::DefinitionParseResult> TextParser::parse_definition()
+ParseErrorOr<ClauseHeader> TextParser::parse_clause_header() // Parses: section number, one name token, '(', comma-separated identifiers, ')', end of input. Any mismatch propagates as a ParseError.
 {
-    DefinitionParseResult result;
+    ClauseHeader result;
 
     auto section_number_token = TRY(consume_token_with_type(TokenType::SectionNumber));
     result.section_number = section_number_token->data;
 
-    result.function_name = TRY(consume_token())->data;
+    ClauseHeader::FunctionDefinition function_definition;
+
+    function_definition.name = TRY(consume_token())->data; // Accepts a token of any type as the name.
 
     TRY(consume_token_with_type(TokenType::ParenOpen));
     while (true) {
-        result.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier))->data });
+        function_definition.arguments.append({ TRY(consume_token_with_type(TokenType::Identifier))->data }); // An identifier is consumed before ')' is ever checked, so an empty "()" argument list fails here.
         auto next_token = TRY(consume_token_with_one_of_types({ TokenType::ParenClose, TokenType::Comma }));
         if (next_token->type == TokenType::ParenClose)
             break;
     }
     TRY(expect_eof());
 
+    result.header = function_definition; // Every successful parse stores the FunctionDefinition alternative; this function never leaves the header as AK::Empty.
+
     return result;
 }
 

+ 12 - 7
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.h

@@ -7,26 +7,31 @@
 #pragma once
 
 #include "AST/AST.h"
+#include "Function.h"
 #include "Parser/ParseError.h"
 #include "Parser/Token.h"
 
 namespace JSSpecCompiler {
 
-class TextParser {
-public:
-    struct DefinitionParseResult {
-        StringView section_number;
-        StringView function_name;
-        Vector<StringView> arguments;
+struct ClauseHeader { // Result type of TextParser::parse_clause_header(): a section number plus an optional function definition.
+    struct FunctionDefinition { // Name and argument list of a function described by a header.
+        StringView name;
+        Vector<FunctionArgument> arguments;
     };
 
+    StringView section_number;
+    Variant<AK::Empty, FunctionDefinition> header; // AK::Empty is the alternative for a header that carries no function definition.
+};
+
+class TextParser {
+public:
     TextParser(Vector<Token>& tokens_, XML::Node const* node_)
         : m_tokens(tokens_)
         , m_node(node_)
     {
     }
 
-    ParseErrorOr<DefinitionParseResult> parse_definition();
+    ParseErrorOr<ClauseHeader> parse_clause_header();
     ParseErrorOr<Tree> parse_step_without_substeps();
     ParseErrorOr<Tree> parse_step_with_substeps(Tree substeps);
 

+ 1 - 1
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/main.cpp

@@ -161,7 +161,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
                 outln(stderr, "{}", function->m_ast);
             }
         }
-        if (step.dump_cfg && translation_unit.functions_to_compile()[0]->m_cfg != nullptr) {
+        if (step.dump_cfg && translation_unit.functions_to_compile().size() && translation_unit.functions_to_compile()[0]->m_cfg != nullptr) {
             outln(stderr, "===== CFG after {} =====", step.step->name());
             for (auto const& function : translation_unit.functions_to_compile()) {
                 outln(stderr, "{}({}):", function->m_name, function->m_arguments);