Forráskód Böngészése

JSSpecCompiler: Get rid of ParseError in Lexer

Dan Klishch 1 éve
szülő
commit
9a2337f7ad

+ 92 - 36
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp

@@ -43,9 +43,8 @@ bool can_end_word_token(char c)
 {
     return is_ascii_space(c) || ".,"sv.contains(c);
 }
-}
 
-ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
+void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
 {
     static constexpr struct {
         StringView text_to_match;
@@ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c
         if (word.length())
             tokens.append({ TokenType::Word, word, node, move(token_location) });
     }
-    return {};
 }
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
+// Kind of XML subtree handed to tokenize_tree; it selects which child elements are accepted there.
+enum class TreeType {
+    // An algorithm step: a child element named tag_ol is accepted and recorded as the substeps list.
+    AlgorithmStep,
+    // A header: a child element named tag_span is accepted and turned into a SectionNumber token.
+    Header,
+};
+
+// Mutable state that tokenize_tree fills in and that tokenize_step / tokenize_header read back.
+struct TokenizerState {
+    // Tokens appended so far.
+    Vector<Token> tokens;
+    // The tag_ol child of an algorithm step once one has been seen; nullptr otherwise.
+    XML::Node const* substeps = nullptr;
+    // Set by tokenize_tree's report_error helper; tokenize_tree stops visiting children once it is set.
+    bool has_errors = false;
+};
+
+// Walks the direct children of `node` and appends the tokens they produce to `state.tokens`.
+//
+// - Element children are mapped to tokens by tag name (tag_var, tag_emu_val, tag_emu_xref; in a
+//   Header tree also tag_span; in an AlgorithmStep tree a tag_ol child is stored in `state.substeps`
+//   instead of producing a token). Any other element is reported as an error.
+// - Text children are passed to tokenize_string, unless a substeps list was already seen and the
+//   text is non-empty, which is reported as an error.
+// - Every reported error sets `state.has_errors`; the loop stops before visiting the next child.
+// - A trailing MemberAccess token is reinterpreted as Dot.
+void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type)
 {
-    TokenizeTreeResult result;
-    auto& tokens = result.tokens;
+    // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
+    auto& tokens = state.tokens;
+    auto& substeps = state.substeps;
+    auto& has_errors = state.has_errors;
 
     for (auto const& child : node->as_element().children) {
-        TRY(child->content.visit(
-            [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
-                if (result.substeps != nullptr)
-                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);
+        if (has_errors)
+            break;
 
+        child->content.visit(
+            [&](XML::Node::Element const& element) -> void {
                 Location child_location = ctx.location_from_xml_offset(child->offset);
+                auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) {
+                    ctx.diag().error(child_location, move(fmt), parameters...);
+                    has_errors = true;
+                };
+
+                if (substeps) {
+                    report_error("substeps list must be the last child of algorithm step");
+                    return;
+                }
 
                 if (element.name == tag_var) {
-                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
-                    return {};
-                }
+                    auto variable_name = get_text_contents(child);
+                    if (!variable_name.has_value())
+                        report_error("malformed <var> subtree, expected single text child node");
 
-                if (element.name == tag_span) {
-                    auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
-                    if (element_class != class_secnum)
-                        return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
-                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
-                    return {};
+                    tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) });
+                    return;
                 }
 
                 if (element.name == tag_emu_val) {
-                    auto contents = TRY(get_text_contents(child));
+                    auto maybe_contents = get_text_contents(child);
+                    if (!maybe_contents.has_value())
+                        report_error("malformed <emu-val> subtree, expected single text child node");
+
+                    auto contents = maybe_contents.value_or(""sv);
+
                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
                         tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
                     else if (contents == "undefined")
                         tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
                     else
                         tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
-                    return {};
+                    return;
                 }
 
                 if (element.name == tag_emu_xref) {
-                    auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
-                    tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
-                    return {};
+                    auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) {
+                        return get_text_contents(node).value_or(""sv);
+                    });
+                    if (!identifier.has_value() || identifier.value().is_empty())
+                        report_error("malformed <emu-xref> subtree, expected <a> with nested single text node");
+
+                    tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) });
+                    return;
                 }
 
-                if (element.name == tag_ol) {
-                    if (!allow_substeps)
-                        return ParseError::create("Found nested list but substeps are not allowed"sv, child);
-                    result.substeps = child;
-                    return {};
+                if (tree_type == TreeType::Header && element.name == tag_span) {
+                    auto element_class = get_attribute_by_name(child, attribute_class);
+                    if (element_class != class_secnum)
+                        report_error("expected <span> to have class='secnum' attribute");
+
+                    auto section_number = get_text_contents(child);
+                    if (!section_number.has_value())
+                        report_error("malformed section number span subtree, expected single text child node");
+
+                    tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) });
+                    return;
                 }
 
-                return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child);
+                if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) {
+                    substeps = child;
+                    return;
+                }
+
+                report_error("<{}> should not be a child of algorithm step", element.name);
             },
-            [&](XML::Node::Text const& text) -> ParseErrorOr<void> {
+            [&](XML::Node::Text const& text) {
                 auto view = text.builder.string_view();
-                if (result.substeps && !contains_empty_text(child))
-                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);
-                return tokenize_string(ctx, child, view, tokens);
+                if (substeps != nullptr && !contains_empty_text(child)) {
+                    ctx.diag().error(ctx.location_from_xml_offset(child->offset),
+                        "substeps list must be the last child of algorithm step");
+                    // Mark the state as failed, as report_error does for element children, so that
+                    // callers do not receive a token list for a subtree that produced an error.
+                    has_errors = true;
+                } else {
+                    tokenize_string(ctx, child, view, tokens);
+                }
             },
-            move(ignore_comments)));
+            [&](auto const&) {});
     }
 
     if (tokens.size() && tokens.last().type == TokenType::MemberAccess)
         tokens.last().type = TokenType::Dot;
+}
+}
 
-    return result;
+// Tokenizes the algorithm step rooted at `node`. The returned token list has no value when
+// tokenize_tree flagged an error; the substeps node it recorded (possibly nullptr) is returned
+// in either case.
+StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node)
+{
+    TokenizerState tokenizer_state;
+    tokenize_tree(ctx, tokenizer_state, node, TreeType::AlgorithmStep);
+
+    StepTokenizationResult step_result;
+    step_result.substeps = tokenizer_state.substeps;
+    if (!tokenizer_state.has_errors)
+        step_result.tokens = move(tokenizer_state.tokens);
+    return step_result;
+}
+
+// Tokenizes the header rooted at `node`. Returns the token list, or an empty Optional when
+// tokenize_tree flagged an error.
+Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node)
+{
+    TokenizerState state;
+    tokenize_tree(ctx, state, node, TreeType::Header);
+    // `state` is a local that dies on return, so hand its tokens over instead of copying the
+    // whole vector (same as tokenize_step does).
+    return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) };
 }
 
 }

+ 4 - 5
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h

@@ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv;
 
 inline constexpr StringView class_secnum = "secnum"sv;
 
-ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
-
-struct TokenizeTreeResult {
-    Vector<Token> tokens;
+// Value returned by tokenize_step: the step's tokens plus the node recorded as its substeps list.
+struct StepTokenizationResult {
+    // Has no value when tokenization flagged an error.
+    Optional<Vector<Token>> tokens;
     XML::Node const* substeps = nullptr;
 };
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
+StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node);
+Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node);
 
 }

+ 15 - 18
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp

@@ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
 {
     VERIFY(element->as_element().name == tag_li);
 
-    auto tokenization_result = tokenize_tree(ctx, element, true);
-    if (tokenization_result.is_error()) {
-        ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
-            "{}", tokenization_result.error()->to_string());
-        return {};
-    }
+    auto [maybe_tokens, substeps] = tokenize_step(ctx, element);
 
-    auto [tokens, substeps] = tokenization_result.release_value();
     AlgorithmStep result(ctx);
-    result.m_tokens = move(tokens);
     result.m_node = element;
 
     if (substeps) {
@@ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
         result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
     }
 
+    if (!maybe_tokens.has_value())
+        return {};
+    result.m_tokens = maybe_tokens.release_value();
+
     if (!result.parse())
         return {};
     return result;
@@ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node
     auto& ctx = *m_ctx_pointer;
     VERIFY(element->as_element().name == tag_h1);
 
-    auto tokenization_result = tokenize_tree(ctx, element, false);
-    if (tokenization_result.is_error()) {
-        return FailedTextParseDiagnostic {
-            ctx.location_from_xml_offset(tokenization_result.error()->offset()),
-            tokenization_result.error()->to_string()
-        };
-    }
-    auto const& tokens = tokenization_result.release_value().tokens;
+    auto maybe_tokens = tokenize_header(ctx, element);
+    if (!maybe_tokens.has_value())
+        return {};
+
+    auto const& tokens = maybe_tokens.release_value();
 
     TextParser parser(ctx, tokens, element);
     auto parse_result = parser.parse_clause_header();
@@ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element)
     auto& ctx = context();
     u32 child_index = 0;
 
+    bool node_ignored_warning_issued = false;
     Optional<FailedTextParseDiagnostic> header_parse_error;
 
     for (auto const& child : element->as_element().children) {
@@ -312,10 +307,12 @@ void SpecificationClause::parse(XML::Node const* element)
                         m_subclauses.append(create(ctx, child));
                         return;
                     }
-                    if (header_parse_error.has_value()) {
+                    if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) {
+                        node_ignored_warning_issued = true;
                         ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
                             "node content will be ignored since section header was not parsed successfully");
-                        ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
+                        if (header_parse_error.has_value())
+                            ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
                     }
                 }
                 ++child_index;

+ 12 - 26
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.cpp

@@ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node)
     return node->as_text().builder.string_view().trim_whitespace().is_empty();
 }
 
-ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name)
-{
-    auto const& attribute = node->as_element().attributes.get(attribute_name);
-
-    if (!attribute.has_value())
-        return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node);
-    return attribute.value();
-}
-
 Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name)
 {
     auto const& attribute = node->as_element().attributes.get(attribute_name);
@@ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att
     return attribute.value();
 }
 
-ParseErrorOr<StringView> get_text_contents(XML::Node const* node)
+// Returns the text of `node`'s only child when `node` has exactly one child and that child is a
+// text node; returns an empty Optional otherwise. `node` is accessed via as_element().
+Optional<StringView> get_text_contents(XML::Node const* node)
 {
     auto const& children = node->as_element().children;
-
     if (children.size() != 1 || !children[0]->is_text())
-        return ParseError::create("Expected single text node in a child list of the node"sv, node);
+        return {};
     return children[0]->as_text().builder.string_view();
 }
 
-ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name)
+// Returns the single element child of `element` when that child's tag name equals `tag_name`.
+// Returns an empty Optional when there is no element child, more than one element child, an
+// element child with a different tag name, or a text child for which contains_empty_text is false.
+// Children that are neither elements nor text (handled by the catch-all visitor) are ignored.
+Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name)
 {
     XML::Node const* result = nullptr;
 
     for (auto const& child : element->as_element().children) {
-        TRY(child->content.visit(
-            [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
-                if (element.name != tag_name)
-                    return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child);
-                if (result != nullptr)
-                    return ParseError::create("Element must have only one child"sv, child);
+        auto is_valid = child->content.visit(
+            [&](XML::Node::Element const& element) {
+                // Validate before overwriting `result`: a second element child or a child with
+                // the wrong tag makes the subtree invalid.
+                if (result != nullptr || element.name != tag_name)
+                    return false;
                 result = child;
-                return {};
+                return true;
             },
-            [&](XML::Node::Text const&) -> ParseErrorOr<void> {
-                if (!contains_empty_text(child))
-                    return ParseError::create("Element should not have non-empty child text nodes"sv, element);
-                return {};
+            [&](XML::Node::Text const&) {
+                return contains_empty_text(child);
             },
-            move(ignore_comments)));
+            [&](auto const&) { return true; });
+        if (!is_valid)
+            return {};
     }
 
     if (result == nullptr)
-        return ParseError::create(String::formatted("Element must have only one child"), element);
+        return {};
     return result;
 }
 

+ 2 - 3
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/XMLUtils.h

@@ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {};
 
 bool contains_empty_text(XML::Node const* node);
 
-ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name);
 Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name);
 
-ParseErrorOr<StringView> get_text_contents(XML::Node const* node);
+Optional<StringView> get_text_contents(XML::Node const* node);
 
-ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name);
+Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name);
 
 }