1 年之前 · dee4978d67
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h
@@ -59,6 +59,7 @@ class ControlFlowGraph;
 
				 class RecursiveASTVisitor;
			
 
				 
			
 
				 // Parser/SpecParser.h
			
 
				+class SpecificationParsingContext;
			
 
				 class AlgorithmStep;
			
 
				 class AlgorithmStepList;
			
 
				 class Algorithm;
			
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
@@ -4,16 +4,17 @@
 
				  * SPDX-License-Identifier: BSD-2-Clause
			
 
				  */
			
 
				 
			
 
				-#include <AK/GenericLexer.h>
			
 
				 #include <AK/NonnullOwnPtr.h>
			
 
				+#include <LibXML/Parser/Parser.h>
			
 
				 
			
 
				 #include "Parser/Lexer.h"
			
 
				+#include "Parser/SpecParser.h"
			
 
				 #include "Parser/XMLUtils.h"
			
 
				 
			
 
				 namespace JSSpecCompiler {
			
 
				 
			
 
				 namespace {
			
 
				-Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
			
 
				+Optional<Token> consume_number(XML::LineTrackingLexer& lexer, XML::Node const* node, Location& location)
			
 
				 {
			
 
				     u64 start = lexer.tell();
			
 
				 
			
@@ -35,7 +36,7 @@ Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
 
				 
			
 
				     auto length = lexer.tell() - start;
			
 
				     lexer.retreat(length);
			
 
				-    return { Token { TokenType::Number, lexer.consume(length), node } };
			
 
				+    return { Token { TokenType::Number, lexer.consume(length), node, move(location) } };
			
 
				 }
			
 
				 
			
 
				 bool can_end_word_token(char c)
			
@@ -44,56 +45,68 @@ bool can_end_word_token(char c)
 
				 }
			
 
				 }
			
 
				 
			
 
				-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens)
			
 
				+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
			
 
				 {
			
 
				-#define CONSUME_IF_NEXT(view, type)                                      \
			
 
				-    if (lexer.next_is(view##sv)) {                                       \
			
 
				-        size_t length = __builtin_strlen(view);                          \
			
 
				-        tokens.append({ TokenType::type, lexer.consume(length), node }); \
			
 
				-        continue;                                                        \
			
 
				-    }
			
 
				+    // Candidates are tried in array order and the first match wins, so a literal that is a prefix
			
 
				+    // of a longer one (e.g. "." and ". ", or "is" and "is not equal to") must come after it.
			
 
				+    static constexpr struct {
			
 
				+        StringView text_to_match;
			
 
				+        TokenType token_type;
			
 
				+    } choices[] = {
			
 
				+        { "-"sv, TokenType::AmbiguousMinus },
			
 
				+        { "}"sv, TokenType::BraceClose },
			
 
				+        { "{"sv, TokenType::BraceOpen },
			
 
				+        { ":"sv, TokenType::Colon },
			
 
				+        { ","sv, TokenType::Comma },
			
 
				+        { "/"sv, TokenType::Division },
			
 
				+        { ". "sv, TokenType::Dot },
			
 
				+        { ".\n"sv, TokenType::Dot },
			
 
				+        { "="sv, TokenType::Equals },
			
 
				+        { "is equal to"sv, TokenType::Equals },
			
 
				+        { "!"sv, TokenType::ExclamationMark },
			
 
				+        { ">"sv, TokenType::Greater },
			
 
				+        { "<"sv, TokenType::Less },
			
 
				+        { "."sv, TokenType::MemberAccess },
			
 
				+        { "×"sv, TokenType::Multiplication },
			
 
				+        { "is not equal to"sv, TokenType::NotEquals },
			
 
				+        { "≠"sv, TokenType::NotEquals },
			
 
				+        { "is"sv, TokenType::Is }, // Keep after every longer "is ..." phrase.
			
 
				+        { ")"sv, TokenType::ParenClose },
			
 
				+        { "("sv, TokenType::ParenOpen },
			
 
				+        { "+"sv, TokenType::Plus },
			
 
				+    };
			
 
				+
			
 
				+    XML::LineTrackingLexer lexer(view, node->offset);
			
 
				 
			
 
				-    GenericLexer lexer(view);
			
 
				     while (!lexer.is_eof()) {
			
 
				         lexer.ignore_while(is_ascii_space);
			
 
				 
			
 
				-        if (auto result = consume_number(lexer, node); result.has_value()) {
			
 
				+        // FIXME: This is incorrect since we count text offset after XML reference resolution. To do
			
 
				+        //        this properly, we need support from XML::Parser.
			
 
				+        Location token_location = ctx.location_from_xml_offset(lexer.offset_for(lexer.tell()));
			
 
				+
			
 
				+        if (auto result = consume_number(lexer, node, token_location); result.has_value()) {
			
 
				             tokens.append(result.release_value());
			
 
				             continue;
			
 
				         }
			
 
				 
			
 
				-        CONSUME_IF_NEXT("(", ParenOpen);
			
 
				-        CONSUME_IF_NEXT(")", ParenClose);
			
 
				-        CONSUME_IF_NEXT("{", BraceOpen);
			
 
				-        CONSUME_IF_NEXT("}", BraceClose);
			
 
				-        CONSUME_IF_NEXT(",", Comma);
			
 
				-        CONSUME_IF_NEXT(". ", Dot);
			
 
				-        CONSUME_IF_NEXT(".\n", Dot);
			
 
				-        CONSUME_IF_NEXT(":", Colon);
			
 
				-        CONSUME_IF_NEXT(".", MemberAccess);
			
 
				-        CONSUME_IF_NEXT("<", Less);
			
 
				-        CONSUME_IF_NEXT(">", Greater);
			
 
				-        CONSUME_IF_NEXT("is not equal to", NotEquals);
			
 
				-        CONSUME_IF_NEXT("≠", NotEquals);
			
 
				-        CONSUME_IF_NEXT("is equal to", Equals);
			
 
				-        CONSUME_IF_NEXT("=", Equals);
			
 
				-        CONSUME_IF_NEXT("+", Plus);
			
 
				-        CONSUME_IF_NEXT("-", AmbiguousMinus);
			
 
				-        CONSUME_IF_NEXT("×", Multiplication);
			
 
				-        CONSUME_IF_NEXT("/", Division);
			
 
				-        CONSUME_IF_NEXT("!", ExclamationMark);
			
 
				-        CONSUME_IF_NEXT("is", Is);
			
 
				+        bool matched = false;
			
 
				+        for (auto const& [text_to_match, token_type] : choices) {
			
 
				+            if (lexer.consume_specific(text_to_match)) {
			
 
				+                tokens.append({ token_type, ""sv, node, move(token_location) });
			
 
				+                matched = true;
			
 
				+                break;
			
 
				+            }
			
 
				+        }
			
 
				+        if (matched)
			
 
				+            continue;
			
 
				 
			
 
				         StringView word = lexer.consume_until(can_end_word_token);
			
 
				         if (word.length())
			
 
				-            tokens.append({ TokenType::Word, word, node });
			
 
				+            tokens.append({ TokenType::Word, word, node, move(token_location) });
			
 
				     }
			
 
				     return {};
			
 
				-
			
 
				-#undef CONSUME_IF_NEXT
			
 
				 }
			
 
				 
			
 
				-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps)
			
 
				+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
			
 
				 {
			
 
				     TokenizeTreeResult result;
			
 
				     auto& tokens = result.tokens;
			
@@ -104,8 +117,10 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
 
				                 if (result.substeps != nullptr)
			
 
				                     return ParseError::create("Substeps list must be the last non-empty child"sv, child);
			
 
				 
			
 
				+                Location child_location = ctx.location_from_xml_offset(child->offset);
			
 
				+
			
 
				                 if (element.name == tag_var) {
			
 
				-                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child });
			
 
				+                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
			
 
				                     return {};
			
 
				                 }
			
 
				 
			
@@ -113,24 +128,24 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
 
				                     auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
			
 
				                     if (element_class != class_secnum)
			
 
				                         return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
			
 
				-                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child });
			
 
				+                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
			
 
				                     return {};
			
 
				                 }
			
 
				 
			
 
				                 if (element.name == tag_emu_val) {
			
 
				                     auto contents = TRY(get_text_contents(child));
			
 
				                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
			
 
				-                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child });
			
 
				+                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
			
 
				                     else if (contents == "undefined")
			
 
				-                        tokens.append({ TokenType::Undefined, contents, child });
			
 
				+                        tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
			
 
				                     else
			
 
				-                        tokens.append({ TokenType::Identifier, contents, child });
			
 
				+                        tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
			
 
				                     return {};
			
 
				                 }
			
 
				 
			
 
				                 if (element.name == tag_emu_xref) {
			
 
				                     auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
			
 
				-                    tokens.append({ TokenType::Identifier, contents, child });
			
 
				+                    tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
			
 
				                     return {};
			
 
				                 }
			
 
				 
			
@@ -147,7 +162,7 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
 
				                 auto view = text.builder.string_view();
			
 
				                 if (result.substeps && !contains_empty_text(child))
			
 
				                     return ParseError::create("Substeps list must be the last non-empty child"sv, child);
			
 
				-                return tokenize_string(child, view, tokens);
			
 
				+                return tokenize_string(ctx, child, view, tokens);
			
 
				             },
			
 
				             move(ignore_comments)));
			
 
				     }
			
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h
@@ -31,13 +31,13 @@ inline constexpr StringView attribute_id = "id"sv;
 
				 
			
 
				 inline constexpr StringView class_secnum = "secnum"sv;
			
 
				 
			
 
				-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens);
			
 
				+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
			
 
				 
			
 
				 struct TokenizeTreeResult {
			
 
				     Vector<Token> tokens;
			
 
				     XML::Node const* substeps = nullptr;
			
 
				 };
			
 
				 
			
 
				-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps = false);
			
 
				+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
			
 
				 
			
 
				 }
			
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp
@@ -64,7 +64,7 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
 
				 {
			
 
				     VERIFY(element->as_element().name == tag_li);
			
 
				 
			
 
				-    auto tokenization_result = tokenize_tree(element, true);
			
 
				+    auto tokenization_result = tokenize_tree(ctx, element, true);
			
 
				     if (tokenization_result.is_error()) {
			
 
				         ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
			
 
				             "{}", tokenization_result.error()->to_string());
			
@@ -253,7 +253,7 @@ void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
 
				 ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
			
 
				 {
			
 
				     VERIFY(element->as_element().name == tag_h1);
			
 
				-    auto tokens = TRY(tokenize_tree(element));
			
 
				+    auto tokens = TRY(tokenize_tree(*m_ctx_pointer, element));
			
 
				     TextParser parser(tokens.tokens, element);
			
 
				     m_header = TRY(parser.parse_clause_header());
			
 
				     return {};
			
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp
@@ -226,7 +226,7 @@ ParseErrorOr<Tree> TextParser::parse_expression()
 
				 
			
 
				         if (token.type == TokenType::ParenOpen) {
			
 
				             if (last_element_type == ExpressionType)
			
 
				-                stack.append(Token { TokenType::FunctionCall, ""sv, m_node });
			
 
				+                stack.append(Token { TokenType::FunctionCall, ""sv, token.node, token.location });
			
 
				             stack.append(token);
			
 
				 
			
 
				             if (m_next_token_index + 1 < m_tokens.size()
			
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h
@@ -9,6 +9,7 @@
 
				 #include <LibXML/Forward.h>
			
 
				 
			
 
				 #include "AST/AST.h"
			
 
				+#include "DiagnosticEngine.h"
			
 
				 
			
 
				 namespace JSSpecCompiler {
			
 
				 
			
@@ -46,7 +47,6 @@ constexpr i32 closing_bracket_precedence = 18;
 
				     F(Multiplication, 5, Invalid, Multiplication, Invalid)    \
			
 
				     F(Division, 5, Invalid, Division, Invalid)                \
			
 
				     F(FunctionCall, 2, Invalid, FunctionCall, Invalid)        \
			
 
				-    F(ArraySubscript, 2, Invalid, ArraySubscript, Invalid)    \
			
 
				     F(ExclamationMark, 3, AssertCompletion, Invalid, Invalid) \
			
 
				     F(Is, -1, Invalid, Invalid, Invalid)
			
 
				 
			
@@ -110,6 +110,7 @@ struct Token {
 
				     TokenType type;
			
 
				     StringView data;
			
 
				     XML::Node const* node;
			
 
				+    Location location;
			
 
				 };
			
 
				 
			
 
				 }
			
--- a/Userland/Libraries/LibXML/Parser/Parser.h
+++ b/Userland/Libraries/LibXML/Parser/Parser.h
@@ -39,10 +39,21 @@ struct Listener {
 
				     virtual void error(ParseError const&) { }
			
 
				 };
			
 
				 
			
 
				+// FIXME: This is also used in JSSpecCompiler, so it should probably live in AK or even be merged
			
 
				+//        with AK::GenericLexer.
			
 
				 class LineTrackingLexer : public GenericLexer {
			
 
				 public:
			
 
				     using GenericLexer::GenericLexer;
			
 
				 
			
 
				+    LineTrackingLexer(StringView input, XML::Offset start_offset)
			
 
				+        : GenericLexer(input)
			
 
				+        , m_cached_offset {
			
 
				+            .line = start_offset.line,
			
 
				+            .column = start_offset.column,
			
 
				+        }
			
 
				+    {
			
 
				+    }
			
 
				+
			
 
				     Offset cached_offset() const { return m_cached_offset; }
			
 
				     void restore_cached_offset(Offset cached_offset) { m_cached_offset = cached_offset; }
			
 
				     Offset offset_for(size_t) const;