Selaa lähdekoodia

LibGUI: Rework GML parser into a lexer+parser

This will make it easier to add GML syntax highlighting. :^)
Andreas Kling 4 vuotta sitten
vanhempi
commit
3d3084f088

+ 1 - 0
Libraries/LibGUI/CMakeLists.txt

@@ -30,6 +30,7 @@ set(SOURCES
     FileSystemModel.cpp
     FilteringProxyModel.cpp
     Frame.cpp
+    GMLLexer.cpp
     GMLParser.cpp
     GroupBox.cpp
     HeaderView.cpp

+ 180 - 0
Libraries/LibGUI/GMLLexer.cpp

@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "GMLLexer.h"
+#include <AK/Vector.h>
+#include <ctype.h>
+
+namespace GUI {
+
+// Creates a lexer over `input`. Only the StringView itself is stored; the
+// cursor and line/column tracking start from their in-class defaults.
+GMLLexer::GMLLexer(const StringView& input)
+    : m_input { input }
+{
+}
+
+// Returns the character `offset` positions past the cursor without advancing
+// it, or 0 when that position lies at or beyond the end of the input.
+char GMLLexer::peek(size_t offset) const
+{
+    const size_t target_index = m_index + offset;
+    if (target_index < m_input.length())
+        return m_input[target_index];
+    return 0;
+}
+
+// Consumes and returns the character under the cursor, advancing the index
+// and updating line/column tracking. The position held before the call is
+// saved in m_previous_position. Asserts that the cursor is inside the input.
+char GMLLexer::consume()
+{
+    ASSERT(m_index < m_input.length());
+    m_previous_position = m_position;
+
+    const char consumed = m_input[m_index];
+    ++m_index;
+
+    if (consumed != '\n') {
+        m_position.column++;
+        return consumed;
+    }
+
+    // A newline moves to the start of the next line.
+    m_position.line++;
+    m_position.column = 0;
+    return consumed;
+}
+
+// Returns true if `ch` may begin an identifier: a letter (per isalpha) or '_'.
+static bool is_valid_identifier_start(char ch)
+{
+    // <ctype.h> classifiers have undefined behavior for negative arguments
+    // other than EOF, so convert to unsigned char before classifying.
+    return isalpha(static_cast<unsigned char>(ch)) || ch == '_';
+}
+
+// Returns true if `ch` may continue an identifier: a letter or digit (per
+// isalnum) or '_'.
+static bool is_valid_identifier_character(char ch)
+{
+    // <ctype.h> classifiers have undefined behavior for negative arguments
+    // other than EOF, so convert to unsigned char before classifying.
+    return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
+}
+
+// Returns true if `ch` may begin a class name (the character following '@'):
+// a letter (per isalpha) or '_'.
+static bool is_valid_class_start(char ch)
+{
+    // <ctype.h> classifiers have undefined behavior for negative arguments
+    // other than EOF, so convert to unsigned char before classifying.
+    return isalpha(static_cast<unsigned char>(ch)) || ch == '_';
+}
+
+// Returns true if `ch` may appear inside a class name: a letter or digit (per
+// isalnum), '_', or ':' (so that namespace-qualified names lex as one token).
+static bool is_valid_class_character(char ch)
+{
+    // <ctype.h> classifiers have undefined behavior for negative arguments
+    // other than EOF, so convert to unsigned char before classifying.
+    return isalnum(static_cast<unsigned char>(ch)) || ch == '_' || ch == ':';
+}
+
+// Tokenizes the input from the current cursor position to its end and returns
+// the tokens in source order. Whitespace is skipped and produces no token;
+// any character that starts no known token is emitted as a one-character
+// Unknown token.
+Vector<GMLToken> GMLLexer::lex()
+{
+    Vector<GMLToken> tokens;
+
+    size_t token_start_index = 0;
+    GMLPosition token_start_position { 0, 0 };
+
+    // <ctype.h> classifiers have undefined behavior for negative arguments
+    // other than EOF, so convert to unsigned char before calling isspace().
+    auto is_whitespace = [](char ch) {
+        return isspace(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    // Remember where the token that is about to be consumed starts.
+    auto begin_token = [&] {
+        token_start_index = m_index;
+        token_start_position = m_position;
+    };
+
+    // Emit a token of `type` covering everything consumed since begin_token().
+    auto commit_token = [&](auto type) {
+        GMLToken token;
+        token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index);
+        token.m_type = type;
+        token.m_start = token_start_position;
+        // m_previous_position is the cursor position before the most recent
+        // consume(), i.e. the position of the last character consumed.
+        token.m_end = m_previous_position;
+        tokens.append(token);
+    };
+
+    // Lex "@Name" as a ClassMarker token ('@') followed by a ClassName token.
+    // Both call sites have already checked that '@' is followed by a valid
+    // class start character.
+    auto consume_class = [&] {
+        begin_token();
+        consume();
+        commit_token(GMLToken::Type::ClassMarker);
+        begin_token();
+        while (is_valid_class_character(peek()))
+            consume();
+        commit_token(GMLToken::Type::ClassName);
+    };
+
+    while (m_index < m_input.length()) {
+        // Whitespace separates tokens but is not emitted itself.
+        if (is_whitespace(peek(0))) {
+            while (is_whitespace(peek()))
+                consume();
+            continue;
+        }
+
+        // C++ style comments: everything up to (not including) the newline.
+        if (peek(0) == '/' && peek(1) == '/') {
+            begin_token();
+            while (peek() && peek() != '\n')
+                consume();
+            commit_token(GMLToken::Type::Comment);
+            continue;
+        }
+
+        if (peek(0) == '{') {
+            begin_token();
+            consume();
+            commit_token(GMLToken::Type::LeftCurly);
+            continue;
+        }
+
+        if (peek(0) == '}') {
+            begin_token();
+            consume();
+            commit_token(GMLToken::Type::RightCurly);
+            continue;
+        }
+
+        if (peek(0) == '@' && is_valid_class_start(peek(1))) {
+            consume_class();
+            continue;
+        }
+
+        if (is_valid_identifier_start(peek(0))) {
+            begin_token();
+            consume();
+            while (is_valid_identifier_character(peek(0)))
+                consume();
+            commit_token(GMLToken::Type::Identifier);
+            continue;
+        }
+
+        if (peek(0) == ':') {
+            begin_token();
+            consume();
+            commit_token(GMLToken::Type::Colon);
+
+            // The value may be separated from the colon by whitespace.
+            while (is_whitespace(peek()))
+                consume();
+
+            if (peek(0) == '@' && is_valid_class_start(peek(1))) {
+                // The property value is an object.
+                consume_class();
+            } else {
+                // Otherwise the rest of the line is handed on as one JsonValue token.
+                begin_token();
+                while (peek() && peek() != '\n')
+                    consume();
+                commit_token(GMLToken::Type::JsonValue);
+            }
+            continue;
+        }
+
+        // Anything else becomes a one-character Unknown token. begin_token()
+        // is required here so the token does not inherit the start index and
+        // position of whatever was lexed before it.
+        begin_token();
+        consume();
+        commit_token(GMLToken::Type::Unknown);
+    }
+    return tokens;
+}
+
+}

+ 90 - 0
Libraries/LibGUI/GMLLexer.h

@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/StringView.h>
+
+namespace GUI {
+
+// X-macro listing every GML token type. Define __TOKEN(x) to the desired
+// expansion before using FOR_EACH_TOKEN_TYPE, and #undef it afterwards.
+#define FOR_EACH_TOKEN_TYPE \
+    __TOKEN(Unknown)        \
+    __TOKEN(Comment)        \
+    __TOKEN(ClassMarker)    \
+    __TOKEN(ClassName)      \
+    __TOKEN(LeftCurly)      \
+    __TOKEN(RightCurly)     \
+    __TOKEN(Identifier)     \
+    __TOKEN(Colon)          \
+    __TOKEN(JsonValue)
+
+// A zero-based line/column location in the lexer input. GMLLexer starts at
+// { 0, 0 }, bumps `line` and resets `column` to 0 on each consumed '\n', and
+// bumps `column` on every other consumed character.
+struct GMLPosition {
+    // Default to the origin so that a default-constructed position (for
+    // example inside a default-constructed GMLToken) never holds
+    // indeterminate values. Aggregate initialization such as { 0, 0 } still works.
+    size_t line { 0 };
+    size_t column { 0 };
+};
+
+// A single lexed token: its type, the slice of input it covers, and the
+// start/end positions recorded for it by GMLLexer.
+struct GMLToken {
+    // One enumerator per entry of FOR_EACH_TOKEN_TYPE, in the same order.
+    enum class Type {
+#define __TOKEN(x) x,
+        FOR_EACH_TOKEN_TYPE
+#undef __TOKEN
+    };
+
+    // Returns the enumerator name of m_type as a string literal (e.g. "Comment").
+    const char* to_string() const
+    {
+        switch (m_type) {
+#define __TOKEN(x) \
+    case Type::x:  \
+        return #x;
+            FOR_EACH_TOKEN_TYPE
+#undef __TOKEN
+        }
+        // Only reached if m_type holds a value that is not a listed enumerator.
+        ASSERT_NOT_REACHED();
+    }
+
+    Type m_type { Type::Unknown };
+    // Slice of the lexer input covered by this token.
+    StringView m_view;
+    // Lexer position at which the token began.
+    GMLPosition m_start;
+    // Lexer position before its most recent consume() when the token was
+    // committed, i.e. the position of the last character consumed.
+    GMLPosition m_end;
+};
+
+// Splits GML source text into a flat list of GMLToken values.
+class GMLLexer {
+public:
+    // NOTE(review): only the StringView is stored, so presumably the
+    // underlying buffer must outlive the lexer and the tokens it returns
+    // (their m_view members are slices of the input) — confirm with callers.
+    GMLLexer(const StringView&);
+
+    // Lexes from the current cursor position to the end of the input and
+    // returns the tokens in source order.
+    Vector<GMLToken> lex();
+
+private:
+    // Returns the character `offset` positions past the cursor, or 0 when
+    // that position is past the end of the input. Does not advance.
+    char peek(size_t offset = 0) const;
+    // Returns the character under the cursor and advances past it, updating
+    // m_previous_position and m_position.
+    char consume();
+
+    StringView m_input;
+    // Index into m_input of the next character to be consumed.
+    size_t m_index { 0 };
+    // Value m_position had before the most recent consume().
+    GMLPosition m_previous_position { 0, 0 };
+    // Line/column of the character at m_index.
+    GMLPosition m_position { 0, 0 };
+};
+
+}

+ 52 - 49
Libraries/LibGUI/GMLParser.cpp

@@ -27,87 +27,81 @@
 #include <AK/GenericLexer.h>
 #include <AK/JsonObject.h>
 #include <AK/JsonValue.h>
+#include <AK/Queue.h>
+#include <LibGUI/GMLLexer.h>
 #include <LibGUI/GMLParser.h>
 #include <ctype.h>
 
 namespace GUI {
 
-static bool is_valid_class_name_character(char ch)
-{
-    return isalpha(ch) || ch == ':';
-}
-
-static bool is_valid_property_name_character(char ch)
-{
-    return isalpha(ch) || ch == '_';
-}
-
-static void swallow_whitespace(GenericLexer& scanner)
-{
-    scanner.consume_while([](auto ch) { return isspace(ch); });
-}
-
-static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
+static Optional<JsonValue> parse_core_object(Queue<GMLToken>& tokens)
 {
     JsonObject object;
     JsonArray children;
 
-    // '@Foo' means new Core::Object of class Foo
-    if (!scanner.consume_specific('@')) {
-        dbgln("Expected '@'");
+    auto peek = [&] {
+        if (tokens.is_empty())
+            return GMLToken::Type::Unknown;
+        return tokens.head().m_type;
+    };
+
+    if (peek() != GMLToken::Type::ClassMarker) {
+        dbgln("Expected class marker");
         return {};
     }
 
-    auto class_name = scanner.consume_while([](auto ch) { return is_valid_class_name_character(ch); });
-    object.set("class", JsonValue(class_name));
-
-    swallow_whitespace(scanner);
+    tokens.dequeue();
 
-    if (!scanner.consume_specific('{')) {
-        dbgln("Expected '{{'");
+    if (peek() != GMLToken::Type::ClassName) {
+        dbgln("Expected class name");
         return {};
     }
 
-    swallow_whitespace(scanner);
+    auto class_name = tokens.dequeue();
+    object.set("class", JsonValue(class_name.m_view));
 
-    for (;;) {
-        swallow_whitespace(scanner);
+    if (peek() != GMLToken::Type::LeftCurly) {
+        dbgln("Expected {{");
+        return {};
+    }
+    tokens.dequeue();
 
-        if (scanner.peek() == '}') {
+    for (;;) {
+        if (peek() == GMLToken::Type::RightCurly) {
             // End of object
             break;
         }
 
-        if (scanner.peek() == '@') {
+        if (peek() == GMLToken::Type::ClassMarker) {
             // It's a child object.
-            auto value = parse_core_object(scanner);
-            if (!value.has_value())
+            auto value = parse_core_object(tokens);
+            if (!value.has_value()) {
+                dbgln("Parsing child object failed");
                 return {};
+            }
             if (!value.value().is_object()) {
                 dbgln("Expected child to be Core::Object");
                 return {};
             }
             children.append(value.release_value());
-        } else {
+        } else if (peek() == GMLToken::Type::Identifier) {
             // It's a property.
-            auto property_name = scanner.consume_while([](auto ch) { return is_valid_property_name_character(ch); });
-            swallow_whitespace(scanner);
+            auto property_name = tokens.dequeue();
 
-            if (property_name.is_empty()) {
+            if (property_name.m_view.is_empty()) {
                 dbgln("Expected non-empty property name");
                 return {};
             }
 
-            if (!scanner.consume_specific(':')) {
+            if (peek() != GMLToken::Type::Colon) {
                 dbgln("Expected ':'");
                 return {};
             }
-
-            swallow_whitespace(scanner);
+            tokens.dequeue();
 
             JsonValue value;
-            if (scanner.peek() == '@') {
-                auto parsed_value = parse_core_object(scanner);
+            if (peek() == GMLToken::Type::ClassMarker) {
+                auto parsed_value = parse_core_object(tokens);
                 if (!parsed_value.has_value())
                     return {};
                 if (!parsed_value.value().is_object()) {
@@ -115,23 +109,27 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
                     return {};
                 }
                 value = parsed_value.release_value();
-            } else {
-                auto value_string = scanner.consume_line();
-                auto parsed_value = JsonValue::from_string(value_string);
+            } else if (peek() == GMLToken::Type::JsonValue) {
+                auto value_string = tokens.dequeue();
+                auto parsed_value = JsonValue::from_string(value_string.m_view);
                 if (!parsed_value.has_value()) {
                     dbgln("Expected property to be JSON value");
                     return {};
                 }
                 value = parsed_value.release_value();
             }
-            object.set(property_name, move(value));
+            object.set(property_name.m_view, move(value));
+        } else {
+            dbgln("Expected child, property, or }}");
+            return {};
         }
     }
 
-    if (!scanner.consume_specific('}')) {
-        dbgln("Expected '}'");
+    if (peek() != GMLToken::Type::RightCurly) {
+        dbgln("Expected }}");
         return {};
     }
+    tokens.dequeue();
 
     if (!children.is_empty())
         object.set("children", move(children));
@@ -141,8 +139,13 @@ static Optional<JsonValue> parse_core_object(GenericLexer& scanner)
 
 JsonValue parse_gml(const StringView& string)
 {
-    GenericLexer scanner(string);
-    auto root = parse_core_object(scanner);
+    auto lexer = GMLLexer(string);
+
+    Queue<GMLToken> tokens;
+    for (auto& token : lexer.lex())
+        tokens.enqueue(token);
+
+    auto root = parse_core_object(tokens);
 
     if (!root.has_value())
         return JsonValue();