Sfoglia il codice sorgente

LibWeb: Add a super basic HTML syntax highlighter

This can currently highlight tag names and attribute names/values.
Ali Mohammad Pur 4 anni fa
parent
commit
97a230e4ef

+ 5 - 4
Userland/Libraries/LibSyntax/Highlighter.h

@@ -15,13 +15,14 @@
 namespace Syntax {
 
 enum class Language {
-    PlainText,
     Cpp,
-    JavaScript,
-    INI,
     GML,
-    Shell,
+    HTML,
+    INI,
+    JavaScript,
+    PlainText,
     SQL,
+    Shell,
 };
 
 struct TextStyle {

+ 1 - 0
Userland/Libraries/LibWeb/CMakeLists.txt

@@ -153,6 +153,7 @@ set(SOURCES
     HTML/Parser/ListOfActiveFormattingElements.cpp
     HTML/Parser/StackOfOpenElements.cpp
     HTML/SubmitEvent.cpp
+    HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
     HTML/TagNames.cpp
     HTML/WebSocket.cpp
     HighResolutionTime/Performance.cpp

+ 5 - 0
Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp

@@ -229,6 +229,11 @@ Optional<u32> HTMLTokenizer::peek_code_point(size_t offset) const
 
 Optional<HTMLToken> HTMLTokenizer::next_token()
 {
+    {
+        auto last_position = m_source_positions.last();
+        m_source_positions.clear();
+        m_source_positions.append(move(last_position));
+    }
 _StartOfFunction:
     if (!m_queued_tokens.is_empty())
         return m_queued_tokens.dequeue();

+ 4 - 0
Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h

@@ -110,6 +110,10 @@ public:
     Optional<HTMLToken> next_token();
 
     void switch_to(Badge<HTMLDocumentParser>, State new_state);
+    void switch_to(State new_state) // Overload that takes no Badge<HTMLDocumentParser> argument.
+    {
+        m_state = new_state; // Only the current state is replaced; nothing else is touched here.
+    }
 
     void set_blocked(bool b) { m_blocked = b; }
     bool is_blocked() const { return m_blocked; }

+ 147 - 0
Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp

@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
+#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
+
+namespace Web::HTML {
+
+enum class AugmentedTokenKind : u32 { // Span categories; rehighlight() casts these to void* and stores them with each span.
+    AttributeName, // Span covering an attribute's name.
+    AttributeValue, // Span covering an attribute's value.
+    OpenTag, // Span covering the tag name of a start tag.
+    CloseTag, // Span covering the tag name of an end tag.
+    Comment, // Span emitted for a comment token.
+    Doctype, // Span emitted for a doctype token.
+};
+
+bool SyntaxHighlighter::is_identifier(void* token) const // Always returns false: no span kind is reported as an identifier.
+{
+    if (!token) // Null token data: not an identifier.
+        return false;
+    return false; // Non-null token data is not treated as an identifier either.
+}
+
+bool SyntaxHighlighter::is_navigatable(void*) const // Always returns false; the token argument is ignored.
+{
+    return false;
+}
+
+void SyntaxHighlighter::rehighlight(const Palette& palette) // Tokenizes the client's text as HTML and replaces the client's spans with palette-coloured ones.
+{
+    (void)palette; // NOTE(review): palette is used further down, so this cast looks like a leftover.
+    auto text = m_client->get_text();
+    // highlight(): appends one span for the given range, carrying the kind (cast to void*) as its data.
+    Vector<GUI::TextDocumentSpan> spans;
+    auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
+        spans.empend(
+            GUI::TextRange {
+                { start_line, start_column },
+                { end_line, end_column },
+            },
+            move(attributes),
+            (void*)kind,
+            false);
+    };
+    // The whole text is run through an HTMLTokenizer constructed with "utf-8" as its second argument.
+    HTMLTokenizer tokenizer { text, "utf-8" };
+    [[maybe_unused]] enum class State { // NOTE(review): `state` is read and written below, so [[maybe_unused]] appears unnecessary.
+        HTML,
+        Javascript,
+        CSS,
+    } state { State::HTML };
+    for (;;) { // One token per iteration; stops when next_token() yields no value.
+        auto token = tokenizer.next_token();
+        if (!token.has_value())
+            break;
+        // Track <script>/<style> so the tokenizer is switched to ScriptData/RAWTEXT for their contents.
+        if (token->is_start_tag()) {
+            if (token->tag_name() == "script"sv) {
+                tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
+                state = State::Javascript;
+            } else if (token->tag_name() == "style"sv) {
+                tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
+                state = State::CSS;
+            }
+        } else if (token->is_end_tag()) {
+            if (token->tag_name().is_one_of("script"sv, "style"sv)) {
+                if (state == State::Javascript) {
+                    // FIXME: Highlight javascript code here instead.
+                } else if (state == State::CSS) {
+                    // FIXME: Highlight CSS code here instead.
+                }
+                state = State::HTML; // Back to plain HTML after a closing script/style tag.
+            }
+        }
+        // Column offset: 1 for end tags, 0 otherwise (presumably skips the '/' of "</" - confirm).
+        size_t token_start_offset = token->is_end_tag() ? 1 : 0;
+
+        if (token->is_comment()) {
+            highlight( // NOTE(review): start and end are the same position, so this span is zero-width.
+                token->start_position().line,
+                token->start_position().column,
+                token->start_position().line,
+                token->start_position().column,
+                { palette.syntax_comment(), {} },
+                AugmentedTokenKind::Comment);
+        } else if (token->is_start_tag() || token->is_end_tag()) {
+            // FIXME: This breaks with single-character tag names.
+            highlight( // Tag name: start column + offset through start column + name length + offset - 1, on the start line.
+                token->start_position().line,
+                token->start_position().column + token_start_offset,
+                token->start_position().line,
+                token->start_position().column + token->tag_name().length() + token_start_offset - 1,
+                { palette.syntax_keyword(), {} },
+                token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
+            // Each attribute yields two spans: its name (identifier colour) and its value (string colour).
+            for (auto& attribute : token->attributes()) {
+                highlight(
+                    attribute.name_start_position.line,
+                    attribute.name_start_position.column + token_start_offset,
+                    attribute.name_end_position.line,
+                    attribute.name_end_position.column + token_start_offset,
+                    { palette.syntax_identifier(), {} },
+                    AugmentedTokenKind::AttributeName);
+                highlight(
+                    attribute.value_start_position.line,
+                    attribute.value_start_position.column + token_start_offset,
+                    attribute.value_end_position.line,
+                    attribute.value_end_position.column + token_start_offset,
+                    { palette.syntax_string(), {} },
+                    AugmentedTokenKind::AttributeValue);
+            }
+        } else if (token->is_doctype()) {
+            highlight( // NOTE(review): start and end are the same position here as well (zero-width span).
+                token->start_position().line,
+                token->start_position().column,
+                token->start_position().line,
+                token->start_position().column,
+                { palette.syntax_preprocessor_statement(), {} },
+                AugmentedTokenKind::Doctype);
+        }
+    }
+    // Hand the collected spans to the client, then refresh matching-pair highlighting and the client itself.
+    m_client->do_set_spans(move(spans));
+    m_has_brace_buddies = false;
+    highlight_matching_token_pair();
+    m_client->do_update();
+}
+
+Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs() const // Returns the single (OpenTag, CloseTag) pair.
+{
+    static Vector<MatchingTokenPair> pairs; // Function-local static: populated on the first call, reused afterwards.
+    if (pairs.is_empty()) {
+        pairs.append({ (void*)AugmentedTokenKind::OpenTag, (void*)AugmentedTokenKind::CloseTag });
+    }
+    return pairs; // Returned by value.
+}
+
+bool SyntaxHighlighter::token_types_equal(void* token0, void* token1) const // True when both token data pointers hold the same value.
+{
+    return token0 == token1; // rehighlight() stores an AugmentedTokenKind cast to void*, so equal pointers mean equal kinds.
+}
+
+}

+ 32 - 0
Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h

@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibSyntax/Highlighter.h>
+
+namespace Web::HTML {
+
+class SyntaxHighlighter : public Syntax::Highlighter { // Syntax::Highlighter implementation that reports Syntax::Language::HTML.
+public:
+    SyntaxHighlighter() = default;
+    virtual ~SyntaxHighlighter() override = default;
+
+    virtual bool is_identifier(void*) const override; // Takes a span's opaque token data.
+    virtual bool is_navigatable(void*) const override; // Takes a span's opaque token data.
+
+    virtual Syntax::Language language() const override { return Syntax::Language::HTML; }
+    virtual void rehighlight(const Palette&) override; // Recomputes the highlighting spans using the given palette.
+
+protected:
+    virtual Vector<MatchingTokenPair> matching_token_pairs() const override; // Token-kind pairs to be highlighted as matching.
+    virtual bool token_types_equal(void*, void*) const override; // Compares two spans' opaque token data.
+
+    size_t m_line { 1 }; // NOTE(review): not referenced by the implementation visible in this commit - confirm whether it is needed.
+    size_t m_column { 0 }; // NOTE(review): likewise not referenced by the visible implementation.
+};
+
+}