Преглед изворни кода

LibWebView: Use LibSyntax to highlight document source

This has no visible effect, but internally it's also highlighting any
CSS and JS embedded in the page, which will be made use of later. We'll
also be able to use this code for highlighting CSS or JS files directly
in the future.

It's not a perfect fit - the syntax highlighters give specific styles to
their spans, which we then ignore and just use their data integer to
figure out which CSS class to give to the span. It feels cleaner to me
to produce HTML styled that way, instead of every token having
`style="color: ...; font-weight: ...; text-decoration: ...;"` set on
it.

Most of this new `to_html_string()` code is adapted from Serenity's
`TextEditor::paint_event()`, so it should be pretty solid.
Sam Atkins пре 10 месеци
родитељ
комит
1db243c006

+ 1 - 1
Userland/Libraries/LibWebView/CMakeLists.txt

@@ -48,7 +48,7 @@ set(GENERATED_SOURCES
 )
 
 serenity_lib(LibWebView webview)
-target_link_libraries(LibWebView PRIVATE LibCore LibFileSystem LibGfx LibImageDecoderClient LibIPC LibRequests LibJS LibWeb LibUnicode LibURL)
+target_link_libraries(LibWebView PRIVATE LibCore LibFileSystem LibGfx LibImageDecoderClient LibIPC LibRequests LibJS LibWeb LibUnicode LibURL LibSyntax)
 target_compile_definitions(LibWebView PRIVATE ENABLE_PUBLIC_SUFFIX=$<BOOL:${ENABLE_PUBLIC_SUFFIX_DOWNLOAD}>)
 
 # Third-party

+ 199 - 64
Userland/Libraries/LibWebView/SourceHighlighter.cpp

@@ -1,16 +1,136 @@
 /*
  * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
 #include <AK/StringBuilder.h>
 #include <LibURL/URL.h>
-#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
+#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
 #include <LibWebView/SourceHighlighter.h>
 
 namespace WebView {
 
+SourceDocument::SourceDocument(StringView source) // Builds the document by splitting `source` on '\n' into TextDocumentLine entries.
+    : m_source(source) // NOTE(review): m_source is a StringView, so the caller's string presumably must outlive this document - confirm.
+{
+    m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) { // KeepEmpty presumably keeps blank lines as entries - verify against AK.
+        m_lines.append(Syntax::TextDocumentLine { *this, line });
+    });
+}
+
+Syntax::TextDocumentLine& SourceDocument::line(size_t line_index) // Mutable access to the line stored at `line_index` in m_lines.
+{
+    return m_lines[line_index];
+}
+
+Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const // Read-only access to the line stored at `line_index` in m_lines.
+{
+    return m_lines[line_index];
+}
+
+SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language) // Wraps `source` in a SourceDocument and, if a highlighter exists for `language`, highlights it once.
+    : m_document(SourceDocument::create(source))
+{
+    // HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
+    //       Also, getting a palette from the chrome is nontrivial. So, create a dummy blank one and use that.
+    auto buffer = MUST(Core::AnonymousBuffer::create_with_size(sizeof(Gfx::SystemTheme)));
+    auto palette_impl = Gfx::PaletteImpl::create_with_anonymous_buffer(buffer);
+    Gfx::Palette dummy_palette { palette_impl };
+
+    switch (language) {
+    case Syntax::Language::HTML:
+        m_highlighter = make<Web::HTML::SyntaxHighlighter>();
+        break;
+    default: // Only HTML is handled here; any other language leaves m_highlighter unassigned.
+        break;
+    }
+
+    if (m_highlighter) { // Skipped entirely when no highlighter was created above.
+        m_highlighter->attach(*this);
+        m_highlighter->rehighlight(dummy_palette);
+    }
+}
+
+Vector<Syntax::TextDocumentSpan> const& SourceHighlighterClient::spans() const // Forwards to the wrapped document's spans().
+{
+    return document().spans();
+}
+
+void SourceHighlighterClient::set_span_at_index(size_t index, Syntax::TextDocumentSpan span) // Forwards to the wrapped document's set_span_at_index().
+{
+    document().set_span_at_index(index, span);
+}
+
+Vector<Syntax::TextDocumentFoldingRegion>& SourceHighlighterClient::folding_regions() // Mutable view of the wrapped document's folding regions.
+{
+    return document().folding_regions();
+}
+
+Vector<Syntax::TextDocumentFoldingRegion> const& SourceHighlighterClient::folding_regions() const // Read-only view of the wrapped document's folding regions.
+{
+    return document().folding_regions();
+}
+
+ByteString SourceHighlighterClient::highlighter_did_request_text() const // Hands the highlighter the document's source text; the StringView from text() is converted to the ByteString return type.
+{
+    return document().text();
+}
+
+void SourceHighlighterClient::highlighter_did_request_update() // Update requests from the highlighter are ignored by this client.
+{
+    // No-op
+}
+
+Syntax::Document& SourceHighlighterClient::highlighter_did_request_document() // Exposes the wrapped SourceDocument through its Syntax::Document base.
+{
+    return document();
+}
+
+Syntax::TextPosition SourceHighlighterClient::highlighter_did_request_cursor() const // Always returns a default-constructed position.
+{
+    return {};
+}
+
+void SourceHighlighterClient::highlighter_did_set_spans(Vector<Syntax::TextDocumentSpan> spans) // Stores the highlighter's spans on the wrapped document.
+{
+    document().set_spans(span_collection_index, move(spans)); // NOTE(review): span_collection_index is not declared in this file; presumably inherited from Syntax::HighlighterClient - confirm.
+}
+
+void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax::TextDocumentFoldingRegion> folding_regions) // Stores the highlighter's folding regions on the wrapped document.
+{
+    document().set_folding_regions(move(folding_regions));
+}
+
+String highlight_source(URL::URL const& url, StringView source) // Highlights `source` as HTML and returns the page produced by to_html_string(url).
+{
+    SourceHighlighterClient highlighter_client { source, Syntax::Language::HTML }; // The language is fixed to HTML here.
+    return highlighter_client.to_html_string(url);
+}
+
+StringView SourceHighlighterClient::class_for_token(u64 token_type) const // Maps `token_type`, read as a Web::HTML::AugmentedTokenKind, to a CSS class name.
+{
+    switch (static_cast<Web::HTML::AugmentedTokenKind>(token_type)) {
+    case Web::HTML::AugmentedTokenKind::AttributeName:
+        return "attribute-name"sv;
+    case Web::HTML::AugmentedTokenKind::AttributeValue:
+        return "attribute-value"sv;
+    case Web::HTML::AugmentedTokenKind::OpenTag: // Opening and closing tags share one class.
+    case Web::HTML::AugmentedTokenKind::CloseTag:
+        return "tag"sv;
+    case Web::HTML::AugmentedTokenKind::Comment:
+        return "comment"sv;
+    case Web::HTML::AugmentedTokenKind::Doctype:
+        return "doctype"sv;
+    case Web::HTML::AugmentedTokenKind::__Count:
+    default: // Any kind without a dedicated class falls through to "unknown" below.
+        break;
+    }
+
+    return "unknown"sv;
+}
+
 static String generate_style()
 {
     StringBuilder builder;
@@ -52,45 +172,12 @@ static String generate_style()
     return MUST(builder.to_string());
 }
 
-String highlight_source(URL::URL const& url, StringView source)
+String SourceHighlighterClient::to_html_string(URL::URL const& url) const
 {
-    Web::HTML::HTMLTokenizer tokenizer { source, "utf-8"sv };
     StringBuilder builder;
 
-    builder.append(R"~~~(
-<!DOCTYPE html>
-<html>
-<head>
-    <meta name="color-scheme" content="dark light">)~~~"sv);
-
-    builder.appendff("<title>View Source - {}</title>", url);
-    builder.appendff("<style type=\"text/css\">{}</style>", generate_style());
-    builder.append(R"~~~(
-</head>
-<body>
-<pre class="html">
-<span class="line">)~~~"sv);
-
-    size_t previous_position = 0;
-
-    auto append_source = [&](auto end_position, Optional<StringView> const& class_name = {}) {
-        if (end_position <= previous_position)
-            return;
-
-        auto segment = source.substring_view(previous_position, end_position - previous_position);
-
-        auto append_class_start = [&]() {
-            if (class_name.has_value())
-                builder.appendff("<span class=\"{}\">"sv, *class_name);
-        };
-        auto append_class_end = [&]() {
-            if (class_name.has_value())
-                builder.append("</span>"sv);
-        };
-
-        append_class_start();
-
-        for (auto code_point : Utf8View { segment }) {
+    auto append_escaped = [&](Utf32View text) {
+        for (auto code_point : text) {
             if (code_point == '&') {
                 builder.append("&amp;"sv);
             } else if (code_point == 0xA0) {
@@ -99,56 +186,104 @@ String highlight_source(URL::URL const& url, StringView source)
                 builder.append("&lt;"sv);
             } else if (code_point == '>') {
                 builder.append("&gt;"sv);
-            } else if (code_point == '\n') {
-                append_class_end();
-                builder.append("</span>\n<span class=\"line\">"sv);
-                append_class_start();
             } else {
                 builder.append_code_point(code_point);
             }
         }
+    };
 
-        append_class_end();
-        previous_position = end_position;
+    auto start_token = [&](u64 type) {
+        builder.appendff("<span class=\"{}\">", class_for_token(type));
+    };
+    auto end_token = [&]() {
+        builder.append("</span>"sv);
     };
 
-    for (auto token = tokenizer.next_token(); token.has_value(); token = tokenizer.next_token()) {
-        if (token->is_comment()) {
-            append_source(token->start_position().byte_offset);
-            append_source(token->end_position().byte_offset, "comment"sv);
-        } else if (token->is_start_tag() || token->is_end_tag()) {
-            auto tag_name_start = token->start_position().byte_offset;
+    builder.append(R"~~~(
+<!DOCTYPE html>
+<html>
+<head>
+    <meta name="color-scheme" content="dark light">)~~~"sv);
 
-            append_source(tag_name_start);
-            append_source(tag_name_start + token->tag_name().bytes().size(), "tag"sv);
+    builder.appendff("<title>View Source - {}</title>", escape_html_entities(MUST(url.to_string())));
+    builder.appendff("<style type=\"text/css\">{}</style>", generate_style());
+    builder.append(R"~~~(
+</head>
+<body>
+<pre class="html">)~~~"sv);
 
-            token->for_each_attribute([&](auto const& attribute) {
-                append_source(attribute.name_start_position.byte_offset);
-                append_source(attribute.name_end_position.byte_offset, "attribute-name"sv);
+    size_t span_index = 0;
+    for (size_t line_index = 0; line_index < document().line_count(); ++line_index) {
+        auto& line = document().line(line_index);
+        auto line_view = line.view();
+        builder.append("<div class=\"line\">"sv);
 
-                append_source(attribute.value_start_position.byte_offset);
-                append_source(attribute.value_end_position.byte_offset, "attribute-value"sv);
+        size_t next_column = 0;
 
-                return IterationDecision::Continue;
-            });
+        auto draw_text_helper = [&](size_t start, size_t end, Optional<Syntax::TextDocumentSpan const&> span) {
+            size_t length = end - start;
+            if (length == 0)
+                return;
+            auto text = line_view.substring_view(start, length);
+            if (span.has_value()) {
+                start_token(span->data);
+                append_escaped(text);
+                end_token();
+            } else {
+                append_escaped(text);
+            }
+        };
 
-            append_source(token->end_position().byte_offset);
-        } else {
-            append_source(token->end_position().byte_offset);
+        while (span_index < document().spans().size()) {
+            auto& span = document().spans()[span_index];
+            if (span.range.start().line() > line_index) {
+                // No more spans in this line, moving on
+                break;
+            }
+            size_t span_start;
+            if (span.range.start().line() < line_index) {
+                span_start = 0;
+            } else {
+                span_start = span.range.start().column();
+            }
+            size_t span_end;
+            bool span_consumed;
+            if (span.range.end().line() > line_index) {
+                span_end = line.length();
+                span_consumed = false;
+            } else {
+                span_end = span.range.end().column();
+                span_consumed = true;
+            }
 
-            if (token->is_end_of_file())
+            if (span_start != next_column) {
+                // Draw unspanned text between spans
+                draw_text_helper(next_column, span_start, {});
+            }
+            draw_text_helper(span_start, span_end, span);
+            next_column = span_end;
+            if (!span_consumed) {
+                // Continue with same span on next line
                 break;
+            } else {
+                ++span_index;
+            }
         }
+        // Draw unspanned text after last span
+        if (next_column < line.length()) {
+            draw_text_helper(next_column, line.length(), {});
+        }
+
+        builder.append("</div>"sv);
     }
 
     builder.append(R"~~~(
-</span>
 </pre>
 </body>
 </html>
 )~~~"sv);
 
-    return MUST(builder.to_string());
+    return builder.to_string_without_validation();
 }
 
 }

+ 57 - 0
Userland/Libraries/LibWebView/SourceHighlighter.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -8,9 +9,65 @@
 
 #include <AK/String.h>
 #include <AK/StringView.h>
+#include <LibSyntax/Document.h>
+#include <LibSyntax/HighlighterClient.h>
+#include <LibSyntax/Language.h>
 
 namespace WebView {
 
+class SourceDocument final : public Syntax::Document { // Line-based Syntax::Document built over a source string that it does not copy.
+public:
+    static NonnullRefPtr<SourceDocument> create(StringView source)
+    {
+        return adopt_ref(*new SourceDocument(source)); // Plain `new`: a nothrow-new result can be null and must never be dereferenced.
+    }
+    virtual ~SourceDocument() = default;
+
+    StringView text() const { return m_source; } // The source text as passed to create().
+    size_t line_count() const { return m_lines.size(); } // Number of entries in m_lines.
+
+    // ^ Syntax::Document
+    virtual Syntax::TextDocumentLine const& line(size_t line_index) const override;
+    virtual Syntax::TextDocumentLine& line(size_t line_index) override;
+
+private:
+    explicit SourceDocument(StringView source); // Private: instances are only created through create().
+
+    // ^ Syntax::Document
+    virtual void update_views(Badge<Syntax::TextDocumentLine>) override { } // No-op.
+
+    StringView m_source; // NOTE(review): a view, not a copy - the caller's string presumably must outlive this document; confirm.
+    Vector<Syntax::TextDocumentLine> m_lines;
+};
+
+class SourceHighlighterClient final : public Syntax::HighlighterClient { // Syntax::HighlighterClient backed by a SourceDocument; can serialize the result as HTML.
+public:
+    SourceHighlighterClient(StringView source, Syntax::Language);
+    virtual ~SourceHighlighterClient() = default;
+
+    String to_html_string(URL::URL const&) const; // Returns the highlighted document as an HTML page string.
+
+private:
+    // ^ Syntax::HighlighterClient
+    virtual Vector<Syntax::TextDocumentSpan> const& spans() const override;
+    virtual void set_span_at_index(size_t index, Syntax::TextDocumentSpan span) override;
+    virtual Vector<Syntax::TextDocumentFoldingRegion>& folding_regions() override;
+    virtual Vector<Syntax::TextDocumentFoldingRegion> const& folding_regions() const override;
+    virtual ByteString highlighter_did_request_text() const override;
+    virtual void highlighter_did_request_update() override;
+    virtual Syntax::Document& highlighter_did_request_document() override;
+    virtual Syntax::TextPosition highlighter_did_request_cursor() const override;
+    virtual void highlighter_did_set_spans(Vector<Syntax::TextDocumentSpan>) override;
+    virtual void highlighter_did_set_folding_regions(Vector<Syntax::TextDocumentFoldingRegion>) override;
+
+    StringView class_for_token(u64 token_type) const; // CSS class name for a span's token type.
+
+    SourceDocument& document() const { return *m_document; } // Returns a mutable reference even from const member functions.
+
+    NonnullRefPtr<SourceDocument> m_document;
+    OwnPtr<Syntax::Highlighter> m_highlighter; // NOTE(review): appears to stay null when the language has no highlighter - confirm against the constructor.
+};
+
 String highlight_source(URL::URL const&, StringView); // Returns an HTML page showing the given source with syntax highlighting.
 
 constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~(