SourceHighlighter.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/StringBuilder.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibJS/Token.h>
  10. #include <LibURL/URL.h>
  11. #include <LibWeb/CSS/Parser/Token.h>
  12. #include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
  13. #include <LibWeb/DOMURL/DOMURL.h>
  14. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  15. #include <LibWebView/SourceHighlighter.h>
  16. namespace WebView {
  17. SourceDocument::SourceDocument(StringView source)
  18. : m_source(source)
  19. {
  20. m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) {
  21. m_lines.append(Syntax::TextDocumentLine { *this, line });
  22. });
  23. }
  24. Syntax::TextDocumentLine& SourceDocument::line(size_t line_index)
  25. {
  26. return m_lines[line_index];
  27. }
  28. Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const
  29. {
  30. return m_lines[line_index];
  31. }
  32. SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language)
  33. : m_document(SourceDocument::create(source))
  34. {
  35. // HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
  36. // Also, getting a palette from the chrome is nontrivial. So, create a dummy blank one and use that.
  37. auto buffer = MUST(Core::AnonymousBuffer::create_with_size(sizeof(Gfx::SystemTheme)));
  38. auto palette_impl = Gfx::PaletteImpl::create_with_anonymous_buffer(buffer);
  39. Gfx::Palette dummy_palette { palette_impl };
  40. switch (language) {
  41. case Syntax::Language::CSS:
  42. m_highlighter = make<Web::CSS::SyntaxHighlighter>();
  43. break;
  44. case Syntax::Language::HTML:
  45. m_highlighter = make<Web::HTML::SyntaxHighlighter>();
  46. break;
  47. case Syntax::Language::JavaScript:
  48. m_highlighter = make<JS::SyntaxHighlighter>();
  49. break;
  50. default:
  51. break;
  52. }
  53. if (m_highlighter) {
  54. m_highlighter->attach(*this);
  55. m_highlighter->rehighlight(dummy_palette);
  56. }
  57. }
  58. Vector<Syntax::TextDocumentSpan> const& SourceHighlighterClient::spans() const
  59. {
  60. return document().spans();
  61. }
  62. void SourceHighlighterClient::set_span_at_index(size_t index, Syntax::TextDocumentSpan span)
  63. {
  64. document().set_span_at_index(index, span);
  65. }
  66. Vector<Syntax::TextDocumentFoldingRegion>& SourceHighlighterClient::folding_regions()
  67. {
  68. return document().folding_regions();
  69. }
  70. Vector<Syntax::TextDocumentFoldingRegion> const& SourceHighlighterClient::folding_regions() const
  71. {
  72. return document().folding_regions();
  73. }
  74. ByteString SourceHighlighterClient::highlighter_did_request_text() const
  75. {
  76. return document().text();
  77. }
  78. void SourceHighlighterClient::highlighter_did_request_update()
  79. {
  80. // No-op
  81. }
  82. Syntax::Document& SourceHighlighterClient::highlighter_did_request_document()
  83. {
  84. return document();
  85. }
  86. Syntax::TextPosition SourceHighlighterClient::highlighter_did_request_cursor() const
  87. {
  88. return {};
  89. }
  90. void SourceHighlighterClient::highlighter_did_set_spans(Vector<Syntax::TextDocumentSpan> spans)
  91. {
  92. document().set_spans(span_collection_index, move(spans));
  93. }
  94. void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax::TextDocumentFoldingRegion> folding_regions)
  95. {
  96. document().set_folding_regions(move(folding_regions));
  97. }
  98. String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView source, Syntax::Language language, HighlightOutputMode mode)
  99. {
  100. SourceHighlighterClient highlighter_client { source, language };
  101. return highlighter_client.to_html_string(url, base_url, mode);
  102. }
  103. StringView SourceHighlighterClient::class_for_token(u64 token_type) const
  104. {
  105. auto class_for_css_token = [](u64 token_type) {
  106. switch (static_cast<Web::CSS::Parser::Token::Type>(token_type)) {
  107. case Web::CSS::Parser::Token::Type::Invalid:
  108. case Web::CSS::Parser::Token::Type::BadString:
  109. case Web::CSS::Parser::Token::Type::BadUrl:
  110. return "invalid"sv;
  111. case Web::CSS::Parser::Token::Type::Ident:
  112. return "identifier"sv;
  113. case Web::CSS::Parser::Token::Type::Function:
  114. return "function"sv;
  115. case Web::CSS::Parser::Token::Type::AtKeyword:
  116. return "at-keyword"sv;
  117. case Web::CSS::Parser::Token::Type::Hash:
  118. return "hash"sv;
  119. case Web::CSS::Parser::Token::Type::String:
  120. return "string"sv;
  121. case Web::CSS::Parser::Token::Type::Url:
  122. return "url"sv;
  123. case Web::CSS::Parser::Token::Type::Number:
  124. case Web::CSS::Parser::Token::Type::Dimension:
  125. case Web::CSS::Parser::Token::Type::Percentage:
  126. return "number"sv;
  127. case Web::CSS::Parser::Token::Type::Whitespace:
  128. return "whitespace"sv;
  129. case Web::CSS::Parser::Token::Type::Delim:
  130. case Web::CSS::Parser::Token::Type::Colon:
  131. case Web::CSS::Parser::Token::Type::Semicolon:
  132. case Web::CSS::Parser::Token::Type::Comma:
  133. case Web::CSS::Parser::Token::Type::OpenSquare:
  134. case Web::CSS::Parser::Token::Type::CloseSquare:
  135. case Web::CSS::Parser::Token::Type::OpenParen:
  136. case Web::CSS::Parser::Token::Type::CloseParen:
  137. case Web::CSS::Parser::Token::Type::OpenCurly:
  138. case Web::CSS::Parser::Token::Type::CloseCurly:
  139. return "delimiter"sv;
  140. case Web::CSS::Parser::Token::Type::CDO:
  141. case Web::CSS::Parser::Token::Type::CDC:
  142. return "comment"sv;
  143. case Web::CSS::Parser::Token::Type::EndOfFile:
  144. default:
  145. break;
  146. }
  147. return ""sv;
  148. };
  149. auto class_for_js_token = [](u64 token_type) {
  150. auto category = JS::Token::category(static_cast<JS::TokenType>(token_type));
  151. switch (category) {
  152. case JS::TokenCategory::Invalid:
  153. return "invalid"sv;
  154. case JS::TokenCategory::Number:
  155. return "number"sv;
  156. case JS::TokenCategory::String:
  157. return "string"sv;
  158. case JS::TokenCategory::Punctuation:
  159. return "punctuation"sv;
  160. case JS::TokenCategory::Operator:
  161. return "operator"sv;
  162. case JS::TokenCategory::Keyword:
  163. return "keyword"sv;
  164. case JS::TokenCategory::ControlKeyword:
  165. return "control-keyword"sv;
  166. case JS::TokenCategory::Identifier:
  167. return "identifier"sv;
  168. default:
  169. break;
  170. }
  171. return ""sv;
  172. };
  173. switch (m_highlighter->language()) {
  174. case Syntax::Language::CSS:
  175. return class_for_css_token(token_type);
  176. case Syntax::Language::JavaScript:
  177. return class_for_js_token(token_type);
  178. case Syntax::Language::HTML: {
  179. // HTML has nested CSS and JS highlighters, so we have to decode their token types.
  180. // HTML
  181. if (token_type < Web::HTML::SyntaxHighlighter::JS_TOKEN_START_VALUE) {
  182. switch (static_cast<Web::HTML::AugmentedTokenKind>(token_type)) {
  183. case Web::HTML::AugmentedTokenKind::AttributeName:
  184. return "attribute-name"sv;
  185. case Web::HTML::AugmentedTokenKind::AttributeValue:
  186. return "attribute-value"sv;
  187. case Web::HTML::AugmentedTokenKind::OpenTag:
  188. case Web::HTML::AugmentedTokenKind::CloseTag:
  189. return "tag"sv;
  190. case Web::HTML::AugmentedTokenKind::Comment:
  191. return "comment"sv;
  192. case Web::HTML::AugmentedTokenKind::Doctype:
  193. return "doctype"sv;
  194. case Web::HTML::AugmentedTokenKind::__Count:
  195. default:
  196. return ""sv;
  197. }
  198. }
  199. // JS
  200. if (token_type < Web::HTML::SyntaxHighlighter::CSS_TOKEN_START_VALUE) {
  201. return class_for_js_token(token_type - Web::HTML::SyntaxHighlighter::JS_TOKEN_START_VALUE);
  202. }
  203. // CSS
  204. return class_for_css_token(token_type - Web::HTML::SyntaxHighlighter::CSS_TOKEN_START_VALUE);
  205. }
  206. default:
  207. return "unknown"sv;
  208. }
  209. }
  210. String SourceHighlighterClient::to_html_string(URL::URL const& url, URL::URL const& base_url, HighlightOutputMode mode) const
  211. {
  212. StringBuilder builder;
  213. auto append_escaped = [&](Utf32View text) {
  214. for (auto code_point : text) {
  215. if (code_point == '&') {
  216. builder.append("&amp;"sv);
  217. } else if (code_point == 0xA0) {
  218. builder.append("&nbsp;"sv);
  219. } else if (code_point == '<') {
  220. builder.append("&lt;"sv);
  221. } else if (code_point == '>') {
  222. builder.append("&gt;"sv);
  223. } else {
  224. builder.append_code_point(code_point);
  225. }
  226. }
  227. };
  228. auto start_token = [&](u64 type) {
  229. builder.appendff("<span class=\"{}\">", class_for_token(type));
  230. };
  231. auto end_token = [&]() {
  232. builder.append("</span>"sv);
  233. };
  234. if (mode == HighlightOutputMode::FullDocument) {
  235. builder.append(R"~~~(
  236. <!DOCTYPE html>
  237. <html>
  238. <head>
  239. <meta name="color-scheme" content="dark light">)~~~"sv);
  240. builder.appendff("<title>View Source - {}</title>", escape_html_entities(url.serialize_for_display()));
  241. builder.appendff("<style type=\"text/css\">{}</style>", HTML_HIGHLIGHTER_STYLE);
  242. builder.append(R"~~~(
  243. </head>
  244. <body>)~~~"sv);
  245. }
  246. builder.append("<pre class=\"html\">"sv);
  247. static constexpr auto href = to_array<u32>({ 'h', 'r', 'e', 'f' });
  248. static constexpr auto src = to_array<u32>({ 's', 'r', 'c' });
  249. bool linkify_attribute = false;
  250. auto resolve_url_for_attribute = [&](Utf32View const& attribute_value) -> Optional<URL::URL> {
  251. if (!linkify_attribute)
  252. return {};
  253. auto attribute_url = MUST(String::formatted("{}", attribute_value));
  254. auto attribute_url_without_quotes = attribute_url.bytes_as_string_view().trim("\""sv);
  255. if (auto resolved = Web::DOMURL::parse(attribute_url_without_quotes, base_url); resolved.is_valid())
  256. return resolved;
  257. return {};
  258. };
  259. size_t span_index = 0;
  260. for (size_t line_index = 0; line_index < document().line_count(); ++line_index) {
  261. auto& line = document().line(line_index);
  262. auto line_view = line.view();
  263. builder.append("<div class=\"line\">"sv);
  264. size_t next_column = 0;
  265. auto draw_text_helper = [&](size_t start, size_t end, Optional<Syntax::TextDocumentSpan const&> span) {
  266. size_t length = end - start;
  267. if (length == 0)
  268. return;
  269. auto text = line_view.substring_view(start, length);
  270. if (span.has_value()) {
  271. bool append_anchor_close = false;
  272. if (span->data == to_underlying(Web::HTML::AugmentedTokenKind::AttributeName)) {
  273. linkify_attribute = text == Utf32View { href } || text == Utf32View { src };
  274. } else if (span->data == to_underlying(Web::HTML::AugmentedTokenKind::AttributeValue)) {
  275. if (auto href = resolve_url_for_attribute(text); href.has_value()) {
  276. builder.appendff("<a href=\"{}\">", *href);
  277. append_anchor_close = true;
  278. }
  279. }
  280. start_token(span->data);
  281. append_escaped(text);
  282. end_token();
  283. if (append_anchor_close)
  284. builder.append("</a>"sv);
  285. } else {
  286. append_escaped(text);
  287. }
  288. };
  289. while (span_index < document().spans().size()) {
  290. auto& span = document().spans()[span_index];
  291. if (span.range.start().line() > line_index) {
  292. // No more spans in this line, moving on
  293. break;
  294. }
  295. size_t span_start;
  296. if (span.range.start().line() < line_index) {
  297. span_start = 0;
  298. } else {
  299. span_start = span.range.start().column();
  300. }
  301. size_t span_end;
  302. bool span_consumed;
  303. if (span.range.end().line() > line_index) {
  304. span_end = line.length();
  305. span_consumed = false;
  306. } else {
  307. span_end = span.range.end().column();
  308. span_consumed = true;
  309. }
  310. if (span_start != next_column) {
  311. // Draw unspanned text between spans
  312. draw_text_helper(next_column, span_start, {});
  313. }
  314. draw_text_helper(span_start, span_end, span);
  315. next_column = span_end;
  316. if (!span_consumed) {
  317. // Continue with same span on next line
  318. break;
  319. } else {
  320. ++span_index;
  321. }
  322. }
  323. // Draw unspanned text after last span
  324. if (next_column < line.length()) {
  325. draw_text_helper(next_column, line.length(), {});
  326. }
  327. builder.append("</div>"sv);
  328. }
  329. builder.append("</pre>"sv);
  330. if (mode == HighlightOutputMode::FullDocument) {
  331. builder.append(R"~~~(
  332. </body>
  333. </html>
  334. )~~~"sv);
  335. }
  336. return builder.to_string_without_validation();
  337. }
  338. }