SyntaxHighlighter.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /*
  2. * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
  10. #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
  11. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  12. namespace Web::HTML {
  13. enum class AugmentedTokenKind : u32 {
  14. AttributeName,
  15. AttributeValue,
  16. OpenTag,
  17. CloseTag,
  18. Comment,
  19. Doctype,
  20. __Count,
  21. };
  22. bool SyntaxHighlighter::is_identifier(u64 token) const
  23. {
  24. if (!token)
  25. return false;
  26. return false;
  27. }
  28. bool SyntaxHighlighter::is_navigatable(u64) const
  29. {
  30. return false;
  31. }
  32. void SyntaxHighlighter::rehighlight(Palette const& palette)
  33. {
  34. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
  35. auto text = m_client->get_text();
  36. clear_nested_token_pairs();
  37. // FIXME: Add folding regions for start and end tags.
  38. Vector<GUI::TextDocumentFoldingRegion> folding_regions;
  39. Vector<GUI::TextDocumentSpan> spans;
  40. auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
  41. if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
  42. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
  43. return;
  44. }
  45. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
  46. spans.empend(
  47. GUI::TextRange {
  48. { start_line, start_column },
  49. { end_line, end_column },
  50. },
  51. move(attributes),
  52. static_cast<u64>(kind),
  53. false);
  54. };
  55. HTMLTokenizer tokenizer { text, "utf-8" };
  56. [[maybe_unused]] enum class State {
  57. HTML,
  58. Javascript,
  59. CSS,
  60. } state { State::HTML };
  61. StringBuilder substring_builder;
  62. GUI::TextPosition substring_start_position;
  63. for (;;) {
  64. auto token = tokenizer.next_token();
  65. if (!token.has_value() || token.value().is_end_of_file())
  66. break;
  67. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_deprecated_string());
  68. if (token->is_start_tag()) {
  69. if (token->tag_name() == "script"sv) {
  70. tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
  71. state = State::Javascript;
  72. substring_start_position = { token->end_position().line, token->end_position().column };
  73. } else if (token->tag_name() == "style"sv) {
  74. tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
  75. state = State::CSS;
  76. substring_start_position = { token->end_position().line, token->end_position().column };
  77. }
  78. } else if (token->is_end_tag()) {
  79. if (token->tag_name().is_one_of("script"sv, "style"sv)) {
  80. if (state == State::Javascript) {
  81. Syntax::ProxyHighlighterClient proxy_client {
  82. *m_client,
  83. substring_start_position,
  84. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  85. substring_builder.string_view()
  86. };
  87. {
  88. JS::SyntaxHighlighter highlighter;
  89. highlighter.attach(proxy_client);
  90. highlighter.rehighlight(palette);
  91. highlighter.detach();
  92. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  93. }
  94. spans.extend(proxy_client.corrected_spans());
  95. folding_regions.extend(proxy_client.corrected_folding_regions());
  96. substring_builder.clear();
  97. } else if (state == State::CSS) {
  98. Syntax::ProxyHighlighterClient proxy_client {
  99. *m_client,
  100. substring_start_position,
  101. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  102. substring_builder.string_view()
  103. };
  104. {
  105. CSS::SyntaxHighlighter highlighter;
  106. highlighter.attach(proxy_client);
  107. highlighter.rehighlight(palette);
  108. highlighter.detach();
  109. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  110. }
  111. spans.extend(proxy_client.corrected_spans());
  112. folding_regions.extend(proxy_client.corrected_folding_regions());
  113. substring_builder.clear();
  114. }
  115. state = State::HTML;
  116. }
  117. } else if (state != State::HTML) {
  118. VERIFY(token->is_character());
  119. substring_builder.append_code_point(token->code_point());
  120. continue;
  121. }
  122. if (token->is_comment()) {
  123. highlight(
  124. token->start_position().line,
  125. token->start_position().column,
  126. token->end_position().line,
  127. token->end_position().column,
  128. { palette.syntax_comment(), {} },
  129. AugmentedTokenKind::Comment);
  130. GUI::TextDocumentFoldingRegion region;
  131. region.range.set_start({ token->start_position().line, token->start_position().column + comment_prefix()->length() });
  132. region.range.set_end({ token->end_position().line, token->end_position().column - comment_suffix()->length() });
  133. folding_regions.append(move(region));
  134. } else if (token->is_start_tag() || token->is_end_tag()) {
  135. highlight(
  136. token->start_position().line,
  137. token->start_position().column,
  138. token->start_position().line,
  139. token->start_position().column + token->tag_name().length(),
  140. { palette.syntax_keyword(), {}, true },
  141. token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
  142. token->for_each_attribute([&](auto& attribute) {
  143. highlight(
  144. attribute.name_start_position.line,
  145. attribute.name_start_position.column,
  146. attribute.name_end_position.line,
  147. attribute.name_end_position.column,
  148. { palette.syntax_identifier(), {} },
  149. AugmentedTokenKind::AttributeName);
  150. highlight(
  151. attribute.value_start_position.line,
  152. attribute.value_start_position.column,
  153. attribute.value_end_position.line,
  154. attribute.value_end_position.column,
  155. { palette.syntax_string(), {} },
  156. AugmentedTokenKind::AttributeValue);
  157. return IterationDecision::Continue;
  158. });
  159. } else if (token->is_doctype()) {
  160. highlight(
  161. token->start_position().line,
  162. token->start_position().column,
  163. token->start_position().line,
  164. token->start_position().column,
  165. { palette.syntax_preprocessor_statement(), {} },
  166. AugmentedTokenKind::Doctype);
  167. }
  168. }
  169. if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
  170. dbgln("(HTML::SyntaxHighlighter) list of all spans:");
  171. for (auto& span : spans)
  172. dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
  173. dbgln("(HTML::SyntaxHighlighter) end of list");
  174. }
  175. m_client->do_set_spans(move(spans));
  176. m_client->do_set_folding_regions(move(folding_regions));
  177. m_has_brace_buddies = false;
  178. highlight_matching_token_pair();
  179. m_client->do_update();
  180. }
  181. Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
  182. {
  183. static Vector<MatchingTokenPair> pairs;
  184. if (pairs.is_empty()) {
  185. pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
  186. }
  187. return pairs;
  188. }
  189. bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
  190. {
  191. return token0 == token1;
  192. }
  193. }