SyntaxHighlighter.cpp 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
  10. #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
  11. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  12. namespace Web::HTML {
  13. enum class AugmentedTokenKind : u32 {
  14. AttributeName,
  15. AttributeValue,
  16. OpenTag,
  17. CloseTag,
  18. Comment,
  19. Doctype,
  20. __Count,
  21. };
  22. bool SyntaxHighlighter::is_identifier(u64 token) const
  23. {
  24. if (!token)
  25. return false;
  26. return false;
  27. }
  28. bool SyntaxHighlighter::is_navigatable(u64) const
  29. {
  30. return false;
  31. }
  32. void SyntaxHighlighter::rehighlight(Palette const& palette)
  33. {
  34. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
  35. auto text = m_client->get_text();
  36. clear_nested_token_pairs();
  37. // FIXME: Add folding regions for start and end tags.
  38. Vector<GUI::TextDocumentFoldingRegion> folding_regions;
  39. Vector<GUI::TextDocumentSpan> spans;
  40. auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
  41. if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
  42. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
  43. return;
  44. }
  45. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
  46. spans.empend(
  47. GUI::TextRange {
  48. { start_line, start_column },
  49. { end_line, end_column },
  50. },
  51. move(attributes),
  52. static_cast<u64>(kind),
  53. false);
  54. };
  55. HTMLTokenizer tokenizer { text, "utf-8" };
  56. [[maybe_unused]] enum class State {
  57. HTML,
  58. Javascript,
  59. CSS,
  60. } state { State::HTML };
  61. StringBuilder substring_builder;
  62. GUI::TextPosition substring_start_position;
  63. for (;;) {
  64. auto token = tokenizer.next_token();
  65. if (!token.has_value() || token.value().is_end_of_file())
  66. break;
  67. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_deprecated_string());
  68. if (token->is_start_tag()) {
  69. if (token->tag_name() == "script"sv) {
  70. tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
  71. state = State::Javascript;
  72. substring_start_position = { token->end_position().line, token->end_position().column };
  73. } else if (token->tag_name() == "style"sv) {
  74. tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
  75. state = State::CSS;
  76. substring_start_position = { token->end_position().line, token->end_position().column };
  77. }
  78. } else if (token->is_end_tag()) {
  79. if (token->tag_name().is_one_of("script"sv, "style"sv)) {
  80. if (state == State::Javascript) {
  81. Syntax::ProxyHighlighterClient proxy_client {
  82. *m_client,
  83. substring_start_position,
  84. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  85. substring_builder.string_view()
  86. };
  87. {
  88. JS::SyntaxHighlighter highlighter;
  89. highlighter.attach(proxy_client);
  90. highlighter.rehighlight(palette);
  91. highlighter.detach();
  92. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  93. }
  94. spans.extend(proxy_client.corrected_spans());
  95. folding_regions.extend(proxy_client.corrected_folding_regions());
  96. substring_builder.clear();
  97. } else if (state == State::CSS) {
  98. Syntax::ProxyHighlighterClient proxy_client {
  99. *m_client,
  100. substring_start_position,
  101. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  102. substring_builder.string_view()
  103. };
  104. {
  105. CSS::SyntaxHighlighter highlighter;
  106. highlighter.attach(proxy_client);
  107. highlighter.rehighlight(palette);
  108. highlighter.detach();
  109. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  110. }
  111. spans.extend(proxy_client.corrected_spans());
  112. folding_regions.extend(proxy_client.corrected_folding_regions());
  113. substring_builder.clear();
  114. }
  115. state = State::HTML;
  116. }
  117. } else if (state != State::HTML) {
  118. VERIFY(token->is_character());
  119. substring_builder.append_code_point(token->code_point());
  120. continue;
  121. }
  122. size_t token_start_offset = token->is_end_tag() ? 1 : 0;
  123. if (token->is_comment()) {
  124. highlight(
  125. token->start_position().line,
  126. token->start_position().column,
  127. token->end_position().line,
  128. token->end_position().column,
  129. { palette.syntax_comment(), {} },
  130. AugmentedTokenKind::Comment);
  131. GUI::TextDocumentFoldingRegion region;
  132. region.range.set_start({ token->start_position().line, token->start_position().column + comment_prefix()->length() });
  133. region.range.set_end({ token->end_position().line, token->end_position().column - comment_suffix()->length() });
  134. folding_regions.append(move(region));
  135. } else if (token->is_start_tag() || token->is_end_tag()) {
  136. highlight(
  137. token->start_position().line,
  138. token->start_position().column + token_start_offset,
  139. token->start_position().line,
  140. token->start_position().column + token_start_offset + token->tag_name().length(),
  141. { palette.syntax_keyword(), {}, true },
  142. token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
  143. token->for_each_attribute([&](auto& attribute) {
  144. highlight(
  145. attribute.name_start_position.line,
  146. attribute.name_start_position.column + token_start_offset,
  147. attribute.name_end_position.line,
  148. attribute.name_end_position.column + token_start_offset,
  149. { palette.syntax_identifier(), {} },
  150. AugmentedTokenKind::AttributeName);
  151. highlight(
  152. attribute.value_start_position.line,
  153. attribute.value_start_position.column + token_start_offset,
  154. attribute.value_end_position.line,
  155. attribute.value_end_position.column + token_start_offset,
  156. { palette.syntax_string(), {} },
  157. AugmentedTokenKind::AttributeValue);
  158. return IterationDecision::Continue;
  159. });
  160. } else if (token->is_doctype()) {
  161. highlight(
  162. token->start_position().line,
  163. token->start_position().column,
  164. token->start_position().line,
  165. token->start_position().column,
  166. { palette.syntax_preprocessor_statement(), {} },
  167. AugmentedTokenKind::Doctype);
  168. }
  169. }
  170. if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
  171. dbgln("(HTML::SyntaxHighlighter) list of all spans:");
  172. for (auto& span : spans)
  173. dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
  174. dbgln("(HTML::SyntaxHighlighter) end of list");
  175. }
  176. m_client->do_set_spans(move(spans));
  177. m_client->do_set_folding_regions(move(folding_regions));
  178. m_has_brace_buddies = false;
  179. highlight_matching_token_pair();
  180. m_client->do_update();
  181. }
  182. Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
  183. {
  184. static Vector<MatchingTokenPair> pairs;
  185. if (pairs.is_empty()) {
  186. pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
  187. }
  188. return pairs;
  189. }
  190. bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
  191. {
  192. return token0 == token1;
  193. }
  194. }