SyntaxHighlighter.cpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. /*
  2. * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibJS/Token.h>
  10. #include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
  11. #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
  12. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  13. namespace Web::HTML {
  14. bool SyntaxHighlighter::is_identifier(u64 token) const
  15. {
  16. if (!token)
  17. return false;
  18. return false;
  19. }
  20. bool SyntaxHighlighter::is_navigatable(u64) const
  21. {
  22. return false;
  23. }
  24. void SyntaxHighlighter::rehighlight(Palette const& palette)
  25. {
  26. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
  27. auto text = m_client->get_text();
  28. clear_nested_token_pairs();
  29. // FIXME: Add folding regions for start and end tags.
  30. Vector<Syntax::TextDocumentFoldingRegion> folding_regions;
  31. Vector<Syntax::TextDocumentSpan> spans;
  32. auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
  33. if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
  34. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
  35. return;
  36. }
  37. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
  38. spans.empend(
  39. Syntax::TextRange {
  40. { start_line, start_column },
  41. { end_line, end_column },
  42. },
  43. move(attributes),
  44. static_cast<u64>(kind),
  45. false);
  46. };
  47. HTMLTokenizer tokenizer { text, "utf-8" };
  48. [[maybe_unused]] enum class State {
  49. HTML,
  50. Javascript,
  51. CSS,
  52. } state { State::HTML };
  53. StringBuilder substring_builder;
  54. Syntax::TextPosition substring_start_position;
  55. for (;;) {
  56. auto token = tokenizer.next_token();
  57. if (!token.has_value() || token.value().is_end_of_file())
  58. break;
  59. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_string());
  60. if (token->is_start_tag()) {
  61. if (token->tag_name() == "script"sv) {
  62. tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
  63. state = State::Javascript;
  64. substring_start_position = { token->end_position().line, token->end_position().column };
  65. } else if (token->tag_name() == "style"sv) {
  66. tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
  67. state = State::CSS;
  68. substring_start_position = { token->end_position().line, token->end_position().column };
  69. }
  70. } else if (token->is_end_tag()) {
  71. if (token->tag_name().is_one_of("script"sv, "style"sv)) {
  72. if (state == State::Javascript) {
  73. VERIFY(static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value() < JS_TOKEN_START_VALUE);
  74. Syntax::ProxyHighlighterClient proxy_client {
  75. *m_client,
  76. substring_start_position,
  77. JS_TOKEN_START_VALUE,
  78. substring_builder.string_view()
  79. };
  80. {
  81. JS::SyntaxHighlighter highlighter;
  82. highlighter.attach(proxy_client);
  83. highlighter.rehighlight(palette);
  84. highlighter.detach();
  85. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  86. }
  87. spans.extend(proxy_client.corrected_spans());
  88. folding_regions.extend(proxy_client.corrected_folding_regions());
  89. substring_builder.clear();
  90. } else if (state == State::CSS) {
  91. VERIFY(static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value() + static_cast<u64>(JS::TokenType::_COUNT_OF_TOKENS) < CSS_TOKEN_START_VALUE);
  92. Syntax::ProxyHighlighterClient proxy_client {
  93. *m_client,
  94. substring_start_position,
  95. CSS_TOKEN_START_VALUE,
  96. substring_builder.string_view()
  97. };
  98. {
  99. CSS::SyntaxHighlighter highlighter;
  100. highlighter.attach(proxy_client);
  101. highlighter.rehighlight(palette);
  102. highlighter.detach();
  103. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  104. }
  105. spans.extend(proxy_client.corrected_spans());
  106. folding_regions.extend(proxy_client.corrected_folding_regions());
  107. substring_builder.clear();
  108. }
  109. state = State::HTML;
  110. }
  111. } else if (state != State::HTML) {
  112. VERIFY(token->is_character());
  113. substring_builder.append_code_point(token->code_point());
  114. continue;
  115. }
  116. if (token->is_comment()) {
  117. highlight(
  118. token->start_position().line,
  119. token->start_position().column,
  120. token->end_position().line,
  121. token->end_position().column,
  122. { palette.syntax_comment(), {} },
  123. AugmentedTokenKind::Comment);
  124. Syntax::TextDocumentFoldingRegion region;
  125. region.range.set_start({ token->start_position().line, token->start_position().column + comment_prefix()->length() });
  126. region.range.set_end({ token->end_position().line, token->end_position().column - comment_suffix()->length() });
  127. folding_regions.append(move(region));
  128. } else if (token->is_start_tag() || token->is_end_tag()) {
  129. highlight(
  130. token->start_position().line,
  131. token->start_position().column,
  132. token->start_position().line,
  133. token->start_position().column + token->tag_name().bytes().size(),
  134. { palette.syntax_keyword(), {}, true },
  135. token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
  136. token->for_each_attribute([&](auto& attribute) {
  137. highlight(
  138. attribute.name_start_position.line,
  139. attribute.name_start_position.column,
  140. attribute.name_end_position.line,
  141. attribute.name_end_position.column,
  142. { palette.syntax_identifier(), {} },
  143. AugmentedTokenKind::AttributeName);
  144. highlight(
  145. attribute.value_start_position.line,
  146. attribute.value_start_position.column,
  147. attribute.value_end_position.line,
  148. attribute.value_end_position.column,
  149. { palette.syntax_string(), {} },
  150. AugmentedTokenKind::AttributeValue);
  151. return IterationDecision::Continue;
  152. });
  153. } else if (token->is_doctype()) {
  154. highlight(
  155. token->start_position().line,
  156. token->start_position().column,
  157. token->start_position().line,
  158. token->start_position().column,
  159. { palette.syntax_preprocessor_statement(), {} },
  160. AugmentedTokenKind::Doctype);
  161. }
  162. }
  163. if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
  164. dbgln("(HTML::SyntaxHighlighter) list of all spans:");
  165. for (auto& span : spans)
  166. dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
  167. dbgln("(HTML::SyntaxHighlighter) end of list");
  168. }
  169. m_client->do_set_spans(move(spans));
  170. m_client->do_set_folding_regions(move(folding_regions));
  171. m_has_brace_buddies = false;
  172. highlight_matching_token_pair();
  173. m_client->do_update();
  174. }
  175. Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
  176. {
  177. static Vector<MatchingTokenPair> pairs;
  178. if (pairs.is_empty()) {
  179. pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
  180. }
  181. return pairs;
  182. }
  183. bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
  184. {
  185. return token0 == token1;
  186. }
  187. }