SyntaxHighlighter.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /*
  2. * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
  10. #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
  11. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  12. namespace Web::HTML {
  13. enum class AugmentedTokenKind : u32 {
  14. AttributeName,
  15. AttributeValue,
  16. OpenTag,
  17. CloseTag,
  18. Comment,
  19. Doctype,
  20. __Count,
  21. };
  22. bool SyntaxHighlighter::is_identifier(u64 token) const
  23. {
  24. if (!token)
  25. return false;
  26. return false;
  27. }
  28. bool SyntaxHighlighter::is_navigatable(u64) const
  29. {
  30. return false;
  31. }
  32. void SyntaxHighlighter::rehighlight(Palette const& palette)
  33. {
  34. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
  35. auto text = m_client->get_text();
  36. clear_nested_token_pairs();
  37. Vector<GUI::TextDocumentSpan> spans;
  38. auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
  39. if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
  40. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
  41. return;
  42. }
  43. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
  44. spans.empend(
  45. GUI::TextRange {
  46. { start_line, start_column },
  47. { end_line, end_column },
  48. },
  49. move(attributes),
  50. static_cast<u64>(kind),
  51. false);
  52. };
  53. HTMLTokenizer tokenizer { text, "utf-8" };
  54. [[maybe_unused]] enum class State {
  55. HTML,
  56. Javascript,
  57. CSS,
  58. } state { State::HTML };
  59. StringBuilder substring_builder;
  60. GUI::TextPosition substring_start_position;
  61. for (;;) {
  62. auto token = tokenizer.next_token();
  63. if (!token.has_value() || token.value().is_end_of_file())
  64. break;
  65. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_string());
  66. if (token->is_start_tag()) {
  67. if (token->tag_name() == "script"sv) {
  68. tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
  69. state = State::Javascript;
  70. substring_start_position = { token->end_position().line, token->end_position().column };
  71. } else if (token->tag_name() == "style"sv) {
  72. tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
  73. state = State::CSS;
  74. substring_start_position = { token->end_position().line, token->end_position().column };
  75. }
  76. } else if (token->is_end_tag()) {
  77. if (token->tag_name().is_one_of("script"sv, "style"sv)) {
  78. if (state == State::Javascript) {
  79. Syntax::ProxyHighlighterClient proxy_client {
  80. *m_client,
  81. substring_start_position,
  82. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  83. substring_builder.string_view()
  84. };
  85. {
  86. JS::SyntaxHighlighter highlighter;
  87. highlighter.attach(proxy_client);
  88. highlighter.rehighlight(palette);
  89. highlighter.detach();
  90. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  91. }
  92. spans.extend(proxy_client.corrected_spans());
  93. substring_builder.clear();
  94. } else if (state == State::CSS) {
  95. Syntax::ProxyHighlighterClient proxy_client {
  96. *m_client,
  97. substring_start_position,
  98. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  99. substring_builder.string_view()
  100. };
  101. {
  102. CSS::SyntaxHighlighter highlighter;
  103. highlighter.attach(proxy_client);
  104. highlighter.rehighlight(palette);
  105. highlighter.detach();
  106. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  107. }
  108. spans.extend(proxy_client.corrected_spans());
  109. substring_builder.clear();
  110. }
  111. state = State::HTML;
  112. }
  113. } else if (state != State::HTML) {
  114. VERIFY(token->is_character());
  115. substring_builder.append_code_point(token->code_point());
  116. continue;
  117. }
  118. size_t token_start_offset = token->is_end_tag() ? 1 : 0;
  119. if (token->is_comment()) {
  120. highlight(
  121. token->start_position().line,
  122. token->start_position().column,
  123. token->end_position().line,
  124. token->end_position().column,
  125. { palette.syntax_comment(), {} },
  126. AugmentedTokenKind::Comment);
  127. } else if (token->is_start_tag() || token->is_end_tag()) {
  128. highlight(
  129. token->start_position().line,
  130. token->start_position().column + token_start_offset,
  131. token->start_position().line,
  132. token->start_position().column + token_start_offset + token->tag_name().length(),
  133. { palette.syntax_keyword(), {}, false, true },
  134. token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
  135. token->for_each_attribute([&](auto& attribute) {
  136. highlight(
  137. attribute.name_start_position.line,
  138. attribute.name_start_position.column + token_start_offset,
  139. attribute.name_end_position.line,
  140. attribute.name_end_position.column + token_start_offset,
  141. { palette.syntax_identifier(), {} },
  142. AugmentedTokenKind::AttributeName);
  143. highlight(
  144. attribute.value_start_position.line,
  145. attribute.value_start_position.column + token_start_offset,
  146. attribute.value_end_position.line,
  147. attribute.value_end_position.column + token_start_offset,
  148. { palette.syntax_string(), {} },
  149. AugmentedTokenKind::AttributeValue);
  150. return IterationDecision::Continue;
  151. });
  152. } else if (token->is_doctype()) {
  153. highlight(
  154. token->start_position().line,
  155. token->start_position().column,
  156. token->start_position().line,
  157. token->start_position().column,
  158. { palette.syntax_preprocessor_statement(), {} },
  159. AugmentedTokenKind::Doctype);
  160. }
  161. }
  162. if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
  163. dbgln("(HTML::SyntaxHighlighter) list of all spans:");
  164. for (auto& span : spans)
  165. dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
  166. dbgln("(HTML::SyntaxHighlighter) end of list");
  167. }
  168. m_client->do_set_spans(move(spans));
  169. m_has_brace_buddies = false;
  170. highlight_matching_token_pair();
  171. m_client->do_update();
  172. }
  173. Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
  174. {
  175. static Vector<MatchingTokenPair> pairs;
  176. if (pairs.is_empty()) {
  177. pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
  178. }
  179. return pairs;
  180. }
  181. bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
  182. {
  183. return token0 == token1;
  184. }
  185. }