SyntaxHighlighter.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
  3. * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <LibJS/SyntaxHighlighter.h>
  9. #include <LibWeb/HTML/Parser/HTMLTokenizer.h>
  10. #include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
  11. namespace Web::HTML {
  12. enum class AugmentedTokenKind : u32 {
  13. AttributeName,
  14. AttributeValue,
  15. OpenTag,
  16. CloseTag,
  17. Comment,
  18. Doctype,
  19. __Count,
  20. };
  21. bool SyntaxHighlighter::is_identifier(u64 token) const
  22. {
  23. if (!token)
  24. return false;
  25. return false;
  26. }
  27. bool SyntaxHighlighter::is_navigatable(u64) const
  28. {
  29. return false;
  30. }
  31. void SyntaxHighlighter::rehighlight(Palette const& palette)
  32. {
  33. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
  34. auto text = m_client->get_text();
  35. clear_nested_token_pairs();
  36. Vector<GUI::TextDocumentSpan> spans;
  37. auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
  38. if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
  39. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
  40. return;
  41. }
  42. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
  43. spans.empend(
  44. GUI::TextRange {
  45. { start_line, start_column },
  46. { end_line, end_column },
  47. },
  48. move(attributes),
  49. static_cast<u64>(kind),
  50. false);
  51. };
  52. HTMLTokenizer tokenizer { text, "utf-8" };
  53. [[maybe_unused]] enum class State {
  54. HTML,
  55. Javascript,
  56. CSS,
  57. } state { State::HTML };
  58. StringBuilder substring_builder;
  59. GUI::TextPosition substring_start_position;
  60. for (;;) {
  61. auto token = tokenizer.next_token();
  62. if (!token.has_value() || token.value().is_end_of_file())
  63. break;
  64. dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_string());
  65. if (token->is_start_tag()) {
  66. if (token->tag_name() == "script"sv) {
  67. tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
  68. state = State::Javascript;
  69. substring_start_position = { token->end_position().line, token->end_position().column };
  70. } else if (token->tag_name() == "style"sv) {
  71. tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
  72. state = State::CSS;
  73. substring_start_position = { token->end_position().line, token->end_position().column };
  74. }
  75. } else if (token->is_end_tag()) {
  76. if (token->tag_name().is_one_of("script"sv, "style"sv)) {
  77. if (state == State::Javascript) {
  78. Syntax::ProxyHighlighterClient proxy_client {
  79. *m_client,
  80. substring_start_position,
  81. static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
  82. substring_builder.string_view()
  83. };
  84. {
  85. JS::SyntaxHighlighter highlighter;
  86. highlighter.attach(proxy_client);
  87. highlighter.rehighlight(palette);
  88. highlighter.detach();
  89. register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
  90. }
  91. spans.extend(proxy_client.corrected_spans());
  92. substring_builder.clear();
  93. } else if (state == State::CSS) {
  94. // FIXME: Highlight CSS code here instead.
  95. substring_builder.clear();
  96. }
  97. state = State::HTML;
  98. }
  99. } else if (state != State::HTML) {
  100. VERIFY(token->is_character());
  101. substring_builder.append_code_point(token->code_point());
  102. continue;
  103. }
  104. size_t token_start_offset = token->is_end_tag() ? 1 : 0;
  105. if (token->is_comment()) {
  106. highlight(
  107. token->start_position().line,
  108. token->start_position().column,
  109. token->end_position().line,
  110. token->end_position().column,
  111. { palette.syntax_comment(), {} },
  112. AugmentedTokenKind::Comment);
  113. } else if (token->is_start_tag() || token->is_end_tag()) {
  114. highlight(
  115. token->start_position().line,
  116. token->start_position().column + token_start_offset,
  117. token->start_position().line,
  118. token->start_position().column + token_start_offset + token->tag_name().length(),
  119. { palette.syntax_keyword(), {}, false, true },
  120. token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
  121. token->for_each_attribute([&](auto& attribute) {
  122. highlight(
  123. attribute.name_start_position.line,
  124. attribute.name_start_position.column + token_start_offset,
  125. attribute.name_end_position.line,
  126. attribute.name_end_position.column + token_start_offset,
  127. { palette.syntax_identifier(), {} },
  128. AugmentedTokenKind::AttributeName);
  129. highlight(
  130. attribute.value_start_position.line,
  131. attribute.value_start_position.column + token_start_offset,
  132. attribute.value_end_position.line,
  133. attribute.value_end_position.column + token_start_offset,
  134. { palette.syntax_string(), {} },
  135. AugmentedTokenKind::AttributeValue);
  136. return IterationDecision::Continue;
  137. });
  138. } else if (token->is_doctype()) {
  139. highlight(
  140. token->start_position().line,
  141. token->start_position().column,
  142. token->start_position().line,
  143. token->start_position().column,
  144. { palette.syntax_preprocessor_statement(), {} },
  145. AugmentedTokenKind::Doctype);
  146. }
  147. }
  148. if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
  149. dbgln("(HTML::SyntaxHighlighter) list of all spans:");
  150. for (auto& span : spans)
  151. dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
  152. dbgln("(HTML::SyntaxHighlighter) end of list");
  153. }
  154. m_client->do_set_spans(move(spans));
  155. m_has_brace_buddies = false;
  156. highlight_matching_token_pair();
  157. m_client->do_update();
  158. }
  159. Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
  160. {
  161. static Vector<MatchingTokenPair> pairs;
  162. if (pairs.is_empty()) {
  163. pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
  164. }
  165. return pairs;
  166. }
  167. bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
  168. {
  169. return token0 == token1;
  170. }
  171. }