Text.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/ScopeGuard.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibMarkdown/Text.h>
  10. #include <string.h>
  11. namespace Markdown {
  12. static String unescape(const StringView& text)
  13. {
  14. StringBuilder builder;
  15. for (size_t i = 0; i < text.length(); ++i) {
  16. if (text[i] == '\\' && i != text.length() - 1) {
  17. builder.append(text[i + 1]);
  18. i++;
  19. continue;
  20. }
  21. builder.append(text[i]);
  22. }
  23. return builder.build();
  24. }
  25. Text::Text(String&& text)
  26. {
  27. m_spans.append({ move(text), Style {} });
  28. }
  29. String Text::render_to_html() const
  30. {
  31. StringBuilder builder;
  32. Vector<String> open_tags;
  33. Style current_style;
  34. for (auto& span : m_spans) {
  35. struct TagAndFlag {
  36. String tag;
  37. bool Style::*flag;
  38. };
  39. TagAndFlag tags_and_flags[] = {
  40. { "i", &Style::emph },
  41. { "b", &Style::strong },
  42. { "code", &Style::code }
  43. };
  44. auto it = open_tags.find_if([&](const String& open_tag) {
  45. if (open_tag == "a" && current_style.href != span.style.href)
  46. return true;
  47. if (open_tag == "img" && current_style.img != span.style.img)
  48. return true;
  49. for (auto& tag_and_flag : tags_and_flags) {
  50. if (open_tag == tag_and_flag.tag && !(span.style.*tag_and_flag.flag))
  51. return true;
  52. }
  53. return false;
  54. });
  55. if (!it.is_end()) {
  56. // We found an open tag that should
  57. // not be open for the new span. Close
  58. // it and all the open tags that follow
  59. // it.
  60. for (ssize_t j = open_tags.size() - 1; j >= static_cast<ssize_t>(it.index()); --j) {
  61. auto& tag = open_tags[j];
  62. if (tag == "img") {
  63. builder.append("\" />");
  64. current_style.img = {};
  65. continue;
  66. }
  67. builder.appendff("</{}>", tag);
  68. if (tag == "a") {
  69. current_style.href = {};
  70. continue;
  71. }
  72. for (auto& tag_and_flag : tags_and_flags)
  73. if (tag == tag_and_flag.tag)
  74. current_style.*tag_and_flag.flag = false;
  75. }
  76. open_tags.shrink(it.index());
  77. }
  78. if (current_style.href.is_null() && !span.style.href.is_null()) {
  79. open_tags.append("a");
  80. builder.appendff("<a href=\"{}\">", span.style.href);
  81. }
  82. if (current_style.img.is_null() && !span.style.img.is_null()) {
  83. open_tags.append("img");
  84. builder.appendff("<img src=\"{}\" alt=\"", span.style.img);
  85. }
  86. for (auto& tag_and_flag : tags_and_flags) {
  87. if (current_style.*tag_and_flag.flag != span.style.*tag_and_flag.flag) {
  88. open_tags.append(tag_and_flag.tag);
  89. builder.appendff("<{}>", tag_and_flag.tag);
  90. }
  91. }
  92. current_style = span.style;
  93. builder.append(escape_html_entities(span.text));
  94. }
  95. for (ssize_t i = open_tags.size() - 1; i >= 0; --i) {
  96. auto& tag = open_tags[i];
  97. if (tag == "img") {
  98. builder.append("\" />");
  99. continue;
  100. }
  101. builder.appendff("</{}>", tag);
  102. }
  103. return builder.build();
  104. }
  105. String Text::render_for_terminal() const
  106. {
  107. StringBuilder builder;
  108. for (auto& span : m_spans) {
  109. bool needs_styling = span.style.strong || span.style.emph || span.style.code;
  110. if (needs_styling) {
  111. builder.append("\033[");
  112. bool first = true;
  113. if (span.style.strong || span.style.code) {
  114. builder.append('1');
  115. first = false;
  116. }
  117. if (span.style.emph) {
  118. if (!first)
  119. builder.append(';');
  120. builder.append('4');
  121. }
  122. builder.append('m');
  123. }
  124. if (!span.style.href.is_null()) {
  125. if (strstr(span.style.href.characters(), "://") != nullptr) {
  126. builder.append("\033]8;;");
  127. builder.append(span.style.href);
  128. builder.append("\033\\");
  129. }
  130. }
  131. builder.append(span.text.characters());
  132. if (needs_styling)
  133. builder.append("\033[0m");
  134. if (!span.style.href.is_null()) {
  135. // When rendering for the terminal, ignore any
  136. // non-absolute links, because the user has no
  137. // chance to follow them anyway.
  138. if (strstr(span.style.href.characters(), "://") != nullptr) {
  139. builder.appendff(" <{}>", span.style.href);
  140. builder.append("\033]8;;\033\\");
  141. }
  142. }
  143. if (!span.style.img.is_null()) {
  144. if (strstr(span.style.img.characters(), "://") != nullptr) {
  145. builder.appendff(" <{}>", span.style.img);
  146. }
  147. }
  148. }
  149. return builder.build();
  150. }
  151. Optional<Text> Text::parse(const StringView& str)
  152. {
  153. Style current_style;
  154. size_t current_span_start = 0;
  155. int first_span_in_the_current_link = -1;
  156. bool current_link_is_actually_img = false;
  157. Vector<Span> spans;
  158. auto append_span_if_needed = [&](size_t offset) {
  159. VERIFY(current_span_start <= offset);
  160. if (current_span_start != offset) {
  161. Span span {
  162. unescape(str.substring_view(current_span_start, offset - current_span_start)),
  163. current_style
  164. };
  165. spans.append(move(span));
  166. current_span_start = offset;
  167. }
  168. };
  169. for (size_t offset = 0; offset < str.length(); offset++) {
  170. char ch = str[offset];
  171. bool is_escape = ch == '\\';
  172. if (is_escape && offset != str.length() - 1) {
  173. offset++;
  174. continue;
  175. }
  176. bool is_special_character = false;
  177. is_special_character |= ch == '`';
  178. if (!current_style.code)
  179. is_special_character |= ch == '*' || ch == '_' || ch == '[' || ch == ']' || (ch == '!' && offset + 1 < str.length() && str[offset + 1] == '[');
  180. if (!is_special_character)
  181. continue;
  182. append_span_if_needed(offset);
  183. switch (ch) {
  184. case '`':
  185. current_style.code = !current_style.code;
  186. break;
  187. case '*':
  188. case '_':
  189. if (offset + 1 < str.length() && str[offset + 1] == ch) {
  190. offset++;
  191. current_style.strong = !current_style.strong;
  192. } else {
  193. current_style.emph = !current_style.emph;
  194. }
  195. break;
  196. case '!':
  197. current_link_is_actually_img = true;
  198. break;
  199. case '[':
  200. if constexpr (MARKDOWN_DEBUG) {
  201. if (first_span_in_the_current_link != -1)
  202. dbgln("Dropping the outer link");
  203. }
  204. first_span_in_the_current_link = spans.size();
  205. break;
  206. case ']': {
  207. if (first_span_in_the_current_link == -1) {
  208. dbgln_if(MARKDOWN_DEBUG, "Unmatched ]");
  209. continue;
  210. }
  211. ScopeGuard guard = [&] {
  212. first_span_in_the_current_link = -1;
  213. current_link_is_actually_img = false;
  214. };
  215. if (offset + 2 >= str.length() || str[offset + 1] != '(')
  216. continue;
  217. offset += 2;
  218. size_t start_of_href = offset;
  219. do
  220. offset++;
  221. while (offset < str.length() && str[offset] != ')');
  222. if (offset == str.length())
  223. offset--;
  224. const StringView href = str.substring_view(start_of_href, offset - start_of_href);
  225. for (size_t i = first_span_in_the_current_link; i < spans.size(); i++) {
  226. if (current_link_is_actually_img)
  227. spans[i].style.img = href;
  228. else
  229. spans[i].style.href = href;
  230. }
  231. break;
  232. }
  233. default:
  234. VERIFY_NOT_REACHED();
  235. }
  236. // We've processed the character as a special, so the next offset will
  237. // start after it. Note that explicit continue statements skip over this
  238. // line, effectively treating the character as not special.
  239. current_span_start = offset + 1;
  240. }
  241. append_span_if_needed(str.length());
  242. return Text(move(spans));
  243. }
  244. }