Text.cpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/ScopeGuard.h>
  27. #include <AK/StringBuilder.h>
  28. #include <LibMarkdown/Text.h>
  29. #include <string.h>
  30. //#define DEBUG_MARKDOWN
  31. namespace Markdown {
  32. static String unescape(const StringView& text)
  33. {
  34. StringBuilder builder;
  35. for (size_t i = 0; i < text.length(); ++i) {
  36. if (text[i] == '\\' && i != text.length() - 1) {
  37. builder.append(text[i + 1]);
  38. i++;
  39. continue;
  40. }
  41. builder.append(text[i]);
  42. }
  43. return builder.build();
  44. }
  45. Text::Text(String&& text)
  46. {
  47. m_spans.append({ move(text), Style {} });
  48. }
  49. String Text::render_to_html() const
  50. {
  51. StringBuilder builder;
  52. Vector<String> open_tags;
  53. Style current_style;
  54. for (auto& span : m_spans) {
  55. struct TagAndFlag {
  56. String tag;
  57. bool Style::*flag;
  58. };
  59. TagAndFlag tags_and_flags[] = {
  60. { "i", &Style::emph },
  61. { "b", &Style::strong },
  62. { "code", &Style::code }
  63. };
  64. auto it = open_tags.find_if([&](const String& open_tag) {
  65. if (open_tag == "a" && current_style.href != span.style.href)
  66. return true;
  67. if (open_tag == "img" && current_style.img != span.style.img)
  68. return true;
  69. for (auto& tag_and_flag : tags_and_flags) {
  70. if (open_tag == tag_and_flag.tag && !(span.style.*tag_and_flag.flag))
  71. return true;
  72. }
  73. return false;
  74. });
  75. if (!it.is_end()) {
  76. // We found an open tag that should
  77. // not be open for the new span. Close
  78. // it and all the open tags that follow
  79. // it.
  80. for (ssize_t j = open_tags.size() - 1; j >= static_cast<ssize_t>(it.index()); --j) {
  81. auto& tag = open_tags[j];
  82. if (tag == "img") {
  83. builder.append("\" />");
  84. current_style.img = {};
  85. continue;
  86. }
  87. builder.appendf("</%s>", tag.characters());
  88. if (tag == "a") {
  89. current_style.href = {};
  90. continue;
  91. }
  92. for (auto& tag_and_flag : tags_and_flags)
  93. if (tag == tag_and_flag.tag)
  94. current_style.*tag_and_flag.flag = false;
  95. }
  96. open_tags.shrink(it.index());
  97. }
  98. if (current_style.href.is_null() && !span.style.href.is_null()) {
  99. open_tags.append("a");
  100. builder.appendf("<a href=\"%s\">", span.style.href.characters());
  101. }
  102. if (current_style.img.is_null() && !span.style.img.is_null()) {
  103. open_tags.append("img");
  104. builder.appendf("<img src=\"%s\" alt=\"", span.style.img.characters());
  105. }
  106. for (auto& tag_and_flag : tags_and_flags) {
  107. if (current_style.*tag_and_flag.flag != span.style.*tag_and_flag.flag) {
  108. open_tags.append(tag_and_flag.tag);
  109. builder.appendf("<%s>", tag_and_flag.tag.characters());
  110. }
  111. }
  112. current_style = span.style;
  113. builder.append(escape_html_entities(span.text));
  114. }
  115. for (ssize_t i = open_tags.size() - 1; i >= 0; --i) {
  116. auto& tag = open_tags[i];
  117. if (tag == "img") {
  118. builder.append("\" />");
  119. continue;
  120. }
  121. builder.appendf("</%s>", tag.characters());
  122. }
  123. return builder.build();
  124. }
  125. String Text::render_for_terminal() const
  126. {
  127. StringBuilder builder;
  128. for (auto& span : m_spans) {
  129. bool needs_styling = span.style.strong || span.style.emph || span.style.code;
  130. if (needs_styling) {
  131. builder.append("\033[");
  132. bool first = true;
  133. if (span.style.strong || span.style.code) {
  134. builder.append('1');
  135. first = false;
  136. }
  137. if (span.style.emph) {
  138. if (!first)
  139. builder.append(';');
  140. builder.append('4');
  141. }
  142. builder.append('m');
  143. }
  144. if (!span.style.href.is_null()) {
  145. if (strstr(span.style.href.characters(), "://") != nullptr) {
  146. builder.append("\033]8;;");
  147. builder.append(span.style.href);
  148. builder.append("\033\\");
  149. }
  150. }
  151. builder.append(span.text.characters());
  152. if (needs_styling)
  153. builder.append("\033[0m");
  154. if (!span.style.href.is_null()) {
  155. // When rendering for the terminal, ignore any
  156. // non-absolute links, because the user has no
  157. // chance to follow them anyway.
  158. if (strstr(span.style.href.characters(), "://") != nullptr) {
  159. builder.appendf(" <%s>", span.style.href.characters());
  160. builder.append("\033]8;;\033\\");
  161. }
  162. }
  163. if (!span.style.img.is_null()) {
  164. if (strstr(span.style.img.characters(), "://") != nullptr) {
  165. builder.appendf(" <%s>", span.style.img.characters());
  166. }
  167. }
  168. }
  169. return builder.build();
  170. }
  171. Optional<Text> Text::parse(const StringView& str)
  172. {
  173. Style current_style;
  174. size_t current_span_start = 0;
  175. int first_span_in_the_current_link = -1;
  176. bool current_link_is_actually_img = false;
  177. Vector<Span> spans;
  178. auto append_span_if_needed = [&](size_t offset) {
  179. ASSERT(current_span_start <= offset);
  180. if (current_span_start != offset) {
  181. Span span {
  182. unescape(str.substring_view(current_span_start, offset - current_span_start)),
  183. current_style
  184. };
  185. spans.append(move(span));
  186. current_span_start = offset;
  187. }
  188. };
  189. for (size_t offset = 0; offset < str.length(); offset++) {
  190. char ch = str[offset];
  191. bool is_escape = ch == '\\';
  192. if (is_escape && offset != str.length() - 1) {
  193. offset++;
  194. continue;
  195. }
  196. bool is_special_character = false;
  197. is_special_character |= ch == '`';
  198. if (!current_style.code)
  199. is_special_character |= ch == '*' || ch == '_' || ch == '[' || ch == ']' || (ch == '!' && offset + 1 < str.length() && str[offset + 1] == '[');
  200. if (!is_special_character)
  201. continue;
  202. append_span_if_needed(offset);
  203. switch (ch) {
  204. case '`':
  205. current_style.code = !current_style.code;
  206. break;
  207. case '*':
  208. case '_':
  209. if (offset + 1 < str.length() && str[offset + 1] == ch) {
  210. offset++;
  211. current_style.strong = !current_style.strong;
  212. } else {
  213. current_style.emph = !current_style.emph;
  214. }
  215. break;
  216. case '!':
  217. current_link_is_actually_img = true;
  218. break;
  219. case '[':
  220. #ifdef DEBUG_MARKDOWN
  221. if (first_span_in_the_current_link != -1)
  222. dbgln("Dropping the outer link");
  223. #endif
  224. first_span_in_the_current_link = spans.size();
  225. break;
  226. case ']': {
  227. if (first_span_in_the_current_link == -1) {
  228. #ifdef DEBUG_MARKDOWN
  229. dbgln("Unmatched ]");
  230. #endif
  231. continue;
  232. }
  233. ScopeGuard guard = [&] {
  234. first_span_in_the_current_link = -1;
  235. current_link_is_actually_img = false;
  236. };
  237. if (offset + 2 >= str.length() || str[offset + 1] != '(')
  238. continue;
  239. offset += 2;
  240. size_t start_of_href = offset;
  241. do
  242. offset++;
  243. while (offset < str.length() && str[offset] != ')');
  244. if (offset == str.length())
  245. offset--;
  246. const StringView href = str.substring_view(start_of_href, offset - start_of_href);
  247. for (size_t i = first_span_in_the_current_link; i < spans.size(); i++) {
  248. if (current_link_is_actually_img)
  249. spans[i].style.img = href;
  250. else
  251. spans[i].style.href = href;
  252. }
  253. break;
  254. }
  255. default:
  256. ASSERT_NOT_REACHED();
  257. }
  258. // We've processed the character as a special, so the next offset will
  259. // start after it. Note that explicit continue statements skip over this
  260. // line, effectively treating the character as not special.
  261. current_span_start = offset + 1;
  262. }
  263. append_span_if_needed(str.length());
  264. return Text(move(spans));
  265. }
  266. }