Text.cpp 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/ScopeGuard.h>
  27. #include <AK/StringBuilder.h>
  28. #include <LibMarkdown/Text.h>
  29. #include <string.h>
  30. namespace Markdown {
  31. static String unescape(const StringView& text)
  32. {
  33. StringBuilder builder;
  34. for (size_t i = 0; i < text.length(); ++i) {
  35. if (text[i] == '\\' && i != text.length() - 1) {
  36. builder.append(text[i + 1]);
  37. i++;
  38. continue;
  39. }
  40. builder.append(text[i]);
  41. }
  42. return builder.build();
  43. }
  44. String Text::render_to_html() const
  45. {
  46. StringBuilder builder;
  47. Vector<String> open_tags;
  48. Style current_style;
  49. for (auto& span : m_spans) {
  50. struct TagAndFlag {
  51. String tag;
  52. bool Style::*flag;
  53. };
  54. TagAndFlag tags_and_flags[] = {
  55. { "i", &Style::emph },
  56. { "b", &Style::strong },
  57. { "code", &Style::code }
  58. };
  59. auto it = open_tags.find([&](const String& open_tag) {
  60. if (open_tag == "a" && current_style.href != span.style.href)
  61. return true;
  62. if (open_tag == "img" && current_style.img != span.style.img)
  63. return true;
  64. for (auto& tag_and_flag : tags_and_flags) {
  65. if (open_tag == tag_and_flag.tag && !(span.style.*tag_and_flag.flag))
  66. return true;
  67. }
  68. return false;
  69. });
  70. if (!it.is_end()) {
  71. // We found an open tag that should
  72. // not be open for the new span. Close
  73. // it and all the open tags that follow
  74. // it.
  75. for (ssize_t j = open_tags.size() - 1; j >= static_cast<ssize_t>(it.index()); --j) {
  76. auto& tag = open_tags[j];
  77. if (tag == "img") {
  78. builder.append("\" />");
  79. current_style.img = {};
  80. continue;
  81. }
  82. builder.appendf("</%s>", tag.characters());
  83. if (tag == "a") {
  84. current_style.href = {};
  85. continue;
  86. }
  87. for (auto& tag_and_flag : tags_and_flags)
  88. if (tag == tag_and_flag.tag)
  89. current_style.*tag_and_flag.flag = false;
  90. }
  91. open_tags.shrink(it.index());
  92. }
  93. if (current_style.href.is_null() && !span.style.href.is_null()) {
  94. open_tags.append("a");
  95. builder.appendf("<a href=\"%s\">", span.style.href.characters());
  96. }
  97. if (current_style.img.is_null() && !span.style.img.is_null()) {
  98. open_tags.append("img");
  99. builder.appendf("<img src=\"%s\" alt=\"", span.style.img.characters());
  100. }
  101. for (auto& tag_and_flag : tags_and_flags) {
  102. if (current_style.*tag_and_flag.flag != span.style.*tag_and_flag.flag) {
  103. open_tags.append(tag_and_flag.tag);
  104. builder.appendf("<%s>", tag_and_flag.tag.characters());
  105. }
  106. }
  107. current_style = span.style;
  108. builder.append(escape_html_entities(span.text));
  109. }
  110. for (ssize_t i = open_tags.size() - 1; i >= 0; --i) {
  111. auto& tag = open_tags[i];
  112. if (tag == "img") {
  113. builder.append("\" />");
  114. continue;
  115. }
  116. builder.appendf("</%s>", tag.characters());
  117. }
  118. return builder.build();
  119. }
  120. String Text::render_for_terminal() const
  121. {
  122. StringBuilder builder;
  123. for (auto& span : m_spans) {
  124. bool needs_styling = span.style.strong || span.style.emph || span.style.code;
  125. if (needs_styling) {
  126. builder.append("\033[");
  127. bool first = true;
  128. if (span.style.strong || span.style.code) {
  129. builder.append('1');
  130. first = false;
  131. }
  132. if (span.style.emph) {
  133. if (!first)
  134. builder.append(';');
  135. builder.append('4');
  136. }
  137. builder.append('m');
  138. }
  139. if (!span.style.href.is_null()) {
  140. if (strstr(span.style.href.characters(), "://") != nullptr) {
  141. builder.append("\033]8;;");
  142. builder.append(span.style.href);
  143. builder.append("\033\\");
  144. }
  145. }
  146. builder.append(span.text.characters());
  147. if (needs_styling)
  148. builder.append("\033[0m");
  149. if (!span.style.href.is_null()) {
  150. // When rendering for the terminal, ignore any
  151. // non-absolute links, because the user has no
  152. // chance to follow them anyway.
  153. if (strstr(span.style.href.characters(), "://") != nullptr) {
  154. builder.appendf(" <%s>", span.style.href.characters());
  155. builder.append("\033]8;;\033\\");
  156. }
  157. }
  158. if (!span.style.img.is_null()) {
  159. if (strstr(span.style.img.characters(), "://") != nullptr) {
  160. builder.appendf(" <%s>", span.style.img.characters());
  161. }
  162. }
  163. }
  164. return builder.build();
  165. }
  166. bool Text::parse(const StringView& str)
  167. {
  168. Style current_style;
  169. size_t current_span_start = 0;
  170. int first_span_in_the_current_link = -1;
  171. bool current_link_is_actually_img = false;
  172. auto append_span_if_needed = [&](size_t offset) {
  173. ASSERT(current_span_start <= offset);
  174. if (current_span_start != offset) {
  175. Span span {
  176. unescape(str.substring_view(current_span_start, offset - current_span_start)),
  177. current_style
  178. };
  179. m_spans.append(move(span));
  180. current_span_start = offset;
  181. }
  182. };
  183. for (size_t offset = 0; offset < str.length(); offset++) {
  184. char ch = str[offset];
  185. bool is_escape = ch == '\\';
  186. if (is_escape && offset != str.length() - 1) {
  187. offset++;
  188. continue;
  189. }
  190. bool is_special_character = false;
  191. is_special_character |= ch == '`';
  192. if (!current_style.code)
  193. is_special_character |= ch == '*' || ch == '_' || ch == '[' || ch == ']' || ch == '!';
  194. if (!is_special_character)
  195. continue;
  196. append_span_if_needed(offset);
  197. switch (ch) {
  198. case '`':
  199. current_style.code = !current_style.code;
  200. break;
  201. case '*':
  202. case '_':
  203. if (offset + 1 < str.length() && str[offset + 1] == ch) {
  204. offset++;
  205. current_style.strong = !current_style.strong;
  206. } else {
  207. current_style.emph = !current_style.emph;
  208. }
  209. break;
  210. case '!':
  211. if (offset + 1 >= str.length() || str[offset + 1] != '[')
  212. continue;
  213. current_link_is_actually_img = true;
  214. break;
  215. case '[':
  216. if (first_span_in_the_current_link != -1)
  217. dbg() << "Dropping the outer link";
  218. first_span_in_the_current_link = m_spans.size();
  219. break;
  220. case ']': {
  221. if (first_span_in_the_current_link == -1) {
  222. dbg() << "Unmatched ]";
  223. continue;
  224. }
  225. ScopeGuard guard = [&] {
  226. first_span_in_the_current_link = -1;
  227. current_link_is_actually_img = false;
  228. };
  229. if (offset + 2 >= str.length() || str[offset + 1] != '(')
  230. continue;
  231. offset += 2;
  232. size_t start_of_href = offset;
  233. do
  234. offset++;
  235. while (offset < str.length() && str[offset] != ')');
  236. if (offset == str.length())
  237. offset--;
  238. const StringView href = str.substring_view(start_of_href, offset - start_of_href);
  239. for (size_t i = first_span_in_the_current_link; i < m_spans.size(); i++) {
  240. if (current_link_is_actually_img)
  241. m_spans[i].style.img = href;
  242. else
  243. m_spans[i].style.href = href;
  244. }
  245. break;
  246. }
  247. default:
  248. ASSERT_NOT_REACHED();
  249. }
  250. // We've processed the character as a special, so the next offset will
  251. // start after it. Note that explicit continue statements skip over this
  252. // line, effectively treating the character as not special.
  253. current_span_start = offset + 1;
  254. }
  255. append_span_if_needed(str.length());
  256. return true;
  257. }
  258. }