Text.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/ScopeGuard.h>
  9. #include <AK/StringBuilder.h>
  10. #include <LibMarkdown/Text.h>
  11. #include <ctype.h>
  12. #include <string.h>
  13. namespace Markdown {
  14. void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
  15. {
  16. builder.append((strong) ? "<strong>" : "<em>");
  17. child->render_to_html(builder);
  18. builder.append((strong) ? "</strong>" : "</em>");
  19. }
  20. void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
  21. {
  22. if (strong) {
  23. builder.append("\e[1m");
  24. child->render_for_terminal(builder);
  25. builder.append("\e[22m");
  26. } else {
  27. builder.append("\e[3m");
  28. child->render_for_terminal(builder);
  29. builder.append("\e[23m");
  30. }
  31. }
  32. size_t Text::EmphasisNode::terminal_length() const
  33. {
  34. return child->terminal_length();
  35. }
  36. void Text::CodeNode::render_to_html(StringBuilder& builder) const
  37. {
  38. builder.append("<code>");
  39. code->render_to_html(builder);
  40. builder.append("</code>");
  41. }
  42. void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
  43. {
  44. builder.append("\e[1m");
  45. code->render_for_terminal(builder);
  46. builder.append("\e[22m");
  47. }
  48. size_t Text::CodeNode::terminal_length() const
  49. {
  50. return code->terminal_length();
  51. }
  52. void Text::TextNode::render_to_html(StringBuilder& builder) const
  53. {
  54. builder.append(escape_html_entities(text));
  55. }
  56. void Text::TextNode::render_for_terminal(StringBuilder& builder) const
  57. {
  58. String text_copy = text;
  59. text_copy.replace("\n", " ");
  60. builder.append(text_copy);
  61. }
  62. size_t Text::TextNode::terminal_length() const
  63. {
  64. return text.length();
  65. }
  66. void Text::LinkNode::render_to_html(StringBuilder& builder) const
  67. {
  68. if (is_image) {
  69. builder.append("<img src=\"");
  70. href->render_to_html(builder);
  71. builder.append("\" alt=\"");
  72. text->render_to_html(builder);
  73. builder.append("\" >");
  74. } else {
  75. builder.append("<a href=\"");
  76. href->render_to_html(builder);
  77. builder.append("\">");
  78. text->render_to_html(builder);
  79. builder.append("</a>");
  80. }
  81. }
  82. void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
  83. {
  84. StringBuilder href_builder;
  85. href->render_for_terminal(href_builder);
  86. String href_string = href_builder.build();
  87. bool is_linked = href_string.contains("://");
  88. if (is_linked) {
  89. builder.append("\e]8;;");
  90. builder.append(href_string);
  91. builder.append("\e\\");
  92. }
  93. text->render_for_terminal(builder);
  94. if (is_linked) {
  95. builder.appendff(" <{}>", href_string);
  96. builder.append("\033]8;;\033\\");
  97. }
  98. }
  99. size_t Text::LinkNode::terminal_length() const
  100. {
  101. return text->terminal_length();
  102. }
  103. void Text::MultiNode::render_to_html(StringBuilder& builder) const
  104. {
  105. for (auto& child : children) {
  106. child.render_to_html(builder);
  107. }
  108. }
  109. void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
  110. {
  111. for (auto& child : children) {
  112. child.render_for_terminal(builder);
  113. }
  114. }
  115. size_t Text::MultiNode::terminal_length() const
  116. {
  117. size_t length = 0;
  118. for (auto& child : children) {
  119. length += child.terminal_length();
  120. }
  121. return length;
  122. }
  123. size_t Text::terminal_length() const
  124. {
  125. return m_node->terminal_length();
  126. }
  127. String Text::render_to_html() const
  128. {
  129. StringBuilder builder;
  130. m_node->render_to_html(builder);
  131. return builder.build().trim(" \n\t");
  132. }
  133. String Text::render_for_terminal() const
  134. {
  135. StringBuilder builder;
  136. m_node->render_for_terminal(builder);
  137. return builder.build().trim(" \n\t");
  138. }
  139. Text Text::parse(StringView const& str)
  140. {
  141. Text text;
  142. auto const tokens = tokenize(str);
  143. auto iterator = tokens.begin();
  144. text.m_node = parse_sequence(iterator, false);
  145. return text;
  146. }
  147. Vector<Text::Token> Text::tokenize(StringView const& str)
  148. {
  149. Vector<Token> tokens;
  150. StringBuilder current_token;
  151. auto flush_token = [&](bool left_flanking, bool right_flanking, bool is_run) {
  152. if (current_token.is_empty())
  153. return;
  154. tokens.append({
  155. current_token.build(),
  156. left_flanking,
  157. right_flanking,
  158. is_run,
  159. });
  160. current_token.clear();
  161. };
  162. for (size_t offset = 0; offset < str.length(); ++offset) {
  163. auto has = [&](StringView const& seq) {
  164. if (offset + seq.length() > str.length())
  165. return false;
  166. return str.substring_view(offset, seq.length()) == seq;
  167. };
  168. auto expect = [&](StringView const& seq) {
  169. VERIFY(has(seq));
  170. flush_token(false, false, false);
  171. current_token.append(seq);
  172. flush_token(false, false, false);
  173. offset += seq.length() - 1;
  174. };
  175. char ch = str[offset];
  176. if (ch == '\\' && offset + 1 < str.length()) {
  177. current_token.append(str[offset + 1]);
  178. ++offset;
  179. } else if (ch == '*' || ch == '_' || ch == '`') {
  180. flush_token(false, false, false);
  181. char delim = ch;
  182. size_t run_offset;
  183. for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
  184. current_token.append(str[run_offset]);
  185. }
  186. bool left_flanking = run_offset < str.length() && !isspace(str[run_offset]);
  187. bool right_flanking = offset > 0 && !isspace(str[offset - 1]);
  188. flush_token(left_flanking, right_flanking, true);
  189. offset = run_offset - 1;
  190. } else if (ch == '\n') {
  191. flush_token(false, false, false);
  192. current_token.append(ch);
  193. flush_token(false, false, false);
  194. } else if (has("[")) {
  195. expect("[");
  196. } else if (has("![")) {
  197. expect("![");
  198. } else if (has("](")) {
  199. expect("](");
  200. } else if (has(")")) {
  201. expect(")");
  202. } else {
  203. current_token.append(ch);
  204. }
  205. }
  206. flush_token(false, false, false);
  207. return tokens;
  208. }
  209. NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
  210. {
  211. auto node = make<MultiNode>();
  212. for (; !tokens.is_end(); ++tokens) {
  213. if (tokens->is_run) {
  214. switch (tokens->run_char()) {
  215. case '*':
  216. case '_':
  217. node->children.append(parse_emph(tokens, in_link));
  218. break;
  219. case '`':
  220. node->children.append(parse_code(tokens));
  221. break;
  222. }
  223. } else if (!in_link && (*tokens == "[" || *tokens == "![")) {
  224. node->children.append(parse_link(tokens));
  225. } else if (in_link && *tokens == "](") {
  226. return node;
  227. } else {
  228. node->children.append(make<TextNode>(tokens->data));
  229. }
  230. if (in_link && !tokens.is_end() && *tokens == "](")
  231. return node;
  232. if (tokens.is_end())
  233. break;
  234. }
  235. return node;
  236. }
  237. bool Text::can_open(Token const& opening)
  238. {
  239. return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && !opening.right_flanking);
  240. }
  241. bool Text::can_close_for(Token const& opening, Text::Token const& closing)
  242. {
  243. if (opening.run_char() != closing.run_char())
  244. return false;
  245. if (opening.run_length() != closing.run_length())
  246. return false;
  247. return (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && !closing.left_flanking && closing.right_flanking);
  248. }
  249. NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
  250. {
  251. auto opening = *tokens;
  252. // Check that the opening delimiter run is properly flanking.
  253. if (!can_open(opening))
  254. return make<TextNode>(opening.data);
  255. auto child = make<MultiNode>();
  256. for (++tokens; !tokens.is_end(); ++tokens) {
  257. if (tokens->is_run) {
  258. if (can_close_for(opening, *tokens)) {
  259. return make<EmphasisNode>(opening.run_length() >= 2, move(child));
  260. }
  261. switch (tokens->run_char()) {
  262. case '*':
  263. case '_':
  264. child->children.append(parse_emph(tokens, in_link));
  265. break;
  266. case '`':
  267. child->children.append(parse_code(tokens));
  268. break;
  269. }
  270. } else if (*tokens == "[" || *tokens == "![") {
  271. child->children.append(parse_link(tokens));
  272. } else if (in_link && *tokens == "](") {
  273. child->children.prepend(make<TextNode>(opening.data));
  274. return child;
  275. } else {
  276. child->children.append(make<TextNode>(tokens->data));
  277. }
  278. if (in_link && !tokens.is_end() && *tokens == "](") {
  279. child->children.prepend(make<TextNode>(opening.data));
  280. return child;
  281. }
  282. if (tokens.is_end())
  283. break;
  284. }
  285. child->children.prepend(make<TextNode>(opening.data));
  286. return child;
  287. }
  288. NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
  289. {
  290. auto opening = *tokens;
  291. auto is_closing = [&](Token const& token) {
  292. return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
  293. };
  294. bool is_all_whitespace = true;
  295. auto code = make<MultiNode>();
  296. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  297. if (is_closing(*iterator)) {
  298. tokens = iterator;
  299. // Strip first and last space, when appropriate.
  300. if (!is_all_whitespace) {
  301. auto& first = dynamic_cast<TextNode&>(code->children.first());
  302. auto& last = dynamic_cast<TextNode&>(code->children.last());
  303. if (first.text.starts_with(" ") && last.text.ends_with(" ")) {
  304. first.text = first.text.substring(1);
  305. last.text = last.text.substring(0, last.text.length() - 1);
  306. }
  307. }
  308. return make<CodeNode>(move(code));
  309. }
  310. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  311. code->children.append(make<TextNode>((*iterator == "\n") ? " " : iterator->data));
  312. }
  313. return make<TextNode>(opening.data);
  314. }
  315. NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
  316. {
  317. auto opening = *tokens++;
  318. bool is_image = opening == "![";
  319. auto link_text = parse_sequence(tokens, true);
  320. if (tokens.is_end() || *tokens != "](") {
  321. link_text->children.prepend(make<TextNode>(opening.data));
  322. return link_text;
  323. }
  324. auto seperator = *tokens;
  325. VERIFY(seperator == "](");
  326. auto address = make<MultiNode>();
  327. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  328. if (*iterator == ")") {
  329. tokens = iterator;
  330. return make<LinkNode>(is_image, move(link_text), move(address));
  331. }
  332. address->children.append(make<TextNode>(iterator->data));
  333. }
  334. link_text->children.prepend(make<TextNode>(opening.data));
  335. link_text->children.append(make<TextNode>(seperator.data));
  336. return link_text;
  337. }
  338. }