Text.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/ScopeGuard.h>
  9. #include <AK/StringBuilder.h>
  10. #include <LibMarkdown/Text.h>
  11. #include <ctype.h>
  12. #include <string.h>
  13. namespace Markdown {
  14. void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
  15. {
  16. builder.append((strong) ? "<strong>" : "<em>");
  17. child->render_to_html(builder);
  18. builder.append((strong) ? "</strong>" : "</em>");
  19. }
  20. void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
  21. {
  22. if (strong) {
  23. builder.append("\e[1m");
  24. child->render_for_terminal(builder);
  25. builder.append("\e[22m");
  26. } else {
  27. builder.append("\e[3m");
  28. child->render_for_terminal(builder);
  29. builder.append("\e[23m");
  30. }
  31. }
  32. size_t Text::EmphasisNode::terminal_length() const
  33. {
  34. return child->terminal_length();
  35. }
  36. void Text::CodeNode::render_to_html(StringBuilder& builder) const
  37. {
  38. builder.append("<code>");
  39. code->render_to_html(builder);
  40. builder.append("</code>");
  41. }
  42. void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
  43. {
  44. builder.append("\e[1m");
  45. code->render_for_terminal(builder);
  46. builder.append("\e[22m");
  47. }
  48. size_t Text::CodeNode::terminal_length() const
  49. {
  50. return code->terminal_length();
  51. }
  52. void Text::BreakNode::render_to_html(StringBuilder& builder) const
  53. {
  54. builder.append("<br />");
  55. }
  56. void Text::BreakNode::render_for_terminal(StringBuilder&) const
  57. {
  58. }
  59. size_t Text::BreakNode::terminal_length() const
  60. {
  61. return 0;
  62. }
  63. void Text::TextNode::render_to_html(StringBuilder& builder) const
  64. {
  65. builder.append(escape_html_entities(text));
  66. }
  67. void Text::TextNode::render_for_terminal(StringBuilder& builder) const
  68. {
  69. if (collapsible && (text == "\n" || text.is_whitespace())) {
  70. builder.append(" ");
  71. } else {
  72. builder.append(text);
  73. }
  74. }
  75. size_t Text::TextNode::terminal_length() const
  76. {
  77. if (collapsible && text.is_whitespace()) {
  78. return 1;
  79. }
  80. return text.length();
  81. }
  82. void Text::LinkNode::render_to_html(StringBuilder& builder) const
  83. {
  84. if (is_image) {
  85. builder.append("<img src=\"");
  86. href->render_to_html(builder);
  87. builder.append("\" alt=\"");
  88. text->render_to_html(builder);
  89. builder.append("\" >");
  90. } else {
  91. builder.append("<a href=\"");
  92. href->render_to_html(builder);
  93. builder.append("\">");
  94. text->render_to_html(builder);
  95. builder.append("</a>");
  96. }
  97. }
  98. void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
  99. {
  100. StringBuilder href_builder;
  101. href->render_for_terminal(href_builder);
  102. String href_string = href_builder.build();
  103. bool is_linked = href_string.contains("://");
  104. if (is_linked) {
  105. builder.append("\e]8;;");
  106. builder.append(href_string);
  107. builder.append("\e\\");
  108. }
  109. text->render_for_terminal(builder);
  110. if (is_linked) {
  111. builder.appendff(" <{}>", href_string);
  112. builder.append("\033]8;;\033\\");
  113. }
  114. }
  115. size_t Text::LinkNode::terminal_length() const
  116. {
  117. return text->terminal_length();
  118. }
  119. void Text::MultiNode::render_to_html(StringBuilder& builder) const
  120. {
  121. for (auto& child : children) {
  122. child.render_to_html(builder);
  123. }
  124. }
  125. void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
  126. {
  127. for (auto& child : children) {
  128. child.render_for_terminal(builder);
  129. }
  130. }
  131. size_t Text::MultiNode::terminal_length() const
  132. {
  133. size_t length = 0;
  134. for (auto& child : children) {
  135. length += child.terminal_length();
  136. }
  137. return length;
  138. }
  139. size_t Text::terminal_length() const
  140. {
  141. return m_node->terminal_length();
  142. }
  143. String Text::render_to_html() const
  144. {
  145. StringBuilder builder;
  146. m_node->render_to_html(builder);
  147. return builder.build().trim(" \n\t");
  148. }
  149. String Text::render_for_terminal() const
  150. {
  151. StringBuilder builder;
  152. m_node->render_for_terminal(builder);
  153. return builder.build().trim(" \n\t");
  154. }
  155. Text Text::parse(StringView const& str)
  156. {
  157. Text text;
  158. auto const tokens = tokenize(str);
  159. auto iterator = tokens.begin();
  160. text.m_node = parse_sequence(iterator, false);
  161. return text;
  162. }
  163. static bool flanking(StringView const& str, size_t start, size_t end, int dir)
  164. {
  165. ssize_t next = ((dir > 0) ? end : start) + dir;
  166. if (next < 0 || next >= (ssize_t)str.length())
  167. return false;
  168. if (isspace(str[next]))
  169. return false;
  170. if (!ispunct(str[next]))
  171. return true;
  172. ssize_t prev = ((dir > 0) ? start : end) - dir;
  173. if (prev < 0 || prev >= (ssize_t)str.length())
  174. return true;
  175. return isspace(str[prev]) || ispunct(str[prev]);
  176. }
  177. Vector<Text::Token> Text::tokenize(StringView const& str)
  178. {
  179. Vector<Token> tokens;
  180. StringBuilder current_token;
  181. auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
  182. if (current_token.is_empty())
  183. return;
  184. tokens.append({
  185. current_token.build(),
  186. left_flanking,
  187. right_flanking,
  188. punct_before,
  189. punct_after,
  190. is_run,
  191. });
  192. current_token.clear();
  193. };
  194. auto flush_token = [&]() {
  195. flush_run(false, false, false, false, false);
  196. };
  197. bool in_space = false;
  198. for (size_t offset = 0; offset < str.length(); ++offset) {
  199. auto has = [&](StringView const& seq) {
  200. if (offset + seq.length() > str.length())
  201. return false;
  202. return str.substring_view(offset, seq.length()) == seq;
  203. };
  204. auto expect = [&](StringView const& seq) {
  205. VERIFY(has(seq));
  206. flush_token();
  207. current_token.append(seq);
  208. flush_token();
  209. offset += seq.length() - 1;
  210. };
  211. char ch = str[offset];
  212. if (ch != ' ' && in_space) {
  213. flush_token();
  214. in_space = false;
  215. }
  216. if (ch == '\\' && offset + 1 < str.length()) {
  217. current_token.append(str[offset + 1]);
  218. ++offset;
  219. } else if (ch == '*' || ch == '_' || ch == '`') {
  220. flush_token();
  221. char delim = ch;
  222. size_t run_offset;
  223. for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
  224. current_token.append(str[run_offset]);
  225. }
  226. flush_run(flanking(str, offset, run_offset - 1, +1),
  227. flanking(str, offset, run_offset - 1, -1),
  228. offset > 0 && ispunct(str[offset - 1]),
  229. run_offset < str.length() && ispunct(str[run_offset]),
  230. true);
  231. offset = run_offset - 1;
  232. } else if (ch == ' ') {
  233. if (!in_space) {
  234. flush_token();
  235. in_space = true;
  236. }
  237. current_token.append(ch);
  238. } else if (has("\n")) {
  239. expect("\n");
  240. } else if (has("[")) {
  241. expect("[");
  242. } else if (has("![")) {
  243. expect("![");
  244. } else if (has("](")) {
  245. expect("](");
  246. } else if (has(")")) {
  247. expect(")");
  248. } else {
  249. current_token.append(ch);
  250. }
  251. }
  252. flush_token();
  253. return tokens;
  254. }
  255. NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
  256. {
  257. auto node = make<MultiNode>();
  258. for (; !tokens.is_end(); ++tokens) {
  259. if (tokens->is_space()) {
  260. node->children.append(parse_break(tokens));
  261. } else if (*tokens == "\n") {
  262. node->children.append(parse_newline(tokens));
  263. } else if (tokens->is_run) {
  264. switch (tokens->run_char()) {
  265. case '*':
  266. case '_':
  267. node->children.append(parse_emph(tokens, in_link));
  268. break;
  269. case '`':
  270. node->children.append(parse_code(tokens));
  271. break;
  272. }
  273. } else if (!in_link && (*tokens == "[" || *tokens == "![")) {
  274. node->children.append(parse_link(tokens));
  275. } else if (in_link && *tokens == "](") {
  276. return node;
  277. } else {
  278. node->children.append(make<TextNode>(tokens->data));
  279. }
  280. if (in_link && !tokens.is_end() && *tokens == "](")
  281. return node;
  282. if (tokens.is_end())
  283. break;
  284. }
  285. return node;
  286. }
  287. NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens)
  288. {
  289. auto next_tok = tokens + 1;
  290. if (next_tok.is_end() || *next_tok != "\n")
  291. return make<TextNode>(tokens->data);
  292. if (tokens->data.length() >= 2)
  293. return make<BreakNode>();
  294. return make<MultiNode>();
  295. }
  296. NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens)
  297. {
  298. auto node = make<TextNode>(tokens->data);
  299. auto next_tok = tokens + 1;
  300. if (!next_tok.is_end() && next_tok->is_space())
  301. // Skip whitespace after newline.
  302. ++tokens;
  303. return node;
  304. }
  305. bool Text::can_open(Token const& opening)
  306. {
  307. return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
  308. }
  309. bool Text::can_close_for(Token const& opening, Text::Token const& closing)
  310. {
  311. if (opening.run_char() != closing.run_char())
  312. return false;
  313. if (opening.run_length() != closing.run_length())
  314. return false;
  315. return (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
  316. }
  317. NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
  318. {
  319. auto opening = *tokens;
  320. // Check that the opening delimiter run is properly flanking.
  321. if (!can_open(opening))
  322. return make<TextNode>(opening.data);
  323. auto child = make<MultiNode>();
  324. for (++tokens; !tokens.is_end(); ++tokens) {
  325. if (tokens->is_space()) {
  326. child->children.append(parse_break(tokens));
  327. } else if (*tokens == "\n") {
  328. child->children.append(parse_newline(tokens));
  329. } else if (tokens->is_run) {
  330. if (can_close_for(opening, *tokens)) {
  331. return make<EmphasisNode>(opening.run_length() >= 2, move(child));
  332. }
  333. switch (tokens->run_char()) {
  334. case '*':
  335. case '_':
  336. child->children.append(parse_emph(tokens, in_link));
  337. break;
  338. case '`':
  339. child->children.append(parse_code(tokens));
  340. break;
  341. }
  342. } else if (*tokens == "[" || *tokens == "![") {
  343. child->children.append(parse_link(tokens));
  344. } else if (in_link && *tokens == "](") {
  345. child->children.prepend(make<TextNode>(opening.data));
  346. return child;
  347. } else {
  348. child->children.append(make<TextNode>(tokens->data));
  349. }
  350. if (in_link && !tokens.is_end() && *tokens == "](") {
  351. child->children.prepend(make<TextNode>(opening.data));
  352. return child;
  353. }
  354. if (tokens.is_end())
  355. break;
  356. }
  357. child->children.prepend(make<TextNode>(opening.data));
  358. return child;
  359. }
  360. NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
  361. {
  362. auto opening = *tokens;
  363. auto is_closing = [&](Token const& token) {
  364. return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
  365. };
  366. bool is_all_whitespace = true;
  367. auto code = make<MultiNode>();
  368. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  369. if (is_closing(*iterator)) {
  370. tokens = iterator;
  371. // Strip first and last space, when appropriate.
  372. if (!is_all_whitespace) {
  373. auto& first = dynamic_cast<TextNode&>(code->children.first());
  374. auto& last = dynamic_cast<TextNode&>(code->children.last());
  375. if (first.text.starts_with(" ") && last.text.ends_with(" ")) {
  376. first.text = first.text.substring(1);
  377. last.text = last.text.substring(0, last.text.length() - 1);
  378. }
  379. }
  380. return make<CodeNode>(move(code));
  381. }
  382. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  383. code->children.append(make<TextNode>((*iterator == "\n") ? " " : iterator->data, false));
  384. }
  385. return make<TextNode>(opening.data);
  386. }
  387. NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
  388. {
  389. auto opening = *tokens++;
  390. bool is_image = opening == "![";
  391. auto link_text = parse_sequence(tokens, true);
  392. if (tokens.is_end() || *tokens != "](") {
  393. link_text->children.prepend(make<TextNode>(opening.data));
  394. return link_text;
  395. }
  396. auto separator = *tokens;
  397. VERIFY(separator == "](");
  398. auto address = make<MultiNode>();
  399. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  400. if (*iterator == ")") {
  401. tokens = iterator;
  402. return make<LinkNode>(is_image, move(link_text), move(address));
  403. }
  404. address->children.append(make<TextNode>(iterator->data));
  405. }
  406. link_text->children.prepend(make<TextNode>(opening.data));
  407. link_text->children.append(make<TextNode>(separator.data));
  408. return link_text;
  409. }
  410. }