Text.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/ScopeGuard.h>
  9. #include <AK/StringBuilder.h>
  10. #include <LibMarkdown/Text.h>
  11. #include <LibMarkdown/Visitor.h>
  12. #include <ctype.h>
  13. #include <string.h>
  14. namespace Markdown {
  15. void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
  16. {
  17. builder.append((strong) ? "<strong>"sv : "<em>"sv);
  18. child->render_to_html(builder);
  19. builder.append((strong) ? "</strong>"sv : "</em>"sv);
  20. }
  21. void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
  22. {
  23. if (strong) {
  24. builder.append("\e[1m"sv);
  25. child->render_for_terminal(builder);
  26. builder.append("\e[22m"sv);
  27. } else {
  28. builder.append("\e[3m"sv);
  29. child->render_for_terminal(builder);
  30. builder.append("\e[23m"sv);
  31. }
  32. }
  33. size_t Text::EmphasisNode::terminal_length() const
  34. {
  35. return child->terminal_length();
  36. }
  37. RecursionDecision Text::EmphasisNode::walk(Visitor& visitor) const
  38. {
  39. RecursionDecision rd = visitor.visit(*this);
  40. if (rd != RecursionDecision::Recurse)
  41. return rd;
  42. return child->walk(visitor);
  43. }
  44. void Text::CodeNode::render_to_html(StringBuilder& builder) const
  45. {
  46. builder.append("<code>"sv);
  47. code->render_to_html(builder);
  48. builder.append("</code>"sv);
  49. }
  50. void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
  51. {
  52. builder.append("\e[1m"sv);
  53. code->render_for_terminal(builder);
  54. builder.append("\e[22m"sv);
  55. }
  56. size_t Text::CodeNode::terminal_length() const
  57. {
  58. return code->terminal_length();
  59. }
  60. RecursionDecision Text::CodeNode::walk(Visitor& visitor) const
  61. {
  62. RecursionDecision rd = visitor.visit(*this);
  63. if (rd != RecursionDecision::Recurse)
  64. return rd;
  65. return code->walk(visitor);
  66. }
  67. void Text::BreakNode::render_to_html(StringBuilder& builder) const
  68. {
  69. builder.append("<br />"sv);
  70. }
  71. void Text::BreakNode::render_for_terminal(StringBuilder&) const
  72. {
  73. }
  74. size_t Text::BreakNode::terminal_length() const
  75. {
  76. return 0;
  77. }
  78. RecursionDecision Text::BreakNode::walk(Visitor& visitor) const
  79. {
  80. RecursionDecision rd = visitor.visit(*this);
  81. if (rd != RecursionDecision::Recurse)
  82. return rd;
  83. // Normalize return value
  84. return RecursionDecision::Continue;
  85. }
  86. void Text::TextNode::render_to_html(StringBuilder& builder) const
  87. {
  88. builder.append(escape_html_entities(text));
  89. }
  90. void Text::TextNode::render_for_terminal(StringBuilder& builder) const
  91. {
  92. if (collapsible && (text == "\n" || text.is_whitespace())) {
  93. builder.append(' ');
  94. } else {
  95. builder.append(text);
  96. }
  97. }
  98. size_t Text::TextNode::terminal_length() const
  99. {
  100. if (collapsible && text.is_whitespace()) {
  101. return 1;
  102. }
  103. return text.length();
  104. }
  105. RecursionDecision Text::TextNode::walk(Visitor& visitor) const
  106. {
  107. RecursionDecision rd = visitor.visit(*this);
  108. if (rd != RecursionDecision::Recurse)
  109. return rd;
  110. rd = visitor.visit(text);
  111. if (rd != RecursionDecision::Recurse)
  112. return rd;
  113. // Normalize return value
  114. return RecursionDecision::Continue;
  115. }
  116. void Text::LinkNode::render_to_html(StringBuilder& builder) const
  117. {
  118. if (is_image) {
  119. builder.append("<img src=\""sv);
  120. builder.append(escape_html_entities(href));
  121. builder.append("\" alt=\""sv);
  122. text->render_to_html(builder);
  123. builder.append("\" >"sv);
  124. } else {
  125. builder.append("<a href=\""sv);
  126. builder.append(escape_html_entities(href));
  127. builder.append("\">"sv);
  128. text->render_to_html(builder);
  129. builder.append("</a>"sv);
  130. }
  131. }
  132. void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
  133. {
  134. bool is_linked = href.contains("://"sv);
  135. if (is_linked) {
  136. builder.append("\033[0;34m\e]8;;"sv);
  137. builder.append(href);
  138. builder.append("\e\\"sv);
  139. }
  140. text->render_for_terminal(builder);
  141. if (is_linked) {
  142. builder.appendff(" <{}>", href);
  143. builder.append("\033]8;;\033\\\033[0m"sv);
  144. }
  145. }
  146. size_t Text::LinkNode::terminal_length() const
  147. {
  148. return text->terminal_length();
  149. }
  150. RecursionDecision Text::LinkNode::walk(Visitor& visitor) const
  151. {
  152. RecursionDecision rd = visitor.visit(*this);
  153. if (rd != RecursionDecision::Recurse)
  154. return rd;
  155. // Don't recurse on href.
  156. return text->walk(visitor);
  157. }
  158. void Text::MultiNode::render_to_html(StringBuilder& builder) const
  159. {
  160. for (auto& child : children) {
  161. child.render_to_html(builder);
  162. }
  163. }
  164. void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
  165. {
  166. for (auto& child : children) {
  167. child.render_for_terminal(builder);
  168. }
  169. }
  170. size_t Text::MultiNode::terminal_length() const
  171. {
  172. size_t length = 0;
  173. for (auto& child : children) {
  174. length += child.terminal_length();
  175. }
  176. return length;
  177. }
  178. RecursionDecision Text::MultiNode::walk(Visitor& visitor) const
  179. {
  180. RecursionDecision rd = visitor.visit(*this);
  181. if (rd != RecursionDecision::Recurse)
  182. return rd;
  183. for (auto const& child : children) {
  184. rd = child.walk(visitor);
  185. if (rd == RecursionDecision::Break)
  186. return rd;
  187. }
  188. return RecursionDecision::Continue;
  189. }
  190. void Text::StrikeThroughNode::render_to_html(StringBuilder& builder) const
  191. {
  192. builder.append("<del>"sv);
  193. striked_text->render_to_html(builder);
  194. builder.append("</del>"sv);
  195. }
  196. void Text::StrikeThroughNode::render_for_terminal(StringBuilder& builder) const
  197. {
  198. builder.append("\e[9m"sv);
  199. striked_text->render_for_terminal(builder);
  200. builder.append("\e[29m"sv);
  201. }
  202. size_t Text::StrikeThroughNode::terminal_length() const
  203. {
  204. return striked_text->terminal_length();
  205. }
  206. RecursionDecision Text::StrikeThroughNode::walk(Visitor& visitor) const
  207. {
  208. RecursionDecision rd = visitor.visit(*this);
  209. if (rd != RecursionDecision::Recurse)
  210. return rd;
  211. return striked_text->walk(visitor);
  212. }
  213. size_t Text::terminal_length() const
  214. {
  215. return m_node->terminal_length();
  216. }
  217. String Text::render_to_html() const
  218. {
  219. StringBuilder builder;
  220. m_node->render_to_html(builder);
  221. return builder.build().trim(" \n\t"sv);
  222. }
  223. String Text::render_for_terminal() const
  224. {
  225. StringBuilder builder;
  226. m_node->render_for_terminal(builder);
  227. return builder.build().trim(" \n\t"sv);
  228. }
  229. RecursionDecision Text::walk(Visitor& visitor) const
  230. {
  231. RecursionDecision rd = visitor.visit(*this);
  232. if (rd != RecursionDecision::Recurse)
  233. return rd;
  234. return m_node->walk(visitor);
  235. }
  236. Text Text::parse(StringView str)
  237. {
  238. Text text;
  239. auto const tokens = tokenize(str);
  240. auto iterator = tokens.begin();
  241. text.m_node = parse_sequence(iterator, false);
  242. return text;
  243. }
  244. static bool flanking(StringView str, size_t start, size_t end, int dir)
  245. {
  246. ssize_t next = ((dir > 0) ? end : start) + dir;
  247. if (next < 0 || next >= (ssize_t)str.length())
  248. return false;
  249. if (isspace(str[next]))
  250. return false;
  251. if (!ispunct(str[next]))
  252. return true;
  253. ssize_t prev = ((dir > 0) ? start : end) - dir;
  254. if (prev < 0 || prev >= (ssize_t)str.length())
  255. return true;
  256. return isspace(str[prev]) || ispunct(str[prev]);
  257. }
  258. Vector<Text::Token> Text::tokenize(StringView str)
  259. {
  260. Vector<Token> tokens;
  261. StringBuilder current_token;
  262. auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
  263. if (current_token.is_empty())
  264. return;
  265. tokens.append({
  266. current_token.build(),
  267. left_flanking,
  268. right_flanking,
  269. punct_before,
  270. punct_after,
  271. is_run,
  272. });
  273. current_token.clear();
  274. };
  275. auto flush_token = [&]() {
  276. flush_run(false, false, false, false, false);
  277. };
  278. bool in_space = false;
  279. for (size_t offset = 0; offset < str.length(); ++offset) {
  280. auto has = [&](StringView seq) {
  281. if (offset + seq.length() > str.length())
  282. return false;
  283. return str.substring_view(offset, seq.length()) == seq;
  284. };
  285. auto expect = [&](StringView seq) {
  286. VERIFY(has(seq));
  287. flush_token();
  288. current_token.append(seq);
  289. flush_token();
  290. offset += seq.length() - 1;
  291. };
  292. char ch = str[offset];
  293. if (ch != ' ' && in_space) {
  294. flush_token();
  295. in_space = false;
  296. }
  297. if (ch == '\\' && offset + 1 < str.length() && ispunct(str[offset + 1])) {
  298. current_token.append(str[offset + 1]);
  299. ++offset;
  300. } else if (ch == '*' || ch == '_' || ch == '`' || ch == '~') {
  301. flush_token();
  302. char delim = ch;
  303. size_t run_offset;
  304. for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
  305. current_token.append(str[run_offset]);
  306. }
  307. flush_run(flanking(str, offset, run_offset - 1, +1),
  308. flanking(str, offset, run_offset - 1, -1),
  309. offset > 0 && ispunct(str[offset - 1]),
  310. run_offset < str.length() && ispunct(str[run_offset]),
  311. true);
  312. offset = run_offset - 1;
  313. } else if (ch == ' ') {
  314. if (!in_space) {
  315. flush_token();
  316. in_space = true;
  317. }
  318. current_token.append(ch);
  319. } else if (has("\n"sv)) {
  320. expect("\n"sv);
  321. } else if (has("["sv)) {
  322. expect("["sv);
  323. } else if (has("!["sv)) {
  324. expect("!["sv);
  325. } else if (has("]("sv)) {
  326. expect("]("sv);
  327. } else if (has(")"sv)) {
  328. expect(")"sv);
  329. } else {
  330. current_token.append(ch);
  331. }
  332. }
  333. flush_token();
  334. return tokens;
  335. }
  336. NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
  337. {
  338. auto node = make<MultiNode>();
  339. for (; !tokens.is_end(); ++tokens) {
  340. if (tokens->is_space()) {
  341. node->children.append(parse_break(tokens));
  342. } else if (*tokens == "\n"sv) {
  343. node->children.append(parse_newline(tokens));
  344. } else if (tokens->is_run) {
  345. switch (tokens->run_char()) {
  346. case '*':
  347. case '_':
  348. node->children.append(parse_emph(tokens, in_link));
  349. break;
  350. case '`':
  351. node->children.append(parse_code(tokens));
  352. break;
  353. case '~':
  354. node->children.append(parse_strike_through(tokens));
  355. break;
  356. }
  357. } else if (*tokens == "["sv || *tokens == "!["sv) {
  358. node->children.append(parse_link(tokens));
  359. } else if (in_link && *tokens == "]("sv) {
  360. return node;
  361. } else {
  362. node->children.append(make<TextNode>(tokens->data));
  363. }
  364. if (in_link && !tokens.is_end() && *tokens == "]("sv)
  365. return node;
  366. if (tokens.is_end())
  367. break;
  368. }
  369. return node;
  370. }
  371. NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens)
  372. {
  373. auto next_tok = tokens + 1;
  374. if (next_tok.is_end() || *next_tok != "\n"sv)
  375. return make<TextNode>(tokens->data);
  376. if (tokens->data.length() >= 2)
  377. return make<BreakNode>();
  378. return make<MultiNode>();
  379. }
  380. NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens)
  381. {
  382. auto node = make<TextNode>(tokens->data);
  383. auto next_tok = tokens + 1;
  384. if (!next_tok.is_end() && next_tok->is_space())
  385. // Skip whitespace after newline.
  386. ++tokens;
  387. return node;
  388. }
  389. bool Text::can_open(Token const& opening)
  390. {
  391. return (opening.run_char() == '~' && opening.left_flanking) || (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
  392. }
  393. bool Text::can_close_for(Token const& opening, Text::Token const& closing)
  394. {
  395. if (opening.run_char() != closing.run_char())
  396. return false;
  397. if (opening.run_length() != closing.run_length())
  398. return false;
  399. return (opening.run_char() == '~' && closing.right_flanking) || (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
  400. }
  401. NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
  402. {
  403. auto opening = *tokens;
  404. // Check that the opening delimiter run is properly flanking.
  405. if (!can_open(opening))
  406. return make<TextNode>(opening.data);
  407. auto child = make<MultiNode>();
  408. for (++tokens; !tokens.is_end(); ++tokens) {
  409. if (tokens->is_space()) {
  410. child->children.append(parse_break(tokens));
  411. } else if (*tokens == "\n"sv) {
  412. child->children.append(parse_newline(tokens));
  413. } else if (tokens->is_run) {
  414. if (can_close_for(opening, *tokens)) {
  415. return make<EmphasisNode>(opening.run_length() >= 2, move(child));
  416. }
  417. switch (tokens->run_char()) {
  418. case '*':
  419. case '_':
  420. child->children.append(parse_emph(tokens, in_link));
  421. break;
  422. case '`':
  423. child->children.append(parse_code(tokens));
  424. break;
  425. case '~':
  426. child->children.append(parse_strike_through(tokens));
  427. break;
  428. }
  429. } else if (*tokens == "["sv || *tokens == "!["sv) {
  430. child->children.append(parse_link(tokens));
  431. } else if (in_link && *tokens == "]("sv) {
  432. child->children.prepend(make<TextNode>(opening.data));
  433. return child;
  434. } else {
  435. child->children.append(make<TextNode>(tokens->data));
  436. }
  437. if (in_link && !tokens.is_end() && *tokens == "]("sv) {
  438. child->children.prepend(make<TextNode>(opening.data));
  439. return child;
  440. }
  441. if (tokens.is_end())
  442. break;
  443. }
  444. child->children.prepend(make<TextNode>(opening.data));
  445. return child;
  446. }
  447. NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
  448. {
  449. auto opening = *tokens;
  450. auto is_closing = [&](Token const& token) {
  451. return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
  452. };
  453. bool is_all_whitespace = true;
  454. auto code = make<MultiNode>();
  455. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  456. if (is_closing(*iterator)) {
  457. tokens = iterator;
  458. // Strip first and last space, when appropriate.
  459. if (!is_all_whitespace) {
  460. auto& first = dynamic_cast<TextNode&>(code->children.first());
  461. auto& last = dynamic_cast<TextNode&>(code->children.last());
  462. if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
  463. first.text = first.text.substring(1);
  464. last.text = last.text.substring(0, last.text.length() - 1);
  465. }
  466. }
  467. return make<CodeNode>(move(code));
  468. }
  469. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  470. code->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
  471. }
  472. return make<TextNode>(opening.data);
  473. }
  474. NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
  475. {
  476. auto opening = *tokens++;
  477. bool is_image = opening == "!["sv;
  478. auto link_text = parse_sequence(tokens, true);
  479. if (tokens.is_end() || *tokens != "]("sv) {
  480. link_text->children.prepend(make<TextNode>(opening.data));
  481. return link_text;
  482. }
  483. auto separator = *tokens;
  484. VERIFY(separator == "]("sv);
  485. StringBuilder address;
  486. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  487. if (*iterator == ")"sv) {
  488. tokens = iterator;
  489. return make<LinkNode>(is_image, move(link_text), address.build());
  490. }
  491. address.append(iterator->data);
  492. }
  493. link_text->children.prepend(make<TextNode>(opening.data));
  494. link_text->children.append(make<TextNode>(separator.data));
  495. return link_text;
  496. }
  497. NonnullOwnPtr<Text::Node> Text::parse_strike_through(Vector<Token>::ConstIterator& tokens)
  498. {
  499. auto opening = *tokens;
  500. auto is_closing = [&](Token const& token) {
  501. return token.is_run && token.run_char() == '~' && token.run_length() == opening.run_length();
  502. };
  503. bool is_all_whitespace = true;
  504. auto striked_text = make<MultiNode>();
  505. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  506. if (is_closing(*iterator)) {
  507. tokens = iterator;
  508. if (!is_all_whitespace) {
  509. auto& first = dynamic_cast<TextNode&>(striked_text->children.first());
  510. auto& last = dynamic_cast<TextNode&>(striked_text->children.last());
  511. if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
  512. first.text = first.text.substring(1);
  513. last.text = last.text.substring(0, last.text.length() - 1);
  514. }
  515. }
  516. return make<StrikeThroughNode>(move(striked_text));
  517. }
  518. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  519. striked_text->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
  520. }
  521. return make<TextNode>(opening.data);
  522. }
  523. }