Text.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/ScopeGuard.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibMarkdown/Text.h>
  10. #include <LibMarkdown/Visitor.h>
  11. #include <ctype.h>
  12. #include <string.h>
  13. namespace Markdown {
  14. void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
  15. {
  16. builder.append((strong) ? "<strong>"sv : "<em>"sv);
  17. child->render_to_html(builder);
  18. builder.append((strong) ? "</strong>"sv : "</em>"sv);
  19. }
  20. void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
  21. {
  22. if (strong) {
  23. builder.append("\e[1m"sv);
  24. child->render_for_terminal(builder);
  25. builder.append("\e[22m"sv);
  26. } else {
  27. builder.append("\e[3m"sv);
  28. child->render_for_terminal(builder);
  29. builder.append("\e[23m"sv);
  30. }
  31. }
  32. size_t Text::EmphasisNode::terminal_length() const
  33. {
  34. return child->terminal_length();
  35. }
  36. RecursionDecision Text::EmphasisNode::walk(Visitor& visitor) const
  37. {
  38. RecursionDecision rd = visitor.visit(*this);
  39. if (rd != RecursionDecision::Recurse)
  40. return rd;
  41. return child->walk(visitor);
  42. }
  43. void Text::CodeNode::render_to_html(StringBuilder& builder) const
  44. {
  45. builder.append("<code>"sv);
  46. code->render_to_html(builder);
  47. builder.append("</code>"sv);
  48. }
  49. void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
  50. {
  51. builder.append("\e[1m"sv);
  52. code->render_for_terminal(builder);
  53. builder.append("\e[22m"sv);
  54. }
  55. size_t Text::CodeNode::terminal_length() const
  56. {
  57. return code->terminal_length();
  58. }
  59. RecursionDecision Text::CodeNode::walk(Visitor& visitor) const
  60. {
  61. RecursionDecision rd = visitor.visit(*this);
  62. if (rd != RecursionDecision::Recurse)
  63. return rd;
  64. return code->walk(visitor);
  65. }
  66. void Text::BreakNode::render_to_html(StringBuilder& builder) const
  67. {
  68. builder.append("<br />"sv);
  69. }
  70. void Text::BreakNode::render_for_terminal(StringBuilder&) const
  71. {
  72. }
  73. size_t Text::BreakNode::terminal_length() const
  74. {
  75. return 0;
  76. }
  77. RecursionDecision Text::BreakNode::walk(Visitor& visitor) const
  78. {
  79. RecursionDecision rd = visitor.visit(*this);
  80. if (rd != RecursionDecision::Recurse)
  81. return rd;
  82. // Normalize return value
  83. return RecursionDecision::Continue;
  84. }
  85. void Text::TextNode::render_to_html(StringBuilder& builder) const
  86. {
  87. builder.append(escape_html_entities(text));
  88. }
  89. void Text::TextNode::render_for_terminal(StringBuilder& builder) const
  90. {
  91. if (collapsible && (text == "\n" || text.is_whitespace())) {
  92. builder.append(' ');
  93. } else {
  94. builder.append(text);
  95. }
  96. }
  97. size_t Text::TextNode::terminal_length() const
  98. {
  99. if (collapsible && text.is_whitespace()) {
  100. return 1;
  101. }
  102. return text.length();
  103. }
  104. RecursionDecision Text::TextNode::walk(Visitor& visitor) const
  105. {
  106. RecursionDecision rd = visitor.visit(*this);
  107. if (rd != RecursionDecision::Recurse)
  108. return rd;
  109. rd = visitor.visit(text);
  110. if (rd != RecursionDecision::Recurse)
  111. return rd;
  112. // Normalize return value
  113. return RecursionDecision::Continue;
  114. }
  115. void Text::LinkNode::render_to_html(StringBuilder& builder) const
  116. {
  117. if (is_image) {
  118. builder.append("<img src=\""sv);
  119. builder.append(escape_html_entities(href));
  120. if (has_image_dimensions()) {
  121. builder.append("\" style=\""sv);
  122. if (image_width.has_value())
  123. builder.appendff("width: {}px;", *image_width);
  124. if (image_height.has_value())
  125. builder.appendff("height: {}px;", *image_height);
  126. }
  127. builder.append("\" alt=\""sv);
  128. text->render_to_html(builder);
  129. builder.append("\" >"sv);
  130. } else {
  131. builder.append("<a href=\""sv);
  132. builder.append(escape_html_entities(href));
  133. builder.append("\">"sv);
  134. text->render_to_html(builder);
  135. builder.append("</a>"sv);
  136. }
  137. }
  138. void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
  139. {
  140. bool is_linked = href.contains("://"sv);
  141. if (is_linked) {
  142. builder.append("\033[0;34m\e]8;;"sv);
  143. builder.append(href);
  144. builder.append("\e\\"sv);
  145. }
  146. text->render_for_terminal(builder);
  147. if (is_linked) {
  148. builder.appendff(" <{}>", href);
  149. builder.append("\033]8;;\033\\\033[0m"sv);
  150. }
  151. }
  152. size_t Text::LinkNode::terminal_length() const
  153. {
  154. return text->terminal_length();
  155. }
  156. RecursionDecision Text::LinkNode::walk(Visitor& visitor) const
  157. {
  158. RecursionDecision rd = visitor.visit(*this);
  159. if (rd != RecursionDecision::Recurse)
  160. return rd;
  161. // Don't recurse on href.
  162. return text->walk(visitor);
  163. }
  164. void Text::MultiNode::render_to_html(StringBuilder& builder) const
  165. {
  166. for (auto& child : children) {
  167. child->render_to_html(builder);
  168. }
  169. }
  170. void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
  171. {
  172. for (auto& child : children) {
  173. child->render_for_terminal(builder);
  174. }
  175. }
  176. size_t Text::MultiNode::terminal_length() const
  177. {
  178. size_t length = 0;
  179. for (auto& child : children) {
  180. length += child->terminal_length();
  181. }
  182. return length;
  183. }
  184. RecursionDecision Text::MultiNode::walk(Visitor& visitor) const
  185. {
  186. RecursionDecision rd = visitor.visit(*this);
  187. if (rd != RecursionDecision::Recurse)
  188. return rd;
  189. for (auto const& child : children) {
  190. rd = child->walk(visitor);
  191. if (rd == RecursionDecision::Break)
  192. return rd;
  193. }
  194. return RecursionDecision::Continue;
  195. }
  196. void Text::StrikeThroughNode::render_to_html(StringBuilder& builder) const
  197. {
  198. builder.append("<del>"sv);
  199. striked_text->render_to_html(builder);
  200. builder.append("</del>"sv);
  201. }
  202. void Text::StrikeThroughNode::render_for_terminal(StringBuilder& builder) const
  203. {
  204. builder.append("\e[9m"sv);
  205. striked_text->render_for_terminal(builder);
  206. builder.append("\e[29m"sv);
  207. }
  208. size_t Text::StrikeThroughNode::terminal_length() const
  209. {
  210. return striked_text->terminal_length();
  211. }
  212. RecursionDecision Text::StrikeThroughNode::walk(Visitor& visitor) const
  213. {
  214. RecursionDecision rd = visitor.visit(*this);
  215. if (rd != RecursionDecision::Recurse)
  216. return rd;
  217. return striked_text->walk(visitor);
  218. }
  219. size_t Text::terminal_length() const
  220. {
  221. return m_node->terminal_length();
  222. }
  223. DeprecatedString Text::render_to_html() const
  224. {
  225. StringBuilder builder;
  226. m_node->render_to_html(builder);
  227. return builder.to_deprecated_string().trim(" \n\t"sv);
  228. }
  229. DeprecatedString Text::render_for_terminal() const
  230. {
  231. StringBuilder builder;
  232. m_node->render_for_terminal(builder);
  233. return builder.to_deprecated_string().trim(" \n\t"sv);
  234. }
  235. RecursionDecision Text::walk(Visitor& visitor) const
  236. {
  237. RecursionDecision rd = visitor.visit(*this);
  238. if (rd != RecursionDecision::Recurse)
  239. return rd;
  240. return m_node->walk(visitor);
  241. }
  242. Text Text::parse(StringView str)
  243. {
  244. Text text;
  245. auto const tokens = tokenize(str);
  246. auto iterator = tokens.begin();
  247. text.m_node = parse_sequence(iterator, false);
  248. return text;
  249. }
  250. static bool flanking(StringView str, size_t start, size_t end, int dir)
  251. {
  252. ssize_t next = ((dir > 0) ? end : start) + dir;
  253. if (next < 0 || next >= (ssize_t)str.length())
  254. return false;
  255. if (isspace(str[next]))
  256. return false;
  257. if (!ispunct(str[next]))
  258. return true;
  259. ssize_t prev = ((dir > 0) ? start : end) - dir;
  260. if (prev < 0 || prev >= (ssize_t)str.length())
  261. return true;
  262. return isspace(str[prev]) || ispunct(str[prev]);
  263. }
  264. Vector<Text::Token> Text::tokenize(StringView str)
  265. {
  266. Vector<Token> tokens;
  267. StringBuilder current_token;
  268. auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
  269. if (current_token.is_empty())
  270. return;
  271. tokens.append({
  272. current_token.to_deprecated_string(),
  273. left_flanking,
  274. right_flanking,
  275. punct_before,
  276. punct_after,
  277. is_run,
  278. });
  279. current_token.clear();
  280. };
  281. auto flush_token = [&]() {
  282. flush_run(false, false, false, false, false);
  283. };
  284. bool in_space = false;
  285. for (size_t offset = 0; offset < str.length(); ++offset) {
  286. auto has = [&](StringView seq) {
  287. if (offset + seq.length() > str.length())
  288. return false;
  289. return str.substring_view(offset, seq.length()) == seq;
  290. };
  291. auto expect = [&](StringView seq) {
  292. VERIFY(has(seq));
  293. flush_token();
  294. current_token.append(seq);
  295. flush_token();
  296. offset += seq.length() - 1;
  297. };
  298. char ch = str[offset];
  299. if (ch != ' ' && in_space) {
  300. flush_token();
  301. in_space = false;
  302. }
  303. if (ch == '\\' && offset + 1 < str.length() && ispunct(str[offset + 1])) {
  304. current_token.append(str[offset + 1]);
  305. ++offset;
  306. } else if (ch == '*' || ch == '_' || ch == '`' || ch == '~') {
  307. flush_token();
  308. char delim = ch;
  309. size_t run_offset;
  310. for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
  311. current_token.append(str[run_offset]);
  312. }
  313. flush_run(flanking(str, offset, run_offset - 1, +1),
  314. flanking(str, offset, run_offset - 1, -1),
  315. offset > 0 && ispunct(str[offset - 1]),
  316. run_offset < str.length() && ispunct(str[run_offset]),
  317. true);
  318. offset = run_offset - 1;
  319. } else if (ch == ' ') {
  320. if (!in_space) {
  321. flush_token();
  322. in_space = true;
  323. }
  324. current_token.append(ch);
  325. } else if (has("\n"sv)) {
  326. expect("\n"sv);
  327. } else if (has("["sv)) {
  328. expect("["sv);
  329. } else if (has("!["sv)) {
  330. expect("!["sv);
  331. } else if (has("]("sv)) {
  332. expect("]("sv);
  333. } else if (has(")"sv)) {
  334. expect(")"sv);
  335. } else {
  336. current_token.append(ch);
  337. }
  338. }
  339. flush_token();
  340. return tokens;
  341. }
  342. NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
  343. {
  344. auto node = make<MultiNode>();
  345. for (; !tokens.is_end(); ++tokens) {
  346. if (tokens->is_space()) {
  347. node->children.append(parse_break(tokens));
  348. } else if (*tokens == "\n"sv) {
  349. node->children.append(parse_newline(tokens));
  350. } else if (tokens->is_run) {
  351. switch (tokens->run_char()) {
  352. case '*':
  353. case '_':
  354. node->children.append(parse_emph(tokens, in_link));
  355. break;
  356. case '`':
  357. node->children.append(parse_code(tokens));
  358. break;
  359. case '~':
  360. node->children.append(parse_strike_through(tokens));
  361. break;
  362. }
  363. } else if (*tokens == "["sv || *tokens == "!["sv) {
  364. node->children.append(parse_link(tokens));
  365. } else if (in_link && *tokens == "]("sv) {
  366. return node;
  367. } else {
  368. node->children.append(make<TextNode>(tokens->data));
  369. }
  370. if (in_link && !tokens.is_end() && *tokens == "]("sv)
  371. return node;
  372. if (tokens.is_end())
  373. break;
  374. }
  375. return node;
  376. }
  377. NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens)
  378. {
  379. auto next_tok = tokens + 1;
  380. if (next_tok.is_end() || *next_tok != "\n"sv)
  381. return make<TextNode>(tokens->data);
  382. if (tokens->data.length() >= 2)
  383. return make<BreakNode>();
  384. return make<MultiNode>();
  385. }
  386. NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens)
  387. {
  388. auto node = make<TextNode>(tokens->data);
  389. auto next_tok = tokens + 1;
  390. if (!next_tok.is_end() && next_tok->is_space())
  391. // Skip whitespace after newline.
  392. ++tokens;
  393. return node;
  394. }
  395. bool Text::can_open(Token const& opening)
  396. {
  397. return (opening.run_char() == '~' && opening.left_flanking) || (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
  398. }
  399. bool Text::can_close_for(Token const& opening, Text::Token const& closing)
  400. {
  401. if (opening.run_char() != closing.run_char())
  402. return false;
  403. if (opening.run_length() != closing.run_length())
  404. return false;
  405. return (opening.run_char() == '~' && closing.right_flanking) || (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
  406. }
  407. NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
  408. {
  409. auto opening = *tokens;
  410. // Check that the opening delimiter run is properly flanking.
  411. if (!can_open(opening))
  412. return make<TextNode>(opening.data);
  413. auto child = make<MultiNode>();
  414. for (++tokens; !tokens.is_end(); ++tokens) {
  415. if (tokens->is_space()) {
  416. child->children.append(parse_break(tokens));
  417. } else if (*tokens == "\n"sv) {
  418. child->children.append(parse_newline(tokens));
  419. } else if (tokens->is_run) {
  420. if (can_close_for(opening, *tokens)) {
  421. return make<EmphasisNode>(opening.run_length() >= 2, move(child));
  422. }
  423. switch (tokens->run_char()) {
  424. case '*':
  425. case '_':
  426. child->children.append(parse_emph(tokens, in_link));
  427. break;
  428. case '`':
  429. child->children.append(parse_code(tokens));
  430. break;
  431. case '~':
  432. child->children.append(parse_strike_through(tokens));
  433. break;
  434. }
  435. } else if (*tokens == "["sv || *tokens == "!["sv) {
  436. child->children.append(parse_link(tokens));
  437. } else if (in_link && *tokens == "]("sv) {
  438. child->children.prepend(make<TextNode>(opening.data));
  439. return child;
  440. } else {
  441. child->children.append(make<TextNode>(tokens->data));
  442. }
  443. if (in_link && !tokens.is_end() && *tokens == "]("sv) {
  444. child->children.prepend(make<TextNode>(opening.data));
  445. return child;
  446. }
  447. if (tokens.is_end())
  448. break;
  449. }
  450. child->children.prepend(make<TextNode>(opening.data));
  451. return child;
  452. }
  453. NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
  454. {
  455. auto opening = *tokens;
  456. auto is_closing = [&](Token const& token) {
  457. return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
  458. };
  459. bool is_all_whitespace = true;
  460. auto code = make<MultiNode>();
  461. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  462. if (is_closing(*iterator)) {
  463. tokens = iterator;
  464. // Strip first and last space, when appropriate.
  465. if (!is_all_whitespace) {
  466. auto& first = dynamic_cast<TextNode&>(*code->children.first());
  467. auto& last = dynamic_cast<TextNode&>(*code->children.last());
  468. if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
  469. first.text = first.text.substring(1);
  470. last.text = last.text.substring(0, last.text.length() - 1);
  471. }
  472. }
  473. return make<CodeNode>(move(code));
  474. }
  475. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  476. code->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
  477. }
  478. return make<TextNode>(opening.data);
  479. }
  480. NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
  481. {
  482. auto opening = *tokens++;
  483. bool is_image = opening == "!["sv;
  484. auto link_text = parse_sequence(tokens, true);
  485. if (tokens.is_end() || *tokens != "]("sv) {
  486. link_text->children.prepend(make<TextNode>(opening.data));
  487. return link_text;
  488. }
  489. auto separator = *tokens;
  490. VERIFY(separator == "]("sv);
  491. Optional<int> image_width;
  492. Optional<int> image_height;
  493. auto parse_image_dimensions = [&](StringView dimensions) -> bool {
  494. if (!dimensions.starts_with('='))
  495. return false;
  496. ArmedScopeGuard clear_image_dimensions = [&] {
  497. image_width = {};
  498. image_height = {};
  499. };
  500. auto dimension_seperator = dimensions.find('x', 1);
  501. if (!dimension_seperator.has_value())
  502. return false;
  503. auto width_string = dimensions.substring_view(1, *dimension_seperator - 1);
  504. if (!width_string.is_empty()) {
  505. auto width = width_string.to_int();
  506. if (!width.has_value())
  507. return false;
  508. image_width = width;
  509. }
  510. auto height_start = *dimension_seperator + 1;
  511. if (height_start < dimensions.length()) {
  512. auto height_string = dimensions.substring_view(height_start);
  513. auto height = height_string.to_int();
  514. if (!height.has_value())
  515. return false;
  516. image_height = height;
  517. }
  518. clear_image_dimensions.disarm();
  519. return true;
  520. };
  521. StringBuilder address;
  522. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  523. // FIXME: What to do if there's multiple dimension tokens?
  524. if (is_image && !address.is_empty() && parse_image_dimensions(iterator->data))
  525. continue;
  526. if (*iterator == ")"sv) {
  527. tokens = iterator;
  528. return make<LinkNode>(is_image, move(link_text), address.to_deprecated_string().trim_whitespace(), image_width, image_height);
  529. }
  530. address.append(iterator->data);
  531. }
  532. link_text->children.prepend(make<TextNode>(opening.data));
  533. link_text->children.append(make<TextNode>(separator.data));
  534. return link_text;
  535. }
  536. NonnullOwnPtr<Text::Node> Text::parse_strike_through(Vector<Token>::ConstIterator& tokens)
  537. {
  538. auto opening = *tokens;
  539. auto is_closing = [&](Token const& token) {
  540. return token.is_run && token.run_char() == '~' && token.run_length() == opening.run_length();
  541. };
  542. bool is_all_whitespace = true;
  543. auto striked_text = make<MultiNode>();
  544. for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
  545. if (is_closing(*iterator)) {
  546. tokens = iterator;
  547. if (!is_all_whitespace) {
  548. auto& first = dynamic_cast<TextNode&>(*striked_text->children.first());
  549. auto& last = dynamic_cast<TextNode&>(*striked_text->children.last());
  550. if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
  551. first.text = first.text.substring(1);
  552. last.text = last.text.substring(0, last.text.length() - 1);
  553. }
  554. }
  555. return make<StrikeThroughNode>(move(striked_text));
  556. }
  557. is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
  558. striked_text->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
  559. }
  560. return make<TextNode>(opening.data);
  561. }
  562. }