123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724 |
- /*
- * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
- * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/ScopeGuard.h>
- #include <AK/StringBuilder.h>
- #include <LibMarkdown/Text.h>
- #include <LibMarkdown/Visitor.h>
- #include <ctype.h>
- #include <string.h>
- namespace Markdown {
- void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
- {
- builder.append((strong) ? "<strong>"sv : "<em>"sv);
- child->render_to_html(builder);
- builder.append((strong) ? "</strong>"sv : "</em>"sv);
- }
- void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
- {
- if (strong) {
- builder.append("\e[1m"sv);
- child->render_for_terminal(builder);
- builder.append("\e[22m"sv);
- } else {
- builder.append("\e[3m"sv);
- child->render_for_terminal(builder);
- builder.append("\e[23m"sv);
- }
- }
- void Text::EmphasisNode::render_for_raw_print(StringBuilder& builder) const
- {
- child->render_for_raw_print(builder);
- }
- size_t Text::EmphasisNode::terminal_length() const
- {
- return child->terminal_length();
- }
- RecursionDecision Text::EmphasisNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- return child->walk(visitor);
- }
- void Text::CodeNode::render_to_html(StringBuilder& builder) const
- {
- builder.append("<code>"sv);
- code->render_to_html(builder);
- builder.append("</code>"sv);
- }
- void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
- {
- builder.append("\e[1m"sv);
- code->render_for_terminal(builder);
- builder.append("\e[22m"sv);
- }
- void Text::CodeNode::render_for_raw_print(StringBuilder& builder) const
- {
- code->render_for_raw_print(builder);
- }
- size_t Text::CodeNode::terminal_length() const
- {
- return code->terminal_length();
- }
- RecursionDecision Text::CodeNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- return code->walk(visitor);
- }
- void Text::BreakNode::render_to_html(StringBuilder& builder) const
- {
- builder.append("<br />"sv);
- }
- void Text::BreakNode::render_for_terminal(StringBuilder&) const
- {
- }
- void Text::BreakNode::render_for_raw_print(StringBuilder&) const
- {
- }
- size_t Text::BreakNode::terminal_length() const
- {
- return 0;
- }
- RecursionDecision Text::BreakNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- // Normalize return value
- return RecursionDecision::Continue;
- }
- void Text::TextNode::render_to_html(StringBuilder& builder) const
- {
- builder.append(escape_html_entities(text));
- }
- void Text::TextNode::render_for_raw_print(StringBuilder& builder) const
- {
- builder.append(text);
- }
- void Text::TextNode::render_for_terminal(StringBuilder& builder) const
- {
- if (collapsible && (text == "\n" || text.is_whitespace())) {
- builder.append(' ');
- } else {
- builder.append(text);
- }
- }
- size_t Text::TextNode::terminal_length() const
- {
- if (collapsible && text.is_whitespace()) {
- return 1;
- }
- return text.length();
- }
- RecursionDecision Text::TextNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- rd = visitor.visit(text);
- if (rd != RecursionDecision::Recurse)
- return rd;
- // Normalize return value
- return RecursionDecision::Continue;
- }
- void Text::LinkNode::render_to_html(StringBuilder& builder) const
- {
- if (is_image) {
- builder.append("<img src=\""sv);
- builder.append(escape_html_entities(href));
- if (has_image_dimensions()) {
- builder.append("\" style=\""sv);
- if (image_width.has_value())
- builder.appendff("width: {}px;", *image_width);
- if (image_height.has_value())
- builder.appendff("height: {}px;", *image_height);
- }
- builder.append("\" alt=\""sv);
- text->render_to_html(builder);
- builder.append("\" >"sv);
- } else {
- builder.append("<a href=\""sv);
- builder.append(escape_html_entities(href));
- builder.append("\">"sv);
- text->render_to_html(builder);
- builder.append("</a>"sv);
- }
- }
- void Text::LinkNode::render_for_raw_print(StringBuilder& builder) const
- {
- text->render_for_raw_print(builder);
- }
- void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
- {
- bool is_linked = href.contains("://"sv);
- if (is_linked) {
- builder.append("\033[0;34m\e]8;;"sv);
- builder.append(href);
- builder.append("\e\\"sv);
- }
- text->render_for_terminal(builder);
- if (is_linked) {
- builder.appendff(" <{}>", href);
- builder.append("\033]8;;\033\\\033[0m"sv);
- }
- }
- size_t Text::LinkNode::terminal_length() const
- {
- return text->terminal_length();
- }
- RecursionDecision Text::LinkNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- // Don't recurse on href.
- return text->walk(visitor);
- }
- void Text::MultiNode::render_to_html(StringBuilder& builder) const
- {
- for (auto& child : children) {
- child->render_to_html(builder);
- }
- }
- void Text::MultiNode::render_for_raw_print(StringBuilder& builder) const
- {
- for (auto& child : children) {
- child->render_for_raw_print(builder);
- }
- }
- void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
- {
- for (auto& child : children) {
- child->render_for_terminal(builder);
- }
- }
- size_t Text::MultiNode::terminal_length() const
- {
- size_t length = 0;
- for (auto& child : children) {
- length += child->terminal_length();
- }
- return length;
- }
- RecursionDecision Text::MultiNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- for (auto const& child : children) {
- rd = child->walk(visitor);
- if (rd == RecursionDecision::Break)
- return rd;
- }
- return RecursionDecision::Continue;
- }
- void Text::StrikeThroughNode::render_to_html(StringBuilder& builder) const
- {
- builder.append("<del>"sv);
- striked_text->render_to_html(builder);
- builder.append("</del>"sv);
- }
- void Text::StrikeThroughNode::render_for_raw_print(StringBuilder& builder) const
- {
- striked_text->render_for_raw_print(builder);
- }
- void Text::StrikeThroughNode::render_for_terminal(StringBuilder& builder) const
- {
- builder.append("\e[9m"sv);
- striked_text->render_for_terminal(builder);
- builder.append("\e[29m"sv);
- }
- size_t Text::StrikeThroughNode::terminal_length() const
- {
- return striked_text->terminal_length();
- }
- RecursionDecision Text::StrikeThroughNode::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- return striked_text->walk(visitor);
- }
- size_t Text::terminal_length() const
- {
- return m_node->terminal_length();
- }
- ByteString Text::render_to_html() const
- {
- StringBuilder builder;
- m_node->render_to_html(builder);
- return builder.to_byte_string().trim(" \n\t"sv);
- }
- ByteString Text::render_for_raw_print() const
- {
- StringBuilder builder;
- m_node->render_for_raw_print(builder);
- return builder.to_byte_string().trim(" \n\t"sv);
- }
- ByteString Text::render_for_terminal() const
- {
- StringBuilder builder;
- m_node->render_for_terminal(builder);
- return builder.to_byte_string().trim(" \n\t"sv);
- }
- RecursionDecision Text::walk(Visitor& visitor) const
- {
- RecursionDecision rd = visitor.visit(*this);
- if (rd != RecursionDecision::Recurse)
- return rd;
- return m_node->walk(visitor);
- }
- Text Text::parse(StringView str)
- {
- Text text;
- auto const tokens = tokenize(str);
- auto iterator = tokens.begin();
- text.m_node = parse_sequence(iterator, false);
- return text;
- }
- static bool flanking(StringView str, size_t start, size_t end, int dir)
- {
- ssize_t next = ((dir > 0) ? end : start) + dir;
- if (next < 0 || next >= (ssize_t)str.length())
- return false;
- if (isspace(str[next]))
- return false;
- if (!ispunct(str[next]))
- return true;
- ssize_t prev = ((dir > 0) ? start : end) - dir;
- if (prev < 0 || prev >= (ssize_t)str.length())
- return true;
- return isspace(str[prev]) || ispunct(str[prev]);
- }
- Vector<Text::Token> Text::tokenize(StringView str)
- {
- Vector<Token> tokens;
- StringBuilder current_token;
- auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
- if (current_token.is_empty())
- return;
- tokens.append({
- current_token.to_byte_string(),
- left_flanking,
- right_flanking,
- punct_before,
- punct_after,
- is_run,
- });
- current_token.clear();
- };
- auto flush_token = [&]() {
- flush_run(false, false, false, false, false);
- };
- bool in_space = false;
- for (size_t offset = 0; offset < str.length(); ++offset) {
- auto has = [&](StringView seq) {
- if (offset + seq.length() > str.length())
- return false;
- return str.substring_view(offset, seq.length()) == seq;
- };
- auto expect = [&](StringView seq) {
- VERIFY(has(seq));
- flush_token();
- current_token.append(seq);
- flush_token();
- offset += seq.length() - 1;
- };
- char ch = str[offset];
- if (ch != ' ' && in_space) {
- flush_token();
- in_space = false;
- }
- if (ch == '\\' && offset + 1 < str.length() && ispunct(str[offset + 1])) {
- current_token.append(str[offset + 1]);
- ++offset;
- } else if (ch == '*' || ch == '_' || ch == '`' || ch == '~') {
- flush_token();
- char delim = ch;
- size_t run_offset;
- for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
- current_token.append(str[run_offset]);
- }
- flush_run(flanking(str, offset, run_offset - 1, +1),
- flanking(str, offset, run_offset - 1, -1),
- offset > 0 && ispunct(str[offset - 1]),
- run_offset < str.length() && ispunct(str[run_offset]),
- true);
- offset = run_offset - 1;
- } else if (ch == ' ') {
- if (!in_space) {
- flush_token();
- in_space = true;
- }
- current_token.append(ch);
- } else if (has("\n"sv)) {
- expect("\n"sv);
- } else if (has("["sv)) {
- expect("["sv);
- } else if (has(") {
- expect("]("sv);
- } else if (has(")"sv)) {
- expect(")"sv);
- } else {
- current_token.append(ch);
- }
- }
- flush_token();
- return tokens;
- }
- NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
- {
- auto node = make<MultiNode>();
- for (; !tokens.is_end(); ++tokens) {
- if (tokens->is_space()) {
- node->children.append(parse_break(tokens));
- } else if (*tokens == "\n"sv) {
- node->children.append(parse_newline(tokens));
- } else if (tokens->is_run) {
- switch (tokens->run_char()) {
- case '*':
- case '_':
- node->children.append(parse_emph(tokens, in_link));
- break;
- case '`':
- node->children.append(parse_code(tokens));
- break;
- case '~':
- node->children.append(parse_strike_through(tokens));
- break;
- }
- } else if (*tokens == "["sv || *tokens == " {
- return node;
- } else {
- node->children.append(make<TextNode>(tokens->data));
- }
- if (in_link && !tokens.is_end() && *tokens == "]("sv)
- return node;
- if (tokens.is_end())
- break;
- }
- return node;
- }
- NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens)
- {
- auto next_tok = tokens + 1;
- if (next_tok.is_end() || *next_tok != "\n"sv)
- return make<TextNode>(tokens->data);
- if (tokens->data.length() >= 2)
- return make<BreakNode>();
- return make<MultiNode>();
- }
- NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens)
- {
- auto node = make<TextNode>(tokens->data);
- auto next_tok = tokens + 1;
- if (!next_tok.is_end() && next_tok->is_space())
- // Skip whitespace after newline.
- ++tokens;
- return node;
- }
- bool Text::can_open(Token const& opening)
- {
- return (opening.run_char() == '~' && opening.left_flanking) || (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
- }
- bool Text::can_close_for(Token const& opening, Text::Token const& closing)
- {
- if (opening.run_char() != closing.run_char())
- return false;
- if (opening.run_length() != closing.run_length())
- return false;
- return (opening.run_char() == '~' && closing.right_flanking) || (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
- }
- NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
- {
- auto opening = *tokens;
- // Check that the opening delimiter run is properly flanking.
- if (!can_open(opening))
- return make<TextNode>(opening.data);
- auto child = make<MultiNode>();
- for (++tokens; !tokens.is_end(); ++tokens) {
- if (tokens->is_space()) {
- child->children.append(parse_break(tokens));
- } else if (*tokens == "\n"sv) {
- child->children.append(parse_newline(tokens));
- } else if (tokens->is_run) {
- if (can_close_for(opening, *tokens)) {
- return make<EmphasisNode>(opening.run_length() >= 2, move(child));
- }
- switch (tokens->run_char()) {
- case '*':
- case '_':
- child->children.append(parse_emph(tokens, in_link));
- break;
- case '`':
- child->children.append(parse_code(tokens));
- break;
- case '~':
- child->children.append(parse_strike_through(tokens));
- break;
- }
- } else if (*tokens == "["sv || *tokens == " {
- child->children.prepend(make<TextNode>(opening.data));
- return child;
- } else {
- child->children.append(make<TextNode>(tokens->data));
- }
- if (in_link && !tokens.is_end() && *tokens == "]("sv) {
- child->children.prepend(make<TextNode>(opening.data));
- return child;
- }
- if (tokens.is_end())
- break;
- }
- child->children.prepend(make<TextNode>(opening.data));
- return child;
- }
- NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
- {
- auto opening = *tokens;
- auto is_closing = [&](Token const& token) {
- return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
- };
- bool is_all_whitespace = true;
- auto code = make<MultiNode>();
- for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
- if (is_closing(*iterator)) {
- tokens = iterator;
- // Strip first and last space, when appropriate.
- if (!is_all_whitespace) {
- auto& first = dynamic_cast<TextNode&>(*code->children.first());
- auto& last = dynamic_cast<TextNode&>(*code->children.last());
- if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
- first.text = first.text.substring(1);
- last.text = last.text.substring(0, last.text.length() - 1);
- }
- }
- return make<CodeNode>(move(code));
- }
- is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
- code->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
- }
- return make<TextNode>(opening.data);
- }
- NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
- {
- auto opening = *tokens++;
- bool is_image = opening == " {
- link_text->children.prepend(make<TextNode>(opening.data));
- return link_text;
- }
- auto separator = *tokens;
- VERIFY(separator == "]("sv);
- Optional<int> image_width;
- Optional<int> image_height;
- auto parse_image_dimensions = [&](StringView dimensions) -> bool {
- if (!dimensions.starts_with('='))
- return false;
- ArmedScopeGuard clear_image_dimensions = [&] {
- image_width = {};
- image_height = {};
- };
- auto dimension_seperator = dimensions.find('x', 1);
- if (!dimension_seperator.has_value())
- return false;
- auto width_string = dimensions.substring_view(1, *dimension_seperator - 1);
- if (!width_string.is_empty()) {
- auto width = width_string.to_number<int>();
- if (!width.has_value())
- return false;
- image_width = width;
- }
- auto height_start = *dimension_seperator + 1;
- if (height_start < dimensions.length()) {
- auto height_string = dimensions.substring_view(height_start);
- auto height = height_string.to_number<int>();
- if (!height.has_value())
- return false;
- image_height = height;
- }
- clear_image_dimensions.disarm();
- return true;
- };
- StringBuilder address;
- for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
- // FIXME: What to do if there's multiple dimension tokens?
- if (is_image && !address.is_empty() && parse_image_dimensions(iterator->data))
- continue;
- if (*iterator == ")"sv) {
- tokens = iterator;
- ByteString href = address.to_byte_string().trim_whitespace();
- // Add file:// if the link is an absolute path otherwise it will be assumed relative.
- if (AK::StringUtils::starts_with(href, "/"sv, CaseSensitivity::CaseSensitive))
- href = ByteString::formatted("file://{}", href);
- return make<LinkNode>(is_image, move(link_text), move(href), image_width, image_height);
- }
- address.append(iterator->data);
- }
- link_text->children.prepend(make<TextNode>(opening.data));
- link_text->children.append(make<TextNode>(separator.data));
- return link_text;
- }
- NonnullOwnPtr<Text::Node> Text::parse_strike_through(Vector<Token>::ConstIterator& tokens)
- {
- auto opening = *tokens;
- auto is_closing = [&](Token const& token) {
- return token.is_run && token.run_char() == '~' && token.run_length() == opening.run_length();
- };
- bool is_all_whitespace = true;
- auto striked_text = make<MultiNode>();
- for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
- if (is_closing(*iterator)) {
- tokens = iterator;
- if (!is_all_whitespace) {
- auto& first = dynamic_cast<TextNode&>(*striked_text->children.first());
- auto& last = dynamic_cast<TextNode&>(*striked_text->children.last());
- if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
- first.text = first.text.substring(1);
- last.text = last.text.substring(0, last.text.length() - 1);
- }
- }
- return make<StrikeThroughNode>(move(striked_text));
- }
- is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
- striked_text->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
- }
- return make<TextNode>(opening.data);
- }
- }
|