markdown-check.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. /*
  2. * Copyright (c) 2021, Ben Wiederhake <BenWiederhake.GitHub@gmx.de>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. /*
  7. * You may want to invoke the checker like this:
  8. * $ cd Build/lagom
  9. * $ ninja
  10. * $ find ../../AK ../../Base ../../Documentation/ ../../Kernel/ ../../Meta/ ../../Ports/ ../../Tests/ ../../Userland/ -type f -name '*.md' | xargs ./markdown-check ../../README.md
  11. */
  12. #include <AK/Format.h>
  13. #include <AK/HashMap.h>
  14. #include <AK/HashTable.h>
  15. #include <AK/LexicalPath.h>
  16. #include <AK/OwnPtr.h>
  17. #include <AK/StdLibExtras.h>
  18. #include <AK/Vector.h>
  19. #include <LibCore/File.h>
  20. #include <LibMarkdown/Document.h>
  21. #include <LibMarkdown/Visitor.h>
  22. struct FileLink {
  23. String file_path; // May be empty, but not null
  24. String anchor; // May be null ("foo.md", "bar.png"), may be empty ("baz.md#")
  25. String label; // May be empty, but not null
  26. };
  27. class MarkdownLinkage final : Markdown::Visitor {
  28. public:
  29. ~MarkdownLinkage() = default;
  30. static MarkdownLinkage analyze(Markdown::Document const&);
  31. bool has_anchor(String const& anchor) const { return m_anchors.contains(anchor); }
  32. HashTable<String> const& anchors() const { return m_anchors; }
  33. Vector<FileLink> const& file_links() const { return m_file_links; }
  34. private:
  35. MarkdownLinkage() = default;
  36. virtual RecursionDecision visit(Markdown::Heading const&) override;
  37. virtual RecursionDecision visit(Markdown::Text::LinkNode const&) override;
  38. HashTable<String> m_anchors;
  39. Vector<FileLink> m_file_links;
  40. };
  41. MarkdownLinkage MarkdownLinkage::analyze(Markdown::Document const& document)
  42. {
  43. MarkdownLinkage linkage;
  44. document.walk(linkage);
  45. return linkage;
  46. }
  47. class StringCollector final : Markdown::Visitor {
  48. public:
  49. StringCollector() = default;
  50. virtual ~StringCollector() = default;
  51. String build() { return m_builder.build(); }
  52. static String from(Markdown::Heading const& heading)
  53. {
  54. StringCollector collector;
  55. heading.walk(collector);
  56. return collector.build();
  57. }
  58. static String from(Markdown::Text::Node const& node)
  59. {
  60. StringCollector collector;
  61. node.walk(collector);
  62. return collector.build();
  63. }
  64. private:
  65. virtual RecursionDecision visit(String const& text) override
  66. {
  67. m_builder.append(text);
  68. return RecursionDecision::Recurse;
  69. }
  70. StringBuilder m_builder;
  71. };
  72. static String slugify(String const& text)
  73. {
  74. // TODO: This feels like it belongs into LibWeb.
  75. String slug = text.to_lowercase();
  76. // Reverse-engineered through github, using:
  77. // find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' | xargs grep --color=always -Pin '^##+ .*[^a-z0-9 ?()`_:/!&|.$'"'"',<>"+-]' README.md
  78. slug = slug.replace(" ", "-", true)
  79. .replace("!", "", true)
  80. .replace("?", "", true)
  81. .replace("(", "", true)
  82. .replace(")", "", true)
  83. .replace(":", "", true)
  84. .replace("/", "-", true)
  85. .replace("&", "", true)
  86. .replace("|", "", true)
  87. .replace(".", "", true)
  88. .replace("$", "", true)
  89. .replace("'", "", true)
  90. .replace(",", "", true)
  91. .replace("\"", "", true)
  92. .replace("+", "", true)
  93. .replace("\\", "", true)
  94. .replace("<", "", true)
  95. .replace(">", "", true);
  96. // What about "="?
  97. return slug;
  98. }
  99. RecursionDecision MarkdownLinkage::visit(Markdown::Heading const& heading)
  100. {
  101. m_anchors.set(slugify(StringCollector::from(heading)));
  102. return RecursionDecision::Recurse;
  103. }
  104. RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_node)
  105. {
  106. String const& href = link_node.href;
  107. if (href.is_null()) {
  108. // Nothing to do here.
  109. return RecursionDecision::Recurse;
  110. }
  111. if (href.starts_with("https://") || href.starts_with("http://")) {
  112. outln("Not checking external link {}", href);
  113. return RecursionDecision::Recurse;
  114. }
  115. if (href.starts_with("file://")) {
  116. // TODO: Resolve relative to $SERENITY_SOURCE_DIR/Base/
  117. // Currently, this affects only one link, so it's not worth the effort.
  118. outln("Not checking local link {}", href);
  119. return RecursionDecision::Recurse;
  120. }
  121. String label = StringCollector::from(*link_node.text);
  122. Optional<size_t> last_hash = href.find_last('#');
  123. if (last_hash.has_value()) {
  124. m_file_links.append({ href.substring(0, last_hash.value()), href.substring(last_hash.value() + 1), label });
  125. } else {
  126. m_file_links.append({ href, String(), label });
  127. }
  128. return RecursionDecision::Recurse;
  129. }
  130. int main(int argc, char** argv)
  131. {
  132. if (argc < 2) {
  133. // Technically it is valid to call this program with zero markdown files: When there are
  134. // no files, there are no dead links. However, any such usage is probably erroneous.
  135. warnln("Usage: {} Foo.md Bar.md ...", argv[0]);
  136. // E.g.: find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' -print0 | xargs -0 ./MarkdownCheck
  137. return 1;
  138. }
  139. outln("Reading and parsing Markdown files ...");
  140. HashMap<String, MarkdownLinkage> files;
  141. for (int i = 1; i < argc; ++i) {
  142. auto path = argv[i];
  143. auto file_or_error = Core::File::open(path, Core::OpenMode::ReadOnly);
  144. if (file_or_error.is_error()) {
  145. warnln("Failed to read {}: {}", path, file_or_error.error());
  146. // Since this should never happen anyway, fail early.
  147. return 1;
  148. }
  149. auto file = file_or_error.release_value();
  150. auto content_buffer = file->read_all();
  151. auto content = StringView(content_buffer);
  152. auto document = Markdown::Document::parse(content);
  153. if (!document) {
  154. warnln("Failed to parse {} due to an unspecified error.", path);
  155. // Since this should never happen anyway, fail early.
  156. return 1;
  157. }
  158. files.set(Core::File::real_path_for(path), MarkdownLinkage::analyze(*document));
  159. }
  160. outln("Checking links ...");
  161. bool any_problems = false;
  162. for (auto const& file_item : files) {
  163. auto file_lexical_path = LexicalPath(file_item.key);
  164. auto file_dir = file_lexical_path.dirname();
  165. for (auto const& file_link : file_item.value.file_links()) {
  166. String pointee_file;
  167. if (file_link.file_path.is_empty()) {
  168. pointee_file = file_item.key;
  169. } else {
  170. pointee_file = LexicalPath::absolute_path(file_dir, file_link.file_path);
  171. }
  172. if (!Core::File::exists(pointee_file)) {
  173. outln("File '{}' points to '{}' (label '{}'), but '{}' does not exist!",
  174. file_item.key, file_link.file_path, file_link.label, pointee_file);
  175. any_problems = true;
  176. continue;
  177. }
  178. if (file_link.anchor.is_empty()) {
  179. // No anchor to test for.
  180. continue;
  181. }
  182. auto pointee_linkage = files.find(pointee_file);
  183. if (pointee_linkage == files.end()) {
  184. outln("File '{}' points to file '{}', which exists, but was not scanned. Add it to the command-line arguments and re-run.",
  185. file_item.key, pointee_file);
  186. any_problems = true;
  187. continue;
  188. }
  189. if (!pointee_linkage->value.has_anchor(file_link.anchor)) {
  190. outln("File '{}' points to '{}#{}' (label '{}'), but file '{}' does not have any heading that results in the anchor '{}'.",
  191. file_item.key, file_link.file_path, file_link.anchor, file_link.label, pointee_file, file_link.anchor);
  192. out(" The following anchors seem to be available:\n ");
  193. bool any_anchors = false;
  194. for (auto const& anchor : pointee_linkage->value.anchors()) {
  195. if (any_anchors)
  196. out(", ");
  197. out("'{}'", anchor);
  198. any_anchors = true;
  199. }
  200. if (!any_anchors)
  201. out("(none)");
  202. outln();
  203. any_problems = true;
  204. }
  205. }
  206. }
  207. if (any_problems) {
  208. outln("Done. Some errors were encountered, please check above log.");
  209. return 1;
  210. } else {
  211. outln("Done. No problems detected.");
  212. return 0;
  213. }
  214. }