From d7a2b5e65b215c36e027fe2354fd249bcd605180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= Date: Sun, 2 Jul 2023 13:20:23 +0200 Subject: [PATCH] markdown-check: Allow generating a manpage link graph Using the cross-page links, we can generate a directed graph showing the topology of which pages refer to other pages. This is not just for fun: the links show how often a page is linked (since links are not deduplicated on purpose), which pairs of pages only have links in one direction (where a link in the other direction may be useful), which groups of closely-interlinked pages exist, and which pages have few or no links to other pages. The EXTRA_MARKDOWN_CHECK_ARGS argument to the check-markdown script can be used to inject the -g flag for generating the graph on all manpages. --- Meta/Lagom/CMakeLists.txt | 3 +- Meta/check-markdown.sh | 3 +- Userland/Utilities/CMakeLists.txt | 2 +- Userland/Utilities/markdown-check.cpp | 91 +++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 3 deletions(-) diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 9894b5d3274..bb130874699 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -400,6 +400,7 @@ if (BUILD_LAGOM) JS Line Locale + Manual Markdown PDF Regex @@ -548,7 +549,7 @@ if (BUILD_LAGOM) target_link_libraries(lzcat LibCompress LibCore LibMain) add_executable(markdown-check ../../Userland/Utilities/markdown-check.cpp) - target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain) + target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain LibManual) if (NOT EMSCRIPTEN) add_executable(ntpquery ../../Userland/Utilities/ntpquery.cpp) diff --git a/Meta/check-markdown.sh b/Meta/check-markdown.sh index 315e9e14385..6554c7d836a 100755 --- a/Meta/check-markdown.sh +++ b/Meta/check-markdown.sh @@ -23,4 +23,5 @@ if [ -z "$SERENITY_SOURCE_DIR" ] ; then export SERENITY_SOURCE_DIR fi -find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" README.md CONTRIBUTING.md +# shellcheck disable=SC2086 # Word splitting is intentional here +find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" -b "${SERENITY_SOURCE_DIR}/Base" $EXTRA_MARKDOWN_CHECK_ARGS README.md CONTRIBUTING.md diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 73f89eacc3f..2608752d76a 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -113,7 +113,7 @@ target_link_libraries(lspci PRIVATE LibPCIDB) target_link_libraries(lsusb PRIVATE LibUSBDB) target_link_libraries(lzcat PRIVATE LibCompress) target_link_libraries(man PRIVATE LibMarkdown LibManual) -target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown) +target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown LibManual) target_link_libraries(matroska PRIVATE LibVideo) target_link_libraries(md PRIVATE LibMarkdown) target_link_libraries(mktemp PRIVATE LibFileSystem) diff --git a/Userland/Utilities/markdown-check.cpp b/Userland/Utilities/markdown-check.cpp index f84ab63231d..897d6d4c25b 100644 --- a/Userland/Utilities/markdown-check.cpp +++ b/Userland/Utilities/markdown-check.cpp @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -239,11 +242,55 @@ RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_no return RecursionDecision::Recurse; } +static ErrorOr generate_link_graph(HashMap, Vector>> const& page_links) +{ + auto const header = "digraph manpage_links {\n"sv; + StringBuilder builder; + TRY(builder.try_append(header)); + + // Not displayed to the user. + HashMap, String> page_identifiers; + + for (auto const& page : page_links.keys()) { + auto path = TRY(page->path()); + StringBuilder identifier_builder; + // Only allow alphanumerics, replace everything else with underscores. + for (auto const& character : path.code_points()) { + if (AK::is_ascii_alphanumeric(character)) + TRY(identifier_builder.try_append_code_point(character)); + else + TRY(identifier_builder.try_append('_')); + } + auto const identifier = TRY(identifier_builder.to_string()); + TRY(builder.try_appendff("{} [label=\"{}({})\"];\n", identifier, TRY(page->name()), page->section_number())); + TRY(page_identifiers.try_set(page, identifier)); + } + + for (auto const& from_page_list : page_links) { + auto const& from_page = from_page_list.key; + for (auto const& to_page : from_page_list.value) { + auto const to_page_identifier = page_identifiers.get(to_page); + // Target page doesn't actually exist; it's probably an ignored page. + if (!to_page_identifier.has_value()) + continue; + TRY(builder.try_appendff("{} -> {};\n", page_identifiers.get(from_page).value(), page_identifiers.get(to_page).value())); + } + } + + TRY(builder.try_append("}\n"sv)); + + return builder.to_string(); +} + ErrorOr serenity_main(Main::Arguments arguments) { Core::ArgsParser args_parser; Vector file_paths; + bool output_link_graph { false }; + StringView base_path = "/"sv; args_parser.add_positional_argument(file_paths, "Path to markdown files to read and parse", "paths", Core::ArgsParser::Required::Yes); + args_parser.add_option(base_path, "System base path (default: \"/\")", "base", 'b', "path"); + args_parser.add_option(output_link_graph, "Output a page link graph into \"manpage-links.gv\". The recommended tool to process this graph is `fdp`.", "link-graph", 'g'); args_parser.parse(arguments); outln("Reading and parsing Markdown files ..."); @@ -331,6 +378,50 @@ ErrorOr serenity_main(Main::Arguments arguments) } } + if (output_link_graph) { + // First, collect all pages, and collect links between pages in a second step after all pages must have been collected. + HashMap> pages; + for (auto const& file : files) { + auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(file.key, base_path))); + auto page = Manual::Node::try_create_from_query({ base_relative_path }); + if (page.is_error()) { + dbgln("Not including {} in the link graph since it's not a man page.", file.key); + continue; + } + TRY(pages.try_set(file.key, page.value())); + for (auto const& link : file.value.file_links()) { + auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(link.file_path, base_path))); + auto maybe_target_page = Manual::Node::try_create_from_query({ base_relative_path }); + if (maybe_target_page.is_error()) { + dbgln("Not including {} in the link graph since it's not a man page.", link.file_path); + continue; + } + TRY(pages.try_set(TRY(String::from_deprecated_string(link.file_path)), maybe_target_page.value())); + } + } + + HashMap, Vector>> page_links; + for (auto const& file : files) { + auto page = pages.get(file.key); + if (!page.has_value()) + continue; + + Vector> linked_pages; + for (auto const& link : file.value.file_links()) { + auto linked_page = pages.get(TRY(String::from_deprecated_string(link.file_path))); + if (!linked_page.has_value()) + continue; + + TRY(linked_pages.try_append(*linked_page.value())); + } + TRY(page_links.try_set(*page.value(), move(linked_pages))); + } + + auto const graph_text = TRY(generate_link_graph(page_links)); + auto const graph_file = TRY(Core::File::open("manpage-links.gv"sv, Core::File::OpenMode::Write | Core::File::OpenMode::Truncate)); + TRY(graph_file->write_until_depleted(graph_text.bytes())); + } + if (any_problems) { outln("Done. Some errors were encountered, please check above log."); return 1;