mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
markdown-check: Allow generating a manpage link graph
Using the cross-page links, we can generate a directed graph showing the topology of which pages refer to other pages. This is not just for fun: the links show how often a page is linked (since links are not deduplicated on purpose), which pairs of pages only have links in one direction (where a link in the other direction may be useful), which groups of closely-interlinked pages exist, and which pages have few or no links to other pages. The EXTRA_MARKDOWN_CHECK_ARGS argument to the check-markdown script can be used to inject the -g flag for generating the graph on all manpages.
This commit is contained in:
parent
f53aa959df
commit
d7a2b5e65b
Notes:
sideshowbarker
2024-07-17 05:19:06 +09:00
Author: https://github.com/kleinesfilmroellchen Commit: https://github.com/SerenityOS/serenity/commit/d7a2b5e65b Pull-request: https://github.com/SerenityOS/serenity/pull/19753 Reviewed-by: https://github.com/linusg
4 changed files with 96 additions and 3 deletions
|
@ -400,6 +400,7 @@ if (BUILD_LAGOM)
|
|||
JS
|
||||
Line
|
||||
Locale
|
||||
Manual
|
||||
Markdown
|
||||
PDF
|
||||
Regex
|
||||
|
@ -548,7 +549,7 @@ if (BUILD_LAGOM)
|
|||
target_link_libraries(lzcat LibCompress LibCore LibMain)
|
||||
|
||||
add_executable(markdown-check ../../Userland/Utilities/markdown-check.cpp)
|
||||
target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain)
|
||||
target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain LibManual)
|
||||
|
||||
if (NOT EMSCRIPTEN)
|
||||
add_executable(ntpquery ../../Userland/Utilities/ntpquery.cpp)
|
||||
|
|
|
@ -23,4 +23,5 @@ if [ -z "$SERENITY_SOURCE_DIR" ] ; then
|
|||
export SERENITY_SOURCE_DIR
|
||||
fi
|
||||
|
||||
find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" README.md CONTRIBUTING.md
|
||||
# shellcheck disable=SC2086 # Word splitting is intentional here
|
||||
find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" -b "${SERENITY_SOURCE_DIR}/Base" $EXTRA_MARKDOWN_CHECK_ARGS README.md CONTRIBUTING.md
|
||||
|
|
|
@ -113,7 +113,7 @@ target_link_libraries(lspci PRIVATE LibPCIDB)
|
|||
target_link_libraries(lsusb PRIVATE LibUSBDB)
|
||||
target_link_libraries(lzcat PRIVATE LibCompress)
|
||||
target_link_libraries(man PRIVATE LibMarkdown LibManual)
|
||||
target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown)
|
||||
target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown LibManual)
|
||||
target_link_libraries(matroska PRIVATE LibVideo)
|
||||
target_link_libraries(md PRIVATE LibMarkdown)
|
||||
target_link_libraries(mktemp PRIVATE LibFileSystem)
|
||||
|
|
|
@ -22,6 +22,9 @@
|
|||
#include <LibCore/File.h>
|
||||
#include <LibFileSystem/FileSystem.h>
|
||||
#include <LibMain/Main.h>
|
||||
#include <LibManual/PageNode.h>
|
||||
#include <LibManual/Path.h>
|
||||
#include <LibManual/SectionNode.h>
|
||||
#include <LibMarkdown/Document.h>
|
||||
#include <LibMarkdown/Visitor.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -239,11 +242,55 @@ RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_no
|
|||
return RecursionDecision::Recurse;
|
||||
}
|
||||
|
||||
// Renders the manpage cross-link graph as Graphviz DOT text ("digraph manpage_links { ... }").
//
// `page_links` maps a page to the list of pages it links to. Every key becomes one node,
// labelled "name(section)", and every list entry becomes one "from -> to" edge. Repeated
// entries in a list produce repeated edges; nothing is deduplicated here. Edges whose target
// is not itself a key of `page_links` are dropped.
//
// Returns the complete DOT document, or the first error reported by a page accessor or by
// string building.
static ErrorOr<String> generate_link_graph(HashMap<NonnullRefPtr<Manual::PageNode const>, Vector<NonnullRefPtr<Manual::PageNode const>>> const& page_links)
{
    StringBuilder builder;
    TRY(builder.try_append("digraph manpage_links {\n"sv));

    // DOT node identifier for each page. Not displayed to the user.
    HashMap<NonnullRefPtr<Manual::PageNode const>, String> page_identifiers;

    // Iterate the map directly instead of materializing a temporary key list.
    for (auto const& page_entry : page_links) {
        auto const& page = page_entry.key;
        auto path = TRY(page->path());
        StringBuilder identifier_builder;
        // Only allow alphanumerics, replace everything else with underscores.
        // NOTE(review): two paths that differ only in non-alphanumeric characters would end up
        // with the same identifier and be merged into one node - confirm this cannot happen
        // for real manpage paths.
        for (auto const& character : path.code_points()) {
            if (AK::is_ascii_alphanumeric(character))
                TRY(identifier_builder.try_append_code_point(character));
            else
                TRY(identifier_builder.try_append('_'));
        }
        auto const identifier = TRY(identifier_builder.to_string());
        // NOTE(review): the page name is written into a double-quoted DOT string without
        // escaping; a name containing '"' would presumably produce invalid output.
        TRY(builder.try_appendff("{} [label=\"{}({})\"];\n", identifier, TRY(page->name()), page->section_number()));
        TRY(page_identifiers.try_set(page, identifier));
    }

    for (auto const& from_page_list : page_links) {
        // Every key of page_links received an identifier in the loop above, so this lookup
        // always succeeds. It is done once per source page rather than once per edge.
        auto const from_page_identifier = page_identifiers.get(from_page_list.key);
        for (auto const& to_page : from_page_list.value) {
            auto const to_page_identifier = page_identifiers.get(to_page);
            // Target page doesn't actually exist; it's probably an ignored page.
            if (!to_page_identifier.has_value())
                continue;
            // Reuse the lookup result checked above instead of querying the map again.
            TRY(builder.try_appendff("{} -> {};\n", from_page_identifier.value(), to_page_identifier.value()));
        }
    }

    TRY(builder.try_append("}\n"sv));

    return builder.to_string();
}
|
||||
|
||||
ErrorOr<int> serenity_main(Main::Arguments arguments)
|
||||
{
|
||||
Core::ArgsParser args_parser;
|
||||
Vector<StringView> file_paths;
|
||||
bool output_link_graph { false };
|
||||
StringView base_path = "/"sv;
|
||||
args_parser.add_positional_argument(file_paths, "Path to markdown files to read and parse", "paths", Core::ArgsParser::Required::Yes);
|
||||
args_parser.add_option(base_path, "System base path (default: \"/\")", "base", 'b', "path");
|
||||
args_parser.add_option(output_link_graph, "Output a page link graph into \"manpage-links.gv\". The recommended tool to process this graph is `fdp`.", "link-graph", 'g');
|
||||
args_parser.parse(arguments);
|
||||
|
||||
outln("Reading and parsing Markdown files ...");
|
||||
|
@ -331,6 +378,50 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
}
|
||||
}
|
||||
|
||||
if (output_link_graph) {
|
||||
// First, collect all pages; the links between pages are collected in a second step, once every page is known.
|
||||
HashMap<String, NonnullRefPtr<Manual::PageNode const>> pages;
|
||||
for (auto const& file : files) {
|
||||
auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(file.key, base_path)));
|
||||
auto page = Manual::Node::try_create_from_query({ base_relative_path });
|
||||
if (page.is_error()) {
|
||||
dbgln("Not including {} in the link graph since it's not a man page.", file.key);
|
||||
continue;
|
||||
}
|
||||
TRY(pages.try_set(file.key, page.value()));
|
||||
for (auto const& link : file.value.file_links()) {
|
||||
auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(link.file_path, base_path)));
|
||||
auto maybe_target_page = Manual::Node::try_create_from_query({ base_relative_path });
|
||||
if (maybe_target_page.is_error()) {
|
||||
dbgln("Not including {} in the link graph since it's not a man page.", link.file_path);
|
||||
continue;
|
||||
}
|
||||
TRY(pages.try_set(TRY(String::from_deprecated_string(link.file_path)), maybe_target_page.value()));
|
||||
}
|
||||
}
|
||||
|
||||
HashMap<NonnullRefPtr<Manual::PageNode const>, Vector<NonnullRefPtr<Manual::PageNode const>>> page_links;
|
||||
for (auto const& file : files) {
|
||||
auto page = pages.get(file.key);
|
||||
if (!page.has_value())
|
||||
continue;
|
||||
|
||||
Vector<NonnullRefPtr<Manual::PageNode const>> linked_pages;
|
||||
for (auto const& link : file.value.file_links()) {
|
||||
auto linked_page = pages.get(TRY(String::from_deprecated_string(link.file_path)));
|
||||
if (!linked_page.has_value())
|
||||
continue;
|
||||
|
||||
TRY(linked_pages.try_append(*linked_page.value()));
|
||||
}
|
||||
TRY(page_links.try_set(*page.value(), move(linked_pages)));
|
||||
}
|
||||
|
||||
auto const graph_text = TRY(generate_link_graph(page_links));
|
||||
auto const graph_file = TRY(Core::File::open("manpage-links.gv"sv, Core::File::OpenMode::Write | Core::File::OpenMode::Truncate));
|
||||
TRY(graph_file->write_until_depleted(graph_text.bytes()));
|
||||
}
|
||||
|
||||
if (any_problems) {
|
||||
outln("Done. Some errors were encountered, please check above log.");
|
||||
return 1;
|
||||
|
|
Loading…
Reference in a new issue