pdf: Add a --debugging-stats flag

Tries to render all pages of a PDF and then produces a report on which
unimplemented things were encountered.

Example, for pdf_reference_1-7.pdf:

    113 times: Rendering of feature not supported: Type0 font not imp...
        on pages: 170 (3x) 217 (2x) 250 (9x) 252 (2x) 329 (6x)...
    21 times: Rendering of feature not supported: unknown color space
        on pages: 489 (4x) 490 (5x) 491 (3x) 492 (5x) 493 (4x)
    4 times: Rendering of feature not supported: CCITTFaxDecode Filte...
        on pages: 494 (4x)

(Rendering currently crashes on some page of that file, so the example output
above was produced with a local change that renders only the first 800 pages to
work around that. This means the flag is also useful for finding crashes.)
This commit is contained in:
Nico Weber 2023-07-23 16:21:44 -04:00 committed by Tim Flynn
parent 271d0015fe
commit 4bb29e6ac4
Notes: sideshowbarker 2024-07-17 00:59:43 +09:00

View file

@ -75,6 +75,65 @@ static PDF::PDFErrorOr<void> save_rendered_page(PDF::Document& document, int pag
return {};
}
// Builds a run-length-encoded summary of a sorted, non-empty list of page numbers.
// Every distinct page is emitted as " <page>", followed by " (<n>x)" if it occurs more than once,
// so `1 1 3 4 5 5 5` becomes " 1 (2x) 3 4 5 (3x)" (note the leading space).
// The value 0 is used internally to mean "no run has been started yet".
static ErrorOr<String> summary_string(Vector<int> const& pages)
{
    StringBuilder summary;

    // Writes out one finished run: the page number, plus a repeat count if it repeated.
    auto flush_run = [&summary](int run_page, int run_length) {
        summary.appendff(" {}", run_page);
        if (run_length > 1)
            summary.appendff(" ({}x)", run_length);
    };

    int current_page = 0;
    int current_run_length = 0;
    for (int page : pages) {
        if (page != current_page) {
            // A new page value begins; emit the previous run unless this is the very first one.
            if (current_page != 0)
                flush_run(current_page, current_run_length);
            current_page = page;
            current_run_length = 1;
        } else {
            ++current_run_length;
        }
    }

    // The final run is still pending once the loop ends.
    flush_run(current_page, current_run_length);

    return summary.to_string();
}
// Renders every page of `document` into a throwaway bitmap and prints a report of the
// rendering errors that were reported, grouped by error message.
// While rendering, a "page number X / Y" progress line is rewritten in place on stdout.
// Afterwards, the messages are listed from most to least frequent, each followed by a
// summary of the pages it occurred on.
// Errors from fetching a page, creating the bitmap, or building a summary string are
// propagated to the caller; errors from rendering itself are collected for the report.
static PDF::PDFErrorOr<void> print_debugging_stats(PDF::Document& document)
{
    // Maps each error message to the 1-based page numbers it was reported on.
    // A page number is appended once per occurrence, in ascending page order.
    HashMap<DeprecatedString, Vector<int>> diags_to_pages;

    for (u32 page_number = 1; page_number <= document.get_page_count(); ++page_number) {
        // Progress indicator; flushed explicitly since the line is not newline-terminated.
        out("page number {} / {}", page_number, document.get_page_count());
        fflush(stdout);

        auto page = TRY(document.get_page(page_number - 1));

        // Render at a fixed width of 200, with the height following the media box's aspect ratio.
        auto scaled_height = round_to<int>(200 * page.media_box.height() / page.media_box.width());
        auto bitmap = TRY(Gfx::Bitmap::create(Gfx::BitmapFormat::BGRx8888, Gfx::IntSize { 200, scaled_height }));

        auto render_result = PDF::Renderer::render(document, page, bitmap, PDF::RenderingPreferences {});
        if (render_result.is_error()) {
            for (auto const& error : render_result.error().errors())
                diags_to_pages.ensure(error.message()).append(page_number);
        }

        // Go back to the start of the line so the next progress message overwrites this one.
        out("\r");
    }
    outln();

    if (diags_to_pages.is_empty()) {
        outln("no issues found");
        return {};
    }

    // Sort the messages by ascending occurrence count, then walk them backwards
    // so that the most frequent message is printed first.
    auto occurrence_count = [&](auto const& message) { return diags_to_pages.get(message)->size(); };
    auto messages = diags_to_pages.keys();
    quick_sort(messages, [&](auto& lhs, auto& rhs) { return occurrence_count(lhs) < occurrence_count(rhs); });

    for (auto const& message : messages.in_reverse()) {
        auto const& pages = diags_to_pages.get(message).value();
        outln("{} times: {}", pages.size(), message);
        outln(" on pages:{}", TRY(summary_string(pages)));
    }
    return {};
}
static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
{
Core::ArgsParser args_parser;
@ -85,6 +144,9 @@ static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
StringView in_path;
args_parser.add_positional_argument(in_path, "Path to input image file", "FILE");
bool debugging_stats = false;
args_parser.add_option(debugging_stats, "Print stats for debugging", "debugging-stats", {});
bool dump_contents = false;
args_parser.add_option(dump_contents, "Dump page contents", "dump-contents", {});
@ -113,6 +175,19 @@ static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
TRY(document->initialize());
#if !defined(AK_OS_SERENITY)
if (debugging_stats || !render_path.is_empty()) {
// Get from Build/lagom/bin/pdf to Base/res/fonts.
auto source_root = LexicalPath(arguments.argv[0]).parent().parent().parent().parent().string();
Gfx::FontDatabase::set_default_fonts_lookup_path(DeprecatedString::formatted("{}/Base/res/fonts", source_root));
}
#endif
if (debugging_stats) {
TRY(print_debugging_stats(*document));
return 0;
}
if (page_number < 1 || page_number > document->get_page_count()) {
warnln("--page {} out of bounds, must be between 1 and {}", page_number, document->get_page_count());
return 1;
@ -125,12 +200,6 @@ static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
}
if (!render_path.is_empty()) {
#if !defined(AK_OS_SERENITY)
// Get from Build/lagom/bin/pdf to Base/res/fonts.
auto source_root = LexicalPath(arguments.argv[0]).parent().parent().parent().parent().string();
Gfx::FontDatabase::set_default_fonts_lookup_path(DeprecatedString::formatted("{}/Base/res/fonts", source_root));
#endif
TRY(save_rendered_page(document, page_index, render_path));
return 0;
}