mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 23:50:19 +00:00
test_pdf.py: Output tweaks
* Count files that are password-protected
* Use `pdf --json` to group output by missing feature first and then by file, instead of by file first and feature second
* Count files that render with issues
* Print the number of files without issues last
* Always print all crash stacks
This commit is contained in:
parent
ed7549c64f
commit
8f4966fc5c
Notes:
sideshowbarker
2024-07-17 05:02:42 +09:00
Author: https://github.com/nico
Commit: https://github.com/SerenityOS/serenity/commit/8f4966fc5c
Pull-request: https://github.com/SerenityOS/serenity/pull/21862
1 changed file with 55 additions and 10 deletions
|
@ -14,7 +14,9 @@ One of those zip files in unzipped makes for a good input folder.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import collections
|
import collections
|
||||||
|
import dataclasses
|
||||||
import glob
|
import glob
|
||||||
|
import json
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
@ -26,6 +28,14 @@ Result = collections.namedtuple(
|
||||||
'Result', ['filename', 'returncode', 'stdout', 'stderr'])
|
'Result', ['filename', 'returncode', 'stdout', 'stderr'])
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class Issues:
|
||||||
|
filenames: [str]
|
||||||
|
filename_to_issues: {str: [int]}
|
||||||
|
num_pages: int
|
||||||
|
count: int
|
||||||
|
|
||||||
|
|
||||||
def elide_aslr(s):
|
def elide_aslr(s):
|
||||||
return re.sub(rb'\b0x[0-9a-f]+\b', b'0xc3ns0r3d', s)
|
return re.sub(rb'\b0x[0-9a-f]+\b', b'0xc3ns0r3d', s)
|
||||||
|
|
||||||
|
@ -36,7 +46,7 @@ def elide_parser_offset(s):
|
||||||
|
|
||||||
def test_pdf(filename):
|
def test_pdf(filename):
|
||||||
pdf_path = os.path.join(os.path.dirname(__file__), '../Build/lagom/bin/pdf')
|
pdf_path = os.path.join(os.path.dirname(__file__), '../Build/lagom/bin/pdf')
|
||||||
r = subprocess.run([pdf_path, '--debugging-stats', filename],
|
r = subprocess.run([pdf_path, '--debugging-stats', '--json', filename],
|
||||||
capture_output=True)
|
capture_output=True)
|
||||||
return Result(filename, r.returncode, r.stdout,
|
return Result(filename, r.returncode, r.stdout,
|
||||||
elide_parser_offset(elide_aslr(r.stderr)))
|
elide_parser_offset(elide_aslr(r.stderr)))
|
||||||
|
@ -60,15 +70,29 @@ def main():
|
||||||
results = multiprocessing.Pool().map(test_pdf, files)
|
results = multiprocessing.Pool().map(test_pdf, files)
|
||||||
|
|
||||||
num_files_without_issues = 0
|
num_files_without_issues = 0
|
||||||
|
num_files_with_password = 0
|
||||||
|
num_files_with_issues = 0
|
||||||
failed_files = []
|
failed_files = []
|
||||||
num_crashes = 0
|
num_crashes = 0
|
||||||
stack_to_files = {}
|
stack_to_files = {}
|
||||||
|
issues = {}
|
||||||
for r in results:
|
for r in results:
|
||||||
print(r.filename)
|
|
||||||
print(r.stdout.decode('utf-8'))
|
|
||||||
if r.returncode == 0:
|
if r.returncode == 0:
|
||||||
if b'no issues found' in r.stdout:
|
if b'PDF requires password' in r.stderr:
|
||||||
|
num_files_with_password += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
j = json.loads(r.stdout.decode('utf-8'))
|
||||||
|
if not j['issues']:
|
||||||
num_files_without_issues += 1
|
num_files_without_issues += 1
|
||||||
|
else:
|
||||||
|
num_files_with_issues += 1
|
||||||
|
for diag in j['issues']:
|
||||||
|
issue = issues.setdefault(diag, Issues([], {}, 0, 0))
|
||||||
|
issue.filenames.append(r.filename)
|
||||||
|
issue.filename_to_issues[r.filename] = j['issues'][diag]
|
||||||
|
issue.num_pages += len(j['issues'][diag])
|
||||||
|
issue.count += sum(a * b for (a, b) in j['issues'][diag])
|
||||||
continue
|
continue
|
||||||
if r.returncode == 1:
|
if r.returncode == 1:
|
||||||
failed_files.append(r.filename)
|
failed_files.append(r.filename)
|
||||||
|
@ -76,10 +100,26 @@ def main():
|
||||||
num_crashes += 1
|
num_crashes += 1
|
||||||
stack_to_files.setdefault(r.stderr, []).append(r.filename)
|
stack_to_files.setdefault(r.stderr, []).append(r.filename)
|
||||||
|
|
||||||
print('Top 5 crashiest stacks')
|
percent = 100 * num_files_with_issues / len(results)
|
||||||
|
print(f'{len(issues)} distinct issues, in {num_files_with_issues} files ({percent}%):')
|
||||||
|
issue_keys = list(issues.keys())
|
||||||
|
issue_keys.sort(reverse=True, key=lambda x: len(issues[x].filenames))
|
||||||
|
for issue_key in issue_keys:
|
||||||
|
issue = issues[issue_key]
|
||||||
|
print(issue_key, end='')
|
||||||
|
print(f', in {len(issue.filenames)} files, on {issue.num_pages} pages, {issue.count} times')
|
||||||
|
filenames = sorted(issue.filenames, reverse=True, key=lambda x: len(issue.filename_to_issues[x]))
|
||||||
|
for filename in filenames:
|
||||||
|
page_counts = issue.filename_to_issues[filename]
|
||||||
|
page_counts = ' '.join([f'{page} ({count}x)' if count > 1 else f'{page}' for (page, count) in page_counts])
|
||||||
|
print(f' {filename} {page_counts}')
|
||||||
|
print()
|
||||||
|
print()
|
||||||
|
|
||||||
|
print('Stacks:')
|
||||||
keys = list(stack_to_files.keys())
|
keys = list(stack_to_files.keys())
|
||||||
keys.sort(key=lambda x: len(stack_to_files[x]), reverse=True)
|
keys.sort(key=lambda x: len(stack_to_files[x]), reverse=True)
|
||||||
for stack in reversed(keys[:5]):
|
for stack in reversed(keys):
|
||||||
files = stack_to_files[stack]
|
files = stack_to_files[stack]
|
||||||
print(stack.decode('utf-8', 'backslashreplace'), end='')
|
print(stack.decode('utf-8', 'backslashreplace'), end='')
|
||||||
print(f'In {len(files)} files:')
|
print(f'In {len(files)} files:')
|
||||||
|
@ -87,10 +127,6 @@ def main():
|
||||||
print(f' {file}')
|
print(f' {file}')
|
||||||
print()
|
print()
|
||||||
|
|
||||||
percent = 100 * num_files_without_issues / len(results)
|
|
||||||
print(f'{num_files_without_issues} files without issues ({percent:.1f}%)')
|
|
||||||
print()
|
|
||||||
|
|
||||||
percent = 100 * num_crashes / len(results)
|
percent = 100 * num_crashes / len(results)
|
||||||
print(f'{num_crashes} crashes ({percent:.1f}%)')
|
print(f'{num_crashes} crashes ({percent:.1f}%)')
|
||||||
print(f'{len(keys)} distinct crash stacks')
|
print(f'{len(keys)} distinct crash stacks')
|
||||||
|
@ -100,6 +136,15 @@ def main():
|
||||||
print(f'{len(failed_files)} failed to open ({percent:.1f}%)')
|
print(f'{len(failed_files)} failed to open ({percent:.1f}%)')
|
||||||
for f in failed_files:
|
for f in failed_files:
|
||||||
print(f' {f}')
|
print(f' {f}')
|
||||||
|
print()
|
||||||
|
|
||||||
|
percent = 100 * num_files_with_password / len(results)
|
||||||
|
print(f'{num_files_with_password} files with password ({percent:.1f}%)')
|
||||||
|
print()
|
||||||
|
|
||||||
|
percent = 100 * num_files_without_issues / len(results)
|
||||||
|
print(f'{num_files_without_issues} files without issues ({percent:.1f}%)')
|
||||||
|
print()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in a new issue