Meta: Support importing WPT reference tests

Author: https://github.com/AtkinsSJ Commit: https://github.com/LadybirdBrowser/ladybird/commit/7a5b38d5776 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2179 Reviewed-by: https://github.com/tcl3 ✅
2024-11-21 23:20:20 +00:00 · 2024-11-05 14:17:38 +00:00 · 2024-11-05 14:17:38 +00:00 · 7a5b38d577 · 2024-11-05 17:59:26 +00:00
commit 7a5b38d577
parent 9598ed1d17
2 changed files with 91 additions and 18 deletions
--- a/Documentation/Testing.md
+++ b/Documentation/Testing.md
@@ -118,7 +118,7 @@ You can import certain Web Platform Tests (WPT) tests into your Ladybird clone
 ./Meta/WPT.sh import html/dom/aria-attribute-reflection.html
 ```
-That is, you give `./Meta/WPT.sh import` the path part of any `http://wpt.live/` URL for a WPT test you want to import. It will then download both that test and any of its JavaScript scripts, copy those to the `Tests/LibWeb/Text/input/wpt-import` directory, run the test, and then in the `Tests/LibWeb/Text/expected/wpt-import` directory, it will create a file with the expected results from the test.
+That is, you give `./Meta/WPT.sh import` the path part of any `http://wpt.live/` URL for a WPT test you want to import. It will then download both that test and any of its JavaScript scripts, copy those to the `Tests/LibWeb/<test-type>/input/wpt-import` directory, run the test, and then in the `Tests/LibWeb/<test-type>/expected/wpt-import` directory, it will create a file with the expected results from the test.
 ## Writing tests
--- a/Meta/import-wpt-test.py
+++ b/Meta/import-wpt-test.py
@@ -8,12 +8,30 @@ from pathlib import Path
 from urllib.parse import urljoin
 from urllib.request import urlopen
 from collections import namedtuple
 from enum import Enum
 wpt_base_url = 'https://wpt.live/'
-wpt_import_path = 'Tests/LibWeb/Text/input/wpt-import'
+
-wpt_expected_path = 'Tests/LibWeb/Text/expected/wpt-import'
+
 class TestType(Enum):
    """Kind of WPT test being imported, paired with the directories it uses.

    Each member is declared as ``(number, input_path, expected_path)``. Only
    the number becomes the member's ``value``; the two paths are exposed as
    the ``input_path`` and ``expected_path`` attributes.
    """

    TEXT = 1, 'Tests/LibWeb/Text/input/wpt-import', 'Tests/LibWeb/Text/expected/wpt-import'
    REF = 2, 'Tests/LibWeb/Ref/input/wpt-import', 'Tests/LibWeb/Ref/expected/wpt-import'

    def __new__(cls, *args, **kwds):
        # Without this override Enum would use the whole tuple as the value;
        # here only the leading number is kept so TestType(1) lookups work.
        member = object.__new__(cls)
        member._value_ = args[0]
        return member

    def __init__(self, _: int, input_path: str, expected_path: str):
        # The first argument is the numeric value already consumed by __new__.
        self.input_path = input_path
        self.expected_path = expected_path
 PathMapping = namedtuple('PathMapping', ['source', 'destination'])
 test_type = TestType.TEXT
 raw_reference_path = None  # As specified in the test HTML
 reference_path = None  # With parent directories
 src_values = []
@@ -26,30 +44,52 @@ class ScriptSrcValueFinder(HTMLParser):
                src_values.append(attr_dict["src"])
 class TestTypeIdentifier(HTMLParser):
    """Identifies what kind of test the page is, and stores it in self.test_type

    For reference tests, the URL of the reference page is saved as self.reference_path

    A page is treated as a reference test when it contains a
    ``<link rel="match">`` element; otherwise ``test_type`` stays
    ``TestType.TEXT`` and ``reference_path`` stays ``None``.
    """

    def __init__(self, url):
        """Set up the parser; ``url`` is stored on the instance as ``self.url``."""
        super().__init__()
        self.url = url
        self.test_type = TestType.TEXT
        self.reference_path = None

    def handle_starttag(self, tag, attrs):
        """Record a ``<link rel="match" href=...>`` element as the reference page."""
        if tag != "link":
            return
        attr_dict = dict(attrs)
        # Use .get(): a <link> without a rel attribute must be ignored rather
        # than abort parsing with a KeyError.
        if attr_dict.get("rel") != "match":
            return
        self.test_type = TestType.REF
        # Stored exactly as written in the page; None when href is absent, so
        # the caller can detect and report the missing reference itself.
        self.reference_path = attr_dict.get("href")
 def map_to_path(sources, is_resource=True, resource_path=None):
    if is_resource:
        # Add it as a sibling path if it's a relative resource
        sibling_location = Path(resource_path).parent.__str__()
-        sibling_import_path = wpt_import_path + '/' + sibling_location
+        sibling_import_path = test_type.input_path + '/' + sibling_location
        def remapper(x):
            if x.startswith('/'):
-                return wpt_import_path + x
+                return test_type.input_path + x
            return sibling_import_path + '/' + x
        filepaths = list(map(remapper, sources))
        filepaths = list(map(lambda x: Path(x), filepaths))
    else:
-        # Add the wpt_import_path to the sources if root files
+        # Add the test_type.input_path to the sources if root files
        def remapper(x):
-            return wpt_import_path + '/' + x
+            if x.startswith('/'):
                return test_type.input_path + x
            return test_type.input_path + '/' + x
        filepaths = list(map(lambda x: Path(remapper(x)), sources))
    # Map to source and destination
    def path_mapper(x):
-        output_path = wpt_base_url + x.__str__().replace(wpt_import_path, '')
+        output_path = wpt_base_url + x.__str__().replace(test_type.input_path, '')
        return PathMapping(output_path, x.absolute())
    filepaths = list(map(path_mapper, filepaths))
    return filepaths
@@ -58,8 +98,13 @@ def map_to_path(sources, is_resource=True, resource_path=None):
 def modify_sources(files):
    for file in files:
        # Get the distance to the wpt-imports folder
-        folder_index = str(file).find(wpt_import_path)
+        folder_index = str(file).find(test_type.input_path)
-        non_prefixed_path = str(file)[folder_index + len(wpt_import_path):]
+        if folder_index == -1:
            folder_index = str(file).find(test_type.expected_path)
            non_prefixed_path = str(file)[folder_index + len(test_type.expected_path):]
        else:
            non_prefixed_path = str(file)[folder_index + len(test_type.input_path):]
        parent_folder_count = len(Path(non_prefixed_path).parent.parts) - 1
        parent_folder_path = '../' * parent_folder_count
@@ -73,8 +118,14 @@ def modify_sources(files):
            if src_value.startswith('/'):
                new_src_value = parent_folder_path + src_value[1::]
                page_source = page_source.replace(src_value, new_src_value)
-                with open(file, 'w') as f:
+
-                    f.write(str(page_source))
+        # Look for mentions of the reference page, and update their href
        if raw_reference_path is not None:
            new_reference_path = parent_folder_path + '../../expected/wpt-import/' + reference_path[::]
            page_source = page_source.replace(raw_reference_path, new_reference_path)
        with open(file, 'w') as f:
            f.write(str(page_source))
 def download_files(filepaths):
@@ -106,8 +157,12 @@ def download_files(filepaths):
 def create_expectation_files(files):
    # Ref tests don't have an expectation text file
    if test_type == TestType.REF:
        return
    for file in files:
-        new_path = str(file.destination).replace(wpt_import_path, wpt_expected_path)
+        new_path = str(file.destination).replace(test_type.input_path, test_type.expected_path)
        new_path = new_path.rsplit(".", 1)[0] + '.txt'
        expected_file = Path(new_path)
@@ -127,14 +182,32 @@ def main():
    url_to_import = sys.argv[1]
    resource_path = '/'.join(Path(url_to_import).parts[2::])
    main_file = [resource_path]
    main_paths = map_to_path(main_file, False)
    files_to_modify = download_files(main_paths)
    create_expectation_files(main_paths)
    with urlopen(url_to_import) as response:
        page = response.read().decode("utf-8")
    global test_type, reference_path, raw_reference_path
    identifier = TestTypeIdentifier(url_to_import)
    identifier.feed(page)
    test_type = identifier.test_type
    raw_reference_path = identifier.reference_path
    print(f"Identified {url_to_import} as type {test_type}, ref {raw_reference_path}")
    main_file = [resource_path]
    main_paths = map_to_path(main_file, False)
    if test_type == TestType.REF and raw_reference_path is None:
        raise RuntimeError('Failed to file reference path in ref test')
    if raw_reference_path is not None:
        reference_path = Path(resource_path).parent.joinpath(raw_reference_path).__str__()
        main_paths.append(PathMapping(
            wpt_base_url + '/' + reference_path,
            Path(test_type.expected_path + '/' + reference_path).absolute()
        ))
    files_to_modify = download_files(main_paths)
    create_expectation_files(main_paths)
    parser = ScriptSrcValueFinder()
    parser.feed(page)