Meta: Make import-wpt-test.py resolve “..” parent refs in URLs/pathnames

This change makes the Meta/import-wpt-test.py script handle URLs such as
https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js and paths
such as wpt-import/WebCryptoAPI/generateKey/../util/helpers.js — that is,
URLs and paths that contain “..” parent-directory references.

Without this change, when the import-wpt-test.py script tries to fetch
a URL like https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js
which contains a “..” parent-directory reference, the script fails with
a “urllib.error.HTTPError: HTTP Error 404: Not Found” error message.
This commit is contained in:
sideshowbarker 2024-10-31 12:43:09 +09:00 committed by Tim Ledbetter
parent f064c6e930
commit cf7a1f6a52
Notes: github-actions[bot] 2024-10-31 07:05:11 +00:00

View file

@ -4,6 +4,7 @@ import os
import sys import sys
from pathlib import Path from pathlib import Path
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlopen from urllib.request import urlopen
from collections import namedtuple from collections import namedtuple
@ -81,23 +82,26 @@ def download_files(filepaths):
downloaded_files = [] downloaded_files = []
for file in filepaths: for file in filepaths:
if (file.destination.exists()): source = urljoin(file.source, "/".join(file.source.split('/')[3:]))
print(f"Skipping {file.destination} as it already exists") destination = Path(file.destination).absolute()
if destination.exists():
print(f"Skipping {destination} as it already exists")
continue continue
print(f"Downloading {file.source} to {file.destination}") print(f"Downloading {source} to {destination}")
connection = urlopen(file.source) connection = urlopen(source)
if connection.status != 200: if connection.status != 200:
print(f"Failed to download {file.source}") print(f"Failed to download {file.source}")
continue continue
os.makedirs(file.destination.parent, exist_ok=True) os.makedirs(destination.parent, exist_ok=True)
with open(file.destination, 'wb') as f: with open(destination, 'wb') as f:
f.write(connection.read()) f.write(connection.read())
downloaded_files.append(file.destination) downloaded_files.append(destination)
return downloaded_files return downloaded_files