From cf7a1f6a5297f4b01a98a39a3d9da6dd4cbcf5d2 Mon Sep 17 00:00:00 2001 From: sideshowbarker Date: Thu, 31 Oct 2024 12:43:09 +0900 Subject: [PATCH] =?UTF-8?q?Meta:=20Make=20import-wpt-test.py=20resolve=20?= =?UTF-8?q?=E2=80=9C..=E2=80=9D=20parent=20refs=20in=20URLs/pathnames?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes the Meta/import-wpt-test.py script handle URLs such as https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js and paths containing, e.g., wpt-import/WebCryptoAPI/generateKey/../util/helpers.js (that is, URLs and paths with “..” parent-directory references in them). Otherwise, without this change, when the import-wpt-test.py script tries a URL like https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js which contains a “..” parent-directory reference, the script fails with a “urllib.error.HTTPError: HTTP Error 404: Not Found” error message. --- Meta/import-wpt-test.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Meta/import-wpt-test.py b/Meta/import-wpt-test.py index 4cbe0a2cc44..084e30a7100 100755 --- a/Meta/import-wpt-test.py +++ b/Meta/import-wpt-test.py @@ -4,6 +4,7 @@ import os import sys from pathlib import Path from bs4 import BeautifulSoup +from urllib.parse import urljoin from urllib.request import urlopen from collections import namedtuple @@ -81,23 +82,26 @@ def download_files(filepaths): downloaded_files = [] for file in filepaths: - if (file.destination.exists()): - print(f"Skipping {file.destination} as it already exists") + source = urljoin(file.source, "/".join(file.source.split('/')[3:])) + destination = Path(file.destination).absolute() + + if destination.exists(): + print(f"Skipping {destination} as it already exists") continue - print(f"Downloading {file.source} to {file.destination}") + print(f"Downloading {source} to {destination}") - connection = urlopen(file.source) + connection = urlopen(source) if connection.status != 200: print(f"Failed to download {file.source}") continue - os.makedirs(file.destination.parent, exist_ok=True) + os.makedirs(destination.parent, exist_ok=True) - with open(file.destination, 'wb') as f: + with open(destination, 'wb') as f: f.write(connection.read()) - downloaded_files.append(file.destination) + downloaded_files.append(destination) return downloaded_files