mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibURL: Fix heuristic for URL domain parsing IDNA fast path
Our heuristic was a bit too simplistic and would not run through the Unicode ToASCII algorithm, which performs some extra validation. This would cause invalid URLs that should fail to parse to be mistakenly accepted. This fixes 8 tests in: https://wpt.live/url/url-constructor.any.html
This commit is contained in:
parent
fd4e943e12
commit
db3f118046
Notes:
github-actions[bot]
2024-08-06 22:09:08 +00:00
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/db3f1180464 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/987 Reviewed-by: https://github.com/tcl3 ✅
3 changed files with 39 additions and 4 deletions
2
Tests/LibWeb/Text/expected/URL/invalid-urls.txt
Normal file
2
Tests/LibWeb/Text/expected/URL/invalid-urls.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
new URL('file://xn--/p', undefined)
|
||||
error creating URL: 'TypeError: Invalid URL'
|
21
Tests/LibWeb/Text/input/URL/invalid-urls.html
Normal file
21
Tests/LibWeb/Text/input/URL/invalid-urls.html
Normal file
|
@ -0,0 +1,21 @@
|
|||
<script src="../include.js"></script>
|
||||
<script>
|
||||
test(() => {
|
||||
const urls = [
|
||||
{ input: 'file://xn--/p' },
|
||||
];
|
||||
|
||||
for (url of urls) {
|
||||
if (url.base === undefined)
|
||||
println(`new URL('${url.input}', ${url.base})`);
|
||||
else
|
||||
println(`new URL('${url.input}', '${url.base}')`);
|
||||
|
||||
try {
|
||||
new URL(url.input, url.base);
|
||||
} catch (e) {
|
||||
println(`error creating URL: '${e}'`);
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
|
@ -596,14 +596,26 @@ static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
|
|||
// 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
|
||||
// 2. If result is a failure value, domain-to-ASCII validation error, return failure.
|
||||
|
||||
// OPTIMIZATION: Fast path for all-ASCII domain strings.
|
||||
if (all_of(domain, is_ascii)) {
|
||||
// OPTIMIZATION: If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.)
|
||||
// does not produce any item that starts with an ASCII case-insensitive match for "xn--", this
|
||||
// step is equivalent to ASCII lowercasing domain.
|
||||
if (!be_strict && all_of(domain, is_ascii)) {
|
||||
// 3. If result is the empty string, domain-to-ASCII validation error, return failure.
|
||||
if (domain.is_empty())
|
||||
return Error::from_string_literal("Empty domain");
|
||||
|
||||
auto lowercase_domain = domain.to_lowercase_string();
|
||||
return String::from_utf8_without_validation(lowercase_domain.bytes());
|
||||
bool slow_path = false;
|
||||
for (auto part : domain.split_view('.')) {
|
||||
if (part.starts_with("xn--"sv, CaseSensitivity::CaseInsensitive)) {
|
||||
slow_path = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!slow_path) {
|
||||
auto lowercase_domain = domain.to_lowercase_string();
|
||||
return String::from_utf8_without_validation(lowercase_domain.bytes());
|
||||
}
|
||||
}
|
||||
|
||||
Unicode::IDNA::ToAsciiOptions const options {
|
||||
|
|
Loading…
Reference in a new issue