LibURL: Check for forbidden _domain_ code points in non-opaque hosts

Previously, we were not checking domains for C0 controls, U+0025 (%), or
U+007F DELETE.

This makes another good set of URL tests in WPT pass :^)
This commit is contained in:
Shannon Booth 2024-08-05 01:17:14 +12:00 committed by Tim Ledbetter
parent f511c0b441
commit fdf4f1e887
Notes: github-actions[bot] 2024-08-04 17:30:05 +00:00
2 changed files with 35 additions and 6 deletions

View file

@ -512,3 +512,18 @@ TEST_CASE(ascii_only_url)
EXPECT_EQ(url.to_byte_string(), "http://example.com/iNdEx.HtMl#fRaGmEnT");
}
}
// URLs whose host contains a forbidden domain code point must be reported as invalid.
TEST_CASE(invalid_domain_code_points)
{
    {
        // "%25" is the percent-encoded form of U+0025 (%), which is a forbidden domain code point.
        constexpr auto percent_in_host = "http://example%25.com"sv;
        URL::URL url(percent_in_host);
        EXPECT(!url.is_valid());
    }
    {
        // U+0007 is a C0 control, which is a forbidden domain code point.
        constexpr auto c0_control_in_host = "http://thing\u0007y/'"sv;
        URL::URL url(c0_control_in_host);
        EXPECT(!url.is_valid());
    }
}

View file

@ -22,6 +22,22 @@ namespace URL {
// NOTE: This is similar to the LibC macro EOF = -1.
constexpr u32 end_of_file = 0xFFFFFFFF;
// https://url.spec.whatwg.org/#forbidden-host-code-point
// Returns true if `code_point` is one of the forbidden host code points listed by the URL specification.
static bool is_forbidden_host_code_point(u32 code_point)
{
    // A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, U+0020 SPACE,
    // U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?), U+0040 (@), U+005B ([),
    // U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
    switch (code_point) {
    case '\0': // U+0000 NULL
    case '\t': // U+0009 TAB
    case '\n': // U+000A LF
    case '\r': // U+000D CR
    case ' ':  // U+0020 SPACE
    case '#':  // U+0023
    case '/':  // U+002F
    case ':':  // U+003A
    case '<':  // U+003C
    case '>':  // U+003E
    case '?':  // U+003F
    case '@':  // U+0040
    case '[':  // U+005B
    case '\\': // U+005C
    case ']':  // U+005D
    case '^':  // U+005E
    case '|':  // U+007C
        return true;
    default:
        return false;
    }
}
// https://url.spec.whatwg.org/#forbidden-domain-code-point
// Returns true if `code_point` may not appear in a domain.
static bool is_forbidden_domain_code_point(u32 code_point)
{
    // A forbidden domain code point is a forbidden host code point, a C0 control, U+0025 (%), or U+007F DELETE.

    // The two code points that are singled out in addition to the host set.
    if (code_point == '%' || code_point == 0x7F)
        return true;

    if (is_ascii_c0_control(code_point))
        return true;

    return is_forbidden_host_code_point(code_point);
}
// https://url.spec.whatwg.org/#url-code-points
static bool is_url_code_point(u32 code_point)
{
@ -44,9 +60,8 @@ static void report_validation_error(SourceLocation const& location = SourceLocat
static Optional<Host> parse_opaque_host(StringView input)
{
// 1. If input contains a forbidden host code point, host-invalid-code-point validation error, return failure.
auto forbidden_host_characters_excluding_percent = "\0\t\n\r #/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters_excluding_percent) {
if (input.contains(character)) {
for (auto code_point : Utf8View { input }) {
if (is_forbidden_host_code_point(code_point)) {
report_validation_error();
return {};
}
@ -647,9 +662,8 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
auto ascii_domain = ascii_domain_or_error.release_value();
// 7. If asciiDomain contains a forbidden domain code point, domain-invalid-code-point validation error, return failure.
auto forbidden_host_characters = "\0\t\n\r #%/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters) {
if (ascii_domain.bytes_as_string_view().contains(character)) {
for (auto character : ascii_domain.bytes_as_string_view()) {
if (is_forbidden_domain_code_point(character)) {
report_validation_error();
return {};
}