AK: Report validation error for URLs with invalid percent encodings

This part of the spec is mostly useful for our debugging for now, but
could eventually be hooked up so that the user can see any reported
validation errors.
This commit is contained in:
Shannon Booth 2023-08-13 11:17:02 +12:00 committed by Andrew Kaster
parent f53dfdd6ac
commit e233de1e48
Notes: sideshowbarker 2024-07-16 23:55:09 +09:00

View file

@ -786,6 +786,10 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
return input.substring_view(iterator - input.begin() + iterator.underlying_code_point_length_in_bytes()).as_string();
};
// Spec predicate "remaining starts with two ASCII hex digits": checks the two code points
// at offsets 1 and 2 from the iterator; a missing code point is treated as end_of_file.
// NOTE(review): presumably peek(0) is the current code point, so offsets 1 and 2 are the
// first two code points of "remaining" — confirm against the iterator's peek() contract.
auto remaining_starts_with_two_ascii_hex_digits = [&]() {
    auto is_hex_digit_at = [&](size_t offset) {
        return is_ascii_hex_digit(iterator.peek(offset).value_or(end_of_file));
    };
    // The second lookup only happens when the first one already matched.
    return is_hex_digit_at(1) && is_hex_digit_at(2);
};
// 9. Keep running the following state machine by switching on state. If after a run pointer points to the EOF code point, go to the next step. Otherwise, increase pointer by 1 and continue with the state machine.
// NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop.
// ++iterator : "increase pointer by 1"
@ -1528,7 +1532,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error();
// 3. UTF-8 percent-encode c using the path percent-encode set and append the result to buffer.
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::Path);
@ -1560,7 +1566,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error();
// 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url's path.
if (code_point != end_of_file) {
@ -1606,7 +1614,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error();
// 3. Append c to buffer.
buffer.append_code_point(code_point);
@ -1621,7 +1631,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error();
// FIXME: 3. UTF-8 percent-encode c using the fragment percent-encode set and append the result to url's fragment.
buffer.append_code_point(code_point);