mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK: Stop using U+0000 as end of file code point in URL parser
This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to indicate end of file. This fixes a bug where inputs containing null bytes would terminate the parser early, because they were interpreted as end of file.
This commit is contained in:
parent
97425c7dfb
commit
2164d8aae8
Notes:
sideshowbarker
2024-07-18 16:52:00 +09:00
Author: https://github.com/MaxWipfli Commit: https://github.com/SerenityOS/serenity/commit/2164d8aae8a Pull-request: https://github.com/SerenityOS/serenity/pull/7791 Reviewed-by: https://github.com/awesomekling
1 changed files with 18 additions and 16 deletions
|
@ -16,6 +16,9 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
// NOTE: This is similar to the LibC macro EOF = -1.
|
||||
constexpr u32 end_of_file = 0xFFFFFFFF;
|
||||
|
||||
constexpr bool is_url_code_point(u32 code_point)
|
||||
{
|
||||
// FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.
|
||||
|
@ -221,14 +224,13 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
// NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop.
|
||||
// ++iterator : "increase pointer by 1"
|
||||
// continue : "decrease pointer by 1"
|
||||
// NOTE: The NULL code point is used as the "EOF code point".
|
||||
for (;;) {
|
||||
u32 code_point = 0;
|
||||
u32 code_point = end_of_file;
|
||||
if (!iterator.done())
|
||||
code_point = *iterator;
|
||||
|
||||
if constexpr (URL_PARSER_DEBUG) {
|
||||
if (!code_point)
|
||||
if (code_point == end_of_file)
|
||||
dbgln("URLParser::parse: {} state with EOF.", state_name(state));
|
||||
else if (is_ascii_printable(code_point))
|
||||
dbgln("URLParser::parse: {} state with code point U+{:04X} ({:c}).", state_name(state), code_point, code_point);
|
||||
|
@ -335,7 +337,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
} else if (code_point == '#') {
|
||||
url.m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else if (code_point != 0) {
|
||||
} else if (code_point != end_of_file) {
|
||||
url.m_query = {};
|
||||
if (url.m_paths.size())
|
||||
url.m_paths.remove(url.m_paths.size() - 1);
|
||||
|
@ -408,7 +410,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
}
|
||||
}
|
||||
buffer.clear();
|
||||
} else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
} else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
if (at_sign_seen && buffer.is_empty()) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
|
@ -434,7 +436,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
url.m_host = host.release_value();
|
||||
buffer.clear();
|
||||
state = State::Port;
|
||||
} else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
} else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
if (url.is_special() && buffer.is_empty()) {
|
||||
report_validation_error();
|
||||
return {};
|
||||
|
@ -457,7 +459,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
case State::Port:
|
||||
if (is_ascii_digit(code_point)) {
|
||||
buffer.append_code_point(code_point);
|
||||
} else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
} else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
|
||||
if (!buffer.is_empty()) {
|
||||
auto port = buffer.to_string().to_uint();
|
||||
if (!port.has_value() || port.value() > 65535) {
|
||||
|
@ -494,7 +496,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
} else if (code_point == '#') {
|
||||
url.m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else if (code_point != 0) {
|
||||
} else if (code_point != end_of_file) {
|
||||
url.m_query = {};
|
||||
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
|
||||
if (!starts_with_windows_drive_letter(substring_from_pointer)) {
|
||||
|
@ -524,7 +526,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
}
|
||||
break;
|
||||
case State::FileHost:
|
||||
if (code_point == 0 || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
|
||||
if (code_point == end_of_file || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
|
||||
if (is_windows_drive_letter(buffer.to_string())) {
|
||||
report_validation_error();
|
||||
state = State::Path;
|
||||
|
@ -559,14 +561,14 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
} else if (code_point == '#') {
|
||||
url.m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else if (code_point != 0) {
|
||||
} else if (code_point != end_of_file) {
|
||||
state = State::Path;
|
||||
if (code_point != '/')
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case State::Path:
|
||||
if (code_point == 0 || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') {
|
||||
if (code_point == end_of_file || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') {
|
||||
if (url.is_special() && code_point == '\\')
|
||||
report_validation_error();
|
||||
if (is_double_dot_path_segment(buffer.to_string())) {
|
||||
|
@ -616,10 +618,10 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
url.m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else {
|
||||
if (code_point != 0 && !is_url_code_point(code_point) && code_point != '%')
|
||||
if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
|
||||
report_validation_error();
|
||||
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
||||
if (code_point != 0) {
|
||||
if (code_point != end_of_file) {
|
||||
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
|
||||
} else {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
|
@ -628,7 +630,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
}
|
||||
break;
|
||||
case State::Query:
|
||||
if (code_point == '#' || code_point == 0) {
|
||||
if (code_point == end_of_file || code_point == '#') {
|
||||
VERIFY(url.m_query == "");
|
||||
auto query_percent_encode_set = url.is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
|
@ -638,7 +640,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
url.m_fragment = "";
|
||||
state = State::Fragment;
|
||||
}
|
||||
} else if (code_point != 0) {
|
||||
} else if (code_point != end_of_file) {
|
||||
if (!is_url_code_point(code_point) && code_point != '%')
|
||||
report_validation_error();
|
||||
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
||||
|
@ -647,7 +649,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
|
|||
break;
|
||||
case State::Fragment:
|
||||
// NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the fragment on EOF.
|
||||
if (code_point) {
|
||||
if (code_point != end_of_file) {
|
||||
if (!is_url_code_point(code_point) && code_point != '%')
|
||||
report_validation_error();
|
||||
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
||||
|
|
Loading…
Reference in a new issue