AK: Clear buffer after leaving CannotBeABaseUrlPath in URLParser

By not clearing the buffer, we were leaking the path part of a URL into
the query for URLs without an authority component (no '//host').

This could be seen most noticeably in mailto: URLs with header fields
set, as the query part of `mailto:user@example.com?subject=test` was
parsed to `user@example.comsubject=test`.

data: URLs didn't have this problem, because we have a special case for
parsing them.
This commit is contained in:
Karol Kosek 2023-07-06 19:13:42 +02:00 committed by Andreas Kling
parent 28fdc7af05
commit 58017a0581
Notes: sideshowbarker 2024-07-17 07:38:17 +09:00
2 changed files with 17 additions and 1 deletions

View file

@ -1575,13 +1575,13 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
// -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state // -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
case State::CannotBeABaseUrlPath: case State::CannotBeABaseUrlPath:
// NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF. // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF.
// NOTE: Verify that the assumptions required for this simplification are correct.
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty()); VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
// 1. If c is U+003F (?), then set urls query to the empty string and state to query state. // 1. If c is U+003F (?), then set urls query to the empty string and state to query state.
if (code_point == '?') { if (code_point == '?') {
url->m_paths[0] = buffer.string_view(); url->m_paths[0] = buffer.string_view();
url->m_query = ""; url->m_query = "";
buffer.clear();
state = State::Query; state = State::Query;
} }
// 2. Otherwise, if c is U+0023 (#), then set urls fragment to the empty string and state to fragment state. // 2. Otherwise, if c is U+0023 (#), then set urls fragment to the empty string and state to fragment state.
@ -1589,6 +1589,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
// NOTE: This needs to be percent decoded since the member variables contain decoded data. // NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = buffer.string_view(); url->m_paths[0] = buffer.string_view();
url->m_fragment = ""; url->m_fragment = "";
buffer.clear();
state = State::Fragment; state = State::Fragment;
} }
// 3. Otherwise: // 3. Otherwise:
@ -1604,6 +1605,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control); URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
} else { } else {
url->m_paths[0] = buffer.string_view(); url->m_paths[0] = buffer.string_view();
buffer.clear();
} }
} }
break; break;

View file

@ -226,6 +226,20 @@ TEST_CASE(mailto_url)
EXPECT_EQ(url.serialize(), "mailto:mail@example.com"); EXPECT_EQ(url.serialize(), "mailto:mail@example.com");
} }
TEST_CASE(mailto_url_with_subject)
{
URL url("mailto:mail@example.com?subject=test"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.scheme(), "mailto");
EXPECT(url.host().has<Empty>());
EXPECT_EQ(url.port_or_default(), 0);
EXPECT_EQ(url.path_segment_count(), 1u);
EXPECT_EQ(url.path_segment_at_index(0), "mail@example.com");
EXPECT_EQ(url.query(), "subject=test");
EXPECT(url.fragment().is_null());
EXPECT_EQ(url.serialize(), "mailto:mail@example.com?subject=test");
}
TEST_CASE(data_url) TEST_CASE(data_url)
{ {
URL url("data:text/html,test"sv); URL url("data:text/html,test"sv);