From 58017a05815bd025b94df39714dc2ed0f7f85dc2 Mon Sep 17 00:00:00 2001 From: Karol Kosek Date: Thu, 6 Jul 2023 19:13:42 +0200 Subject: [PATCH] AK: Clear buffer after leaving CannotBeABaseUrlPath in URLParser By not clearing the buffer, we were leaking the path part of a URL into the query for URLs without an authority component (no '//host'). This could be seen most noticeably in mailto: URLs with header fields set, as the query part of `mailto:user@example.com?subject=test` was parsed to `user@example.comsubject=test`. data: URLs didn't have this problem, because we have a special case for parsing them. --- AK/URLParser.cpp | 4 +++- Tests/AK/TestURL.cpp | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index f3397b2ed32..e890c4a29b3 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -1575,13 +1575,13 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state case State::CannotBeABaseUrlPath: // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF. - // NOTE: Verify that the assumptions required for this simplification are correct. VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty()); // 1. If c is U+003F (?), then set url’s query to the empty string and state to query state. if (code_point == '?') { url->m_paths[0] = buffer.string_view(); url->m_query = ""; + buffer.clear(); state = State::Query; } // 2. Otherwise, if c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. @@ -1589,6 +1589,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // NOTE: This needs to be percent decoded since the member variables contain decoded data. url->m_paths[0] = buffer.string_view(); url->m_fragment = ""; + buffer.clear(); state = State::Fragment; } // 3. Otherwise: @@ -1604,6 +1605,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control); } else { url->m_paths[0] = buffer.string_view(); + buffer.clear(); } } break; diff --git a/Tests/AK/TestURL.cpp b/Tests/AK/TestURL.cpp index 78790f27754..08648afab29 100644 --- a/Tests/AK/TestURL.cpp +++ b/Tests/AK/TestURL.cpp @@ -226,6 +226,20 @@ TEST_CASE(mailto_url) EXPECT_EQ(url.serialize(), "mailto:mail@example.com"); } +TEST_CASE(mailto_url_with_subject) +{ + URL url("mailto:mail@example.com?subject=test"sv); + EXPECT(url.is_valid()); + EXPECT_EQ(url.scheme(), "mailto"); + EXPECT(url.host().has()); + EXPECT_EQ(url.port_or_default(), 0); + EXPECT_EQ(url.path_segment_count(), 1u); + EXPECT_EQ(url.path_segment_at_index(0), "mail@example.com"); + EXPECT_EQ(url.query(), "subject=test"); + EXPECT(url.fragment().is_null()); + EXPECT_EQ(url.serialize(), "mailto:mail@example.com?subject=test"); +} + TEST_CASE(data_url) { URL url("data:text/html,test"sv);