mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
AK: Don't store parts of URLs percent decoded
As noted in serval comments doing this goes against the WC3 spec, and breaks parsing then re-serializing URLs that contain percent encoded data, that was not encoded using the same character set as the serializer. For example, previously if you had a URL like: https:://foo.com/what%2F%2F (the path is what + '//' percent encoded) Creating URL("https:://foo.com/what%2F%2F").serialize() would return: https://foo.com/what// Which is incorrect and not the same as the URL we passed. This is because the re-serializing uses the PercentEncodeSet::Path which does not include '/'. Only doing the percent encoding in the setters fixes this, which is required to navigate to Google Street View (which includes a percent encoded URL in its URL). Seems to fix #13477 too
This commit is contained in:
parent
d2fc8efd9e
commit
8283e8b88c
Notes:
sideshowbarker
2024-07-17 01:10:58 +09:00
Author: https://github.com/MacDue Commit: https://github.com/SerenityOS/serenity/commit/8283e8b88c Pull-request: https://github.com/SerenityOS/serenity/pull/18271
5 changed files with 85 additions and 51 deletions
69
AK/URL.cpp
69
AK/URL.cpp
|
@ -47,20 +47,32 @@ URL URL::complete_url(StringView relative_url) const
|
|||
return URLParser::parse(relative_url, *this);
|
||||
}
|
||||
|
||||
// NOTE: This only exists for compatibility with the existing URL tests which check for both .is_null() and .is_empty().
|
||||
static DeprecatedString deprecated_string_percent_encode(DeprecatedString const& input, URL::PercentEncodeSet set = URL::PercentEncodeSet::Userinfo, URL::SpaceAsPlus space_as_plus = URL::SpaceAsPlus::No)
|
||||
{
|
||||
if (input.is_null() || input.is_empty())
|
||||
return input;
|
||||
return URL::percent_encode(input.view(), set, space_as_plus);
|
||||
}
|
||||
|
||||
void URL::set_scheme(DeprecatedString scheme)
|
||||
{
|
||||
m_scheme = move(scheme);
|
||||
m_valid = compute_validity();
|
||||
}
|
||||
|
||||
void URL::set_username(DeprecatedString username)
|
||||
void URL::set_username(DeprecatedString username, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
|
||||
username = deprecated_string_percent_encode(username, PercentEncodeSet::Userinfo);
|
||||
m_username = move(username);
|
||||
m_valid = compute_validity();
|
||||
}
|
||||
|
||||
void URL::set_password(DeprecatedString password)
|
||||
void URL::set_password(DeprecatedString password, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
|
||||
password = deprecated_string_percent_encode(password, PercentEncodeSet::Userinfo);
|
||||
m_password = move(password);
|
||||
m_valid = compute_validity();
|
||||
}
|
||||
|
@ -81,19 +93,38 @@ void URL::set_port(Optional<u16> port)
|
|||
m_valid = compute_validity();
|
||||
}
|
||||
|
||||
void URL::set_paths(Vector<DeprecatedString> paths)
|
||||
void URL::set_paths(Vector<DeprecatedString> paths, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
m_paths = move(paths);
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes) {
|
||||
Vector<DeprecatedString> encoded_paths;
|
||||
encoded_paths.ensure_capacity(paths.size());
|
||||
for (auto& segment : paths)
|
||||
encoded_paths.unchecked_append(deprecated_string_percent_encode(segment, PercentEncodeSet::Path));
|
||||
m_paths = move(encoded_paths);
|
||||
} else {
|
||||
m_paths = move(paths);
|
||||
}
|
||||
m_valid = compute_validity();
|
||||
}
|
||||
|
||||
void URL::set_query(DeprecatedString query)
|
||||
void URL::append_path(DeprecatedString path, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
|
||||
path = deprecated_string_percent_encode(path, PercentEncodeSet::Path);
|
||||
m_paths.append(path);
|
||||
}
|
||||
|
||||
void URL::set_query(DeprecatedString query, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
|
||||
query = deprecated_string_percent_encode(query, is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query);
|
||||
m_query = move(query);
|
||||
}
|
||||
|
||||
void URL::set_fragment(DeprecatedString fragment)
|
||||
void URL::set_fragment(DeprecatedString fragment, ApplyPercentEncoding apply_percent_encoding)
|
||||
{
|
||||
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
|
||||
fragment = deprecated_string_percent_encode(fragment, PercentEncodeSet::Fragment);
|
||||
m_fragment = move(fragment);
|
||||
}
|
||||
|
||||
|
@ -171,9 +202,8 @@ URL URL::create_with_file_scheme(DeprecatedString const& path, DeprecatedString
|
|||
// This is because a file URL always needs a non-null hostname.
|
||||
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
|
||||
url.set_paths(lexical_path.parts());
|
||||
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
|
||||
if (path.ends_with('/'))
|
||||
url.append_path("");
|
||||
url.append_slash();
|
||||
url.set_fragment(fragment);
|
||||
return url;
|
||||
}
|
||||
|
@ -188,9 +218,8 @@ URL URL::create_with_help_scheme(DeprecatedString const& path, DeprecatedString
|
|||
// This is because a file URL always needs a non-null hostname.
|
||||
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
|
||||
url.set_paths(lexical_path.parts());
|
||||
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
|
||||
if (path.ends_with('/'))
|
||||
url.append_path("");
|
||||
url.append_slash();
|
||||
url.set_fragment(fragment);
|
||||
return url;
|
||||
}
|
||||
|
@ -242,10 +271,10 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
|
|||
builder.append("//"sv);
|
||||
|
||||
if (includes_credentials()) {
|
||||
builder.append(percent_encode(m_username, PercentEncodeSet::Userinfo));
|
||||
builder.append(m_username);
|
||||
if (!m_password.is_empty()) {
|
||||
builder.append(':');
|
||||
builder.append(percent_encode(m_password, PercentEncodeSet::Userinfo));
|
||||
builder.append(m_password);
|
||||
}
|
||||
builder.append('@');
|
||||
}
|
||||
|
@ -256,24 +285,24 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
|
|||
}
|
||||
|
||||
if (cannot_be_a_base_url()) {
|
||||
builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
|
||||
builder.append(m_paths[0]);
|
||||
} else {
|
||||
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
|
||||
builder.append("/."sv);
|
||||
for (auto& segment : m_paths) {
|
||||
builder.append('/');
|
||||
builder.append(percent_encode(segment, PercentEncodeSet::Path));
|
||||
builder.append(segment);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_query.is_null()) {
|
||||
builder.append('?');
|
||||
builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query));
|
||||
builder.append(m_query);
|
||||
}
|
||||
|
||||
if (exclude_fragment == ExcludeFragment::No && !m_fragment.is_null()) {
|
||||
builder.append('#');
|
||||
builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
|
||||
builder.append(m_fragment);
|
||||
}
|
||||
|
||||
return builder.to_deprecated_string();
|
||||
|
@ -300,24 +329,24 @@ DeprecatedString URL::serialize_for_display() const
|
|||
}
|
||||
|
||||
if (cannot_be_a_base_url()) {
|
||||
builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
|
||||
builder.append(m_paths[0]);
|
||||
} else {
|
||||
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
|
||||
builder.append("/."sv);
|
||||
for (auto& segment : m_paths) {
|
||||
builder.append('/');
|
||||
builder.append(percent_encode(segment, PercentEncodeSet::Path));
|
||||
builder.append(segment);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_query.is_null()) {
|
||||
builder.append('?');
|
||||
builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query));
|
||||
builder.append(m_query);
|
||||
}
|
||||
|
||||
if (!m_fragment.is_null()) {
|
||||
builder.append('#');
|
||||
builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
|
||||
builder.append(m_fragment);
|
||||
}
|
||||
|
||||
return builder.to_deprecated_string();
|
||||
|
|
23
AK/URL.h
23
AK/URL.h
|
@ -19,8 +19,6 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
// NOTE: The member variables cannot contain any percent encoded sequences.
|
||||
// The URL parser automatically decodes those sequences and the serialize() method will re-encode them as necessary.
|
||||
class URL {
|
||||
friend class URLParser;
|
||||
|
||||
|
@ -70,16 +68,25 @@ public:
|
|||
bool includes_credentials() const { return !m_username.is_empty() || !m_password.is_empty(); }
|
||||
bool is_special() const { return is_special_scheme(m_scheme); }
|
||||
|
||||
enum class ApplyPercentEncoding {
|
||||
Yes,
|
||||
No
|
||||
};
|
||||
void set_scheme(DeprecatedString);
|
||||
void set_username(DeprecatedString);
|
||||
void set_password(DeprecatedString);
|
||||
void set_username(DeprecatedString username, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void set_password(DeprecatedString password, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void set_host(DeprecatedString);
|
||||
void set_port(Optional<u16>);
|
||||
void set_paths(Vector<DeprecatedString>);
|
||||
void set_query(DeprecatedString);
|
||||
void set_fragment(DeprecatedString);
|
||||
void set_paths(Vector<DeprecatedString> password, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void set_query(DeprecatedString query, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void set_fragment(DeprecatedString fragment, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void set_cannot_be_a_base_url(bool value) { m_cannot_be_a_base_url = value; }
|
||||
void append_path(DeprecatedString path) { m_paths.append(move(path)); }
|
||||
void append_path(DeprecatedString path, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
|
||||
void append_slash()
|
||||
{
|
||||
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
|
||||
append_path("", ApplyPercentEncoding::No);
|
||||
}
|
||||
|
||||
DeprecatedString path() const;
|
||||
DeprecatedString basename() const;
|
||||
|
|
|
@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input)
|
|||
// future for validation of URLs, which would then lead to infinite recursion.
|
||||
// The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
|
||||
// variables' values here, not what the URL class presents to its users.
|
||||
// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting
|
||||
// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode
|
||||
// everything before setting the member variables.
|
||||
URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
|
||||
{
|
||||
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
|
||||
|
@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
++iterator;
|
||||
} else {
|
||||
url->m_cannot_be_a_base_url = true;
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
state = State::CannotBeABaseUrlPath;
|
||||
}
|
||||
} else {
|
||||
|
@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (password_token_seen) {
|
||||
builder.append(url->password());
|
||||
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
url->m_password = URL::percent_decode(builder.string_view());
|
||||
url->m_password = builder.string_view();
|
||||
} else {
|
||||
builder.append(url->username());
|
||||
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
url->m_username = URL::percent_decode(builder.string_view());
|
||||
url->m_username = builder.string_view();
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
|
@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
url->m_host = base_url->m_host;
|
||||
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
|
||||
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
|
||||
url->append_path(base_url->m_paths[0]);
|
||||
url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
|
||||
state = State::Path;
|
||||
continue;
|
||||
}
|
||||
|
@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
|
||||
url->m_paths.remove(url->m_paths.size() - 1);
|
||||
if (code_point != '/' && !(url->is_special() && code_point == '\\'))
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
} else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
} else if (!is_single_dot_path_segment(buffer.string_view())) {
|
||||
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
|
||||
auto drive_letter = buffer.string_view()[0];
|
||||
|
@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
buffer.append(drive_letter);
|
||||
buffer.append(':');
|
||||
}
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->append_path(URL::percent_decode(buffer.string_view()));
|
||||
url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
|
||||
}
|
||||
buffer.clear();
|
||||
if (code_point == '?') {
|
||||
|
@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
// NOTE: Verify that the assumptions required for this simplification are correct.
|
||||
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
|
||||
if (code_point == '?') {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
url->m_query = "";
|
||||
state = State::Query;
|
||||
} else if (code_point == '#') {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
url->m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else {
|
||||
|
@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (code_point != end_of_file) {
|
||||
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
|
||||
} else {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
||||
buffer.append_code_point(code_point);
|
||||
} else {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_fragment = URL::percent_decode(buffer.string_view());
|
||||
url->m_fragment = buffer.string_view();
|
||||
buffer.clear();
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -151,7 +151,7 @@ TEST_CASE(file_url_with_encoded_characters)
|
|||
URL url("file:///my/file/test%23file.txt"sv);
|
||||
EXPECT(url.is_valid());
|
||||
EXPECT_EQ(url.scheme(), "file");
|
||||
EXPECT_EQ(url.path(), "/my/file/test#file.txt");
|
||||
EXPECT_EQ(url.path(), "/my/file/test%23file.txt");
|
||||
EXPECT(url.query().is_null());
|
||||
EXPECT(url.fragment().is_null());
|
||||
}
|
||||
|
@ -389,7 +389,7 @@ TEST_CASE(unicode)
|
|||
{
|
||||
URL url { "http://example.com/_ünicöde_téxt_©"sv };
|
||||
EXPECT(url.is_valid());
|
||||
EXPECT_EQ(url.path(), "/_ünicöde_téxt_©");
|
||||
EXPECT_EQ(url.path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
|
||||
EXPECT(url.query().is_null());
|
||||
EXPECT(url.fragment().is_null());
|
||||
}
|
||||
|
@ -415,3 +415,10 @@ TEST_CASE(empty_url_with_base_url)
|
|||
EXPECT_EQ(parsed_url.is_valid(), true);
|
||||
EXPECT(base_url.equals(parsed_url));
|
||||
}
|
||||
|
||||
TEST_CASE(google_street_view)
|
||||
{
|
||||
constexpr auto streetview_url = "https://www.google.co.uk/maps/@53.3354159,-1.9573545,3a,75y,121.1h,75.67t/data=!3m7!1e1!3m5!1sSY8xCv17jAX4S7SRdV38hg!2e0!6shttps:%2F%2Fstreetviewpixels-pa.googleapis.com%2Fv1%2Fthumbnail%3Fpanoid%3DSY8xCv17jAX4S7SRdV38hg%26cb_client%3Dmaps_sv.tactile.gps%26w%3D203%26h%3D100%26yaw%3D188.13148%26pitch%3D0%26thumbfov%3D100!7i13312!8i6656";
|
||||
URL url(streetview_url);
|
||||
EXPECT_EQ(url.serialize(), streetview_url);
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public:
|
|||
|
||||
WebIDL::ExceptionOr<String> to_json() const;
|
||||
|
||||
void set_query(Badge<URLSearchParams>, String query) { m_url.set_query(query.to_deprecated_string()); }
|
||||
void set_query(Badge<URLSearchParams>, String query) { m_url.set_query(query.to_deprecated_string(), AK::URL::ApplyPercentEncoding::Yes); }
|
||||
|
||||
private:
|
||||
URL(JS::Realm&, AK::URL, JS::NonnullGCPtr<URLSearchParams> query);
|
||||
|
|
Loading…
Reference in a new issue