LibURL+Everywhere: Only percent decode URL paths when actually needed

Web specs do not return through javascript percent decoded URL path
components - but we were doing this in a number of places due to the
default behaviour of URL::serialize_path.

Since percent encoded URL paths may not contain valid UTF-8 - this was
resulting in us crashing in these places.

For example - on an HTMLAnchorElement when retrieving the pathname for
the URL of:

http://ladybird.org/foo%C2%91%91

To fix this make the URL class only return the percent encoded
serialized path, matching the URL spec. When the decoded path is
required instead explicitly call URL::percent_decode.

This fixes a crash running WPT URL tests for the anchor element on:

https://wpt.live/url/a-element.html
This commit is contained in:
Shannon Booth 2024-08-05 16:55:39 +12:00 committed by Andreas Kling
parent ffe070d7f9
commit cc55732332
Notes: github-actions[bot] 2024-08-05 07:59:02 +00:00
18 changed files with 38 additions and 38 deletions

View file

@ -159,7 +159,8 @@ TEST_CASE(file_url_with_encoded_characters)
URL::URL url("file:///my/file/test%23file.txt"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.scheme(), "file");
EXPECT_EQ(url.serialize_path(), "/my/file/test#file.txt");
EXPECT_EQ(url.serialize_path(), "/my/file/test%23file.txt");
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/my/file/test#file.txt");
EXPECT(!url.query().has_value());
EXPECT(!url.fragment().has_value());
}
@ -330,7 +331,8 @@ TEST_CASE(unicode)
{
URL::URL url { "http://example.com/_ünicöde_téxt_©"sv };
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/_ünicöde_téxt_©");
EXPECT_EQ(url.serialize_path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/_ünicöde_téxt_©");
EXPECT(!url.query().has_value());
EXPECT(!url.fragment().has_value());
}

View file

@ -0,0 +1 @@
/foo%C2%91%91

View file

@ -0,0 +1,8 @@
<a id="anchor" href="http://ladybird.org/foo%C2%91%91"></a>
<script src="../include.js"></script>
<script>
test(() => {
const anchorElement = document.getElementById('anchor');
println(anchorElement.pathname);
})
</script>

View file

@ -236,12 +236,12 @@ bool is_special_scheme(StringView scheme)
}
// https://url.spec.whatwg.org/#url-path-serializer
ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const
String URL::serialize_path() const
{
// 1. If url has an opaque path, then return urls path.
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
if (cannot_be_a_base_url())
return m_data->paths[0].to_byte_string();
return m_data->paths[0];
// 2. Let output be the empty string.
StringBuilder output;
@ -249,11 +249,11 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons
// 3. For each segment of urls path: append U+002F (/) followed by segment to output.
for (auto const& segment : m_data->paths) {
output.append('/');
output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string());
output.append(segment);
}
// 4. Return output.
return output.to_byte_string();
return output.to_string_without_validation();
}
// https://url.spec.whatwg.org/#concept-url-serializer

View file

@ -53,11 +53,6 @@ using IPv6Address = Array<u16, 8>;
// but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
using Host = Variant<IPv4Address, IPv6Address, String, Empty>;
enum class ApplyPercentDecoding {
Yes,
No
};
// https://w3c.github.io/FileAPI/#blob-url-entry
// NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
struct BlobURLEntry {
@ -161,7 +156,7 @@ public:
m_data->paths.append(String {});
}
ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
String serialize_path() const;
ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
ByteString serialize_for_display() const;
ByteString to_byte_string() const { return serialize(); }

View file

@ -373,12 +373,10 @@ void DOMURL::set_port(String const& port)
}
// https://url.spec.whatwg.org/#dom-url-pathname
WebIDL::ExceptionOr<String> DOMURL::pathname() const
String DOMURL::pathname() const
{
auto& vm = realm().vm();
// The pathname getter steps are to return the result of URL path serializing thiss URL.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_path(URL::ApplyPercentDecoding::No)));
return m_url.serialize_path();
}
// https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0

View file

@ -55,7 +55,7 @@ public:
WebIDL::ExceptionOr<String> port() const;
void set_port(String const&);
WebIDL::ExceptionOr<String> pathname() const;
String pathname() const;
void set_pathname(String const&);
Optional<String> const& fragment() const { return m_url.fragment(); }

View file

@ -291,10 +291,8 @@ String HTMLHyperlinkElementUtils::pathname() const
if (!m_url.has_value())
return String {};
// 4. If url's cannot-be-a-base-URL is true, then return url's path[0].
// 5. If url's path is empty, then return the empty string.
// 6. Return "/", followed by the strings in url's path (including empty strings), separated from each other by "/".
return MUST(String::from_byte_string(m_url->serialize_path()));
// 4. Return the result of URL path serializing url.
return m_url->serialize_path();
}
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname

View file

@ -248,15 +248,13 @@ WebIDL::ExceptionOr<void> Location::set_port(String const&)
// https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname
WebIDL::ExceptionOr<String> Location::pathname() const
{
auto& vm = this->vm();
// 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException.
auto const relevant_document = this->relevant_document();
if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin()))
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
// 2. Return the result of URL path serializing this Location object's url.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_path()));
return url().serialize_path();
}
WebIDL::ExceptionOr<void> Location::set_pathname(String const&)

View file

@ -92,11 +92,10 @@ WebIDL::ExceptionOr<String> WorkerLocation::port() const
}
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname
WebIDL::ExceptionOr<String> WorkerLocation::pathname() const
String WorkerLocation::pathname() const
{
auto& vm = realm().vm();
// The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_path()));
return m_global_scope->url().serialize_path();
}
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search

View file

@ -24,7 +24,7 @@ public:
WebIDL::ExceptionOr<String> host() const;
WebIDL::ExceptionOr<String> hostname() const;
WebIDL::ExceptionOr<String> port() const;
WebIDL::ExceptionOr<String> pathname() const;
String pathname() const;
WebIDL::ExceptionOr<String> search() const;
WebIDL::ExceptionOr<String> hash() const;

View file

@ -42,7 +42,7 @@ ErrorOr<String> load_error_page(URL::URL const& url, StringView error_message)
ErrorOr<String> load_file_directory_page(URL::URL const& url)
{
// Generate HTML contents entries table
auto lexical_path = LexicalPath(url.serialize_path());
auto lexical_path = LexicalPath(URL::percent_decode(url.serialize_path()));
Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir);
Vector<ByteString> names;
while (dt.has_next())

View file

@ -102,7 +102,7 @@ void Resource::did_load(Badge<ResourceLoader>, ReadonlyBytes data, HTTP::HeaderM
if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) {
m_mime_type = "text/plain";
} else {
m_mime_type = Core::guess_mime_type_based_on_filename(url().serialize_path());
m_mime_type = Core::guess_mime_type_based_on_filename(URL::percent_decode(url().serialize_path()));
}
}

View file

@ -311,7 +311,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
}
auto data = resource.value()->data();
auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time());
auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time());
log_success(request);
success_callback(data, response_headers, {});
@ -328,7 +328,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
return;
}
FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
--m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
@ -376,7 +376,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
}
auto data = maybe_data.release_value();
auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime);
auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime);
log_success(request);
success_callback(data, response_headers, {});

View file

@ -26,7 +26,7 @@ ByteString ConnectionInfo::resource_name() const
// The "resource-name" can be constructed by concatenating the following:
StringBuilder builder;
// "/" if the path component is empty
auto path = m_url.serialize_path();
auto path = URL::percent_decode(m_url.serialize_path());
if (path.is_empty())
builder.append('/');
// The path component

View file

@ -269,7 +269,7 @@ String CookieJar::default_path(const URL::URL& url)
// https://tools.ietf.org/html/rfc6265#section-5.1.4
// 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise).
auto uri_path = url.serialize_path();
auto uri_path = URL::percent_decode(url.serialize_path());
// 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps.
if (uri_path.is_empty() || (uri_path[0] != '/'))
@ -283,6 +283,7 @@ String CookieJar::default_path(const URL::URL& url)
return "/"_string;
// 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/").
// FIXME: The path might not be valid UTF-8.
return MUST(String::from_utf8(uri_path.substring_view(0, last_separator)));
}

View file

@ -380,7 +380,7 @@ static ErrorOr<TestResult> run_ref_test(HeadlessWebContentView& view, URL::URL c
if (dump_failed_ref_tests) {
warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url);
auto title = LexicalPath::title(url.serialize_path());
auto title = LexicalPath::title(URL::percent_decode(url.serialize_path()));
auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr<void> {
auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write));
auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap));

View file

@ -371,7 +371,7 @@ static auto parse(StringView contents)
if (url.scheme() != "file")
return Error::from_string_literal("NYI: Nonlocal entity");
auto file = TRY(Core::File::open(url.serialize_path(), Core::File::OpenMode::Read));
auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
return ByteString::copy(TRY(file->read_until_eof()));
},
},
@ -440,7 +440,7 @@ static void do_run_tests(XML::Document& document)
continue;
}
auto file_path = url.serialize_path();
auto file_path = URL::percent_decode(url.serialize_path());
auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read);
if (file_result.is_error()) {
warnln("Read error for {}: {}", file_path, file_result.error());