mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-04 05:20:30 +00:00
LibURL+Everywhere: Only percent decode URL paths when actually needed
Web specs do not return through javascript percent decoded URL path components - but we were doing this in a number of places due to the default behaviour of URL::serialize_path. Since percent encoded URL paths may not contain valid UTF-8 - this was resulting in us crashing in these places. For example - on an HTMLAnchorElement when retrieving the pathname for the URL of: http://ladybird.org/foo%C2%91%91 To fix this make the URL class only return the percent encoded serialized path, matching the URL spec. When the decoded path is required instead explicitly call URL::percent_decode. This fixes a crash running WPT URL tests for the anchor element on: https://wpt.live/url/a-element.html
This commit is contained in:
parent
ffe070d7f9
commit
cc55732332
Notes:
github-actions[bot]
2024-08-05 07:59:02 +00:00
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/cc557323326 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/966
18 changed files with 38 additions and 38 deletions
|
@ -159,7 +159,8 @@ TEST_CASE(file_url_with_encoded_characters)
|
|||
URL::URL url("file:///my/file/test%23file.txt"sv);
|
||||
EXPECT(url.is_valid());
|
||||
EXPECT_EQ(url.scheme(), "file");
|
||||
EXPECT_EQ(url.serialize_path(), "/my/file/test#file.txt");
|
||||
EXPECT_EQ(url.serialize_path(), "/my/file/test%23file.txt");
|
||||
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/my/file/test#file.txt");
|
||||
EXPECT(!url.query().has_value());
|
||||
EXPECT(!url.fragment().has_value());
|
||||
}
|
||||
|
@ -330,7 +331,8 @@ TEST_CASE(unicode)
|
|||
{
|
||||
URL::URL url { "http://example.com/_ünicöde_téxt_©"sv };
|
||||
EXPECT(url.is_valid());
|
||||
EXPECT_EQ(url.serialize_path(), "/_ünicöde_téxt_©");
|
||||
EXPECT_EQ(url.serialize_path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
|
||||
EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/_ünicöde_téxt_©");
|
||||
EXPECT(!url.query().has_value());
|
||||
EXPECT(!url.fragment().has_value());
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/foo%C2%91%91
|
|
@ -0,0 +1,8 @@
|
|||
<a id="anchor" href="http://ladybird.org/foo%C2%91%91"></a>
|
||||
<script src="../include.js"></script>
|
||||
<script>
|
||||
test(() => {
|
||||
const anchorElement = document.getElementById('anchor');
|
||||
println(anchorElement.pathname);
|
||||
})
|
||||
</script>
|
|
@ -236,12 +236,12 @@ bool is_special_scheme(StringView scheme)
|
|||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#url-path-serializer
|
||||
ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const
|
||||
String URL::serialize_path() const
|
||||
{
|
||||
// 1. If url has an opaque path, then return url’s path.
|
||||
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
|
||||
if (cannot_be_a_base_url())
|
||||
return m_data->paths[0].to_byte_string();
|
||||
return m_data->paths[0];
|
||||
|
||||
// 2. Let output be the empty string.
|
||||
StringBuilder output;
|
||||
|
@ -249,11 +249,11 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons
|
|||
// 3. For each segment of url’s path: append U+002F (/) followed by segment to output.
|
||||
for (auto const& segment : m_data->paths) {
|
||||
output.append('/');
|
||||
output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string());
|
||||
output.append(segment);
|
||||
}
|
||||
|
||||
// 4. Return output.
|
||||
return output.to_byte_string();
|
||||
return output.to_string_without_validation();
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-url-serializer
|
||||
|
|
|
@ -53,11 +53,6 @@ using IPv6Address = Array<u16, 8>;
|
|||
// but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
|
||||
using Host = Variant<IPv4Address, IPv6Address, String, Empty>;
|
||||
|
||||
enum class ApplyPercentDecoding {
|
||||
Yes,
|
||||
No
|
||||
};
|
||||
|
||||
// https://w3c.github.io/FileAPI/#blob-url-entry
|
||||
// NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
|
||||
struct BlobURLEntry {
|
||||
|
@ -161,7 +156,7 @@ public:
|
|||
m_data->paths.append(String {});
|
||||
}
|
||||
|
||||
ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
|
||||
String serialize_path() const;
|
||||
ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
|
||||
ByteString serialize_for_display() const;
|
||||
ByteString to_byte_string() const { return serialize(); }
|
||||
|
|
|
@ -373,12 +373,10 @@ void DOMURL::set_port(String const& port)
|
|||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#dom-url-pathname
|
||||
WebIDL::ExceptionOr<String> DOMURL::pathname() const
|
||||
String DOMURL::pathname() const
|
||||
{
|
||||
auto& vm = realm().vm();
|
||||
|
||||
// The pathname getter steps are to return the result of URL path serializing this’s URL.
|
||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_path(URL::ApplyPercentDecoding::No)));
|
||||
return m_url.serialize_path();
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
|
||||
|
|
|
@ -55,7 +55,7 @@ public:
|
|||
WebIDL::ExceptionOr<String> port() const;
|
||||
void set_port(String const&);
|
||||
|
||||
WebIDL::ExceptionOr<String> pathname() const;
|
||||
String pathname() const;
|
||||
void set_pathname(String const&);
|
||||
|
||||
Optional<String> const& fragment() const { return m_url.fragment(); }
|
||||
|
|
|
@ -291,10 +291,8 @@ String HTMLHyperlinkElementUtils::pathname() const
|
|||
if (!m_url.has_value())
|
||||
return String {};
|
||||
|
||||
// 4. If url's cannot-be-a-base-URL is true, then return url's path[0].
|
||||
// 5. If url's path is empty, then return the empty string.
|
||||
// 6. Return "/", followed by the strings in url's path (including empty strings), separated from each other by "/".
|
||||
return MUST(String::from_byte_string(m_url->serialize_path()));
|
||||
// 4. Return the result of URL path serializing url.
|
||||
return m_url->serialize_path();
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname
|
||||
|
|
|
@ -248,15 +248,13 @@ WebIDL::ExceptionOr<void> Location::set_port(String const&)
|
|||
// https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname
|
||||
WebIDL::ExceptionOr<String> Location::pathname() const
|
||||
{
|
||||
auto& vm = this->vm();
|
||||
|
||||
// 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException.
|
||||
auto const relevant_document = this->relevant_document();
|
||||
if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin()))
|
||||
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
|
||||
|
||||
// 2. Return the result of URL path serializing this Location object's url.
|
||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_path()));
|
||||
return url().serialize_path();
|
||||
}
|
||||
|
||||
WebIDL::ExceptionOr<void> Location::set_pathname(String const&)
|
||||
|
|
|
@ -92,11 +92,10 @@ WebIDL::ExceptionOr<String> WorkerLocation::port() const
|
|||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname
|
||||
WebIDL::ExceptionOr<String> WorkerLocation::pathname() const
|
||||
String WorkerLocation::pathname() const
|
||||
{
|
||||
auto& vm = realm().vm();
|
||||
// The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url.
|
||||
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_path()));
|
||||
return m_global_scope->url().serialize_path();
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search
|
||||
|
|
|
@ -24,7 +24,7 @@ public:
|
|||
WebIDL::ExceptionOr<String> host() const;
|
||||
WebIDL::ExceptionOr<String> hostname() const;
|
||||
WebIDL::ExceptionOr<String> port() const;
|
||||
WebIDL::ExceptionOr<String> pathname() const;
|
||||
String pathname() const;
|
||||
WebIDL::ExceptionOr<String> search() const;
|
||||
WebIDL::ExceptionOr<String> hash() const;
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ ErrorOr<String> load_error_page(URL::URL const& url, StringView error_message)
|
|||
ErrorOr<String> load_file_directory_page(URL::URL const& url)
|
||||
{
|
||||
// Generate HTML contents entries table
|
||||
auto lexical_path = LexicalPath(url.serialize_path());
|
||||
auto lexical_path = LexicalPath(URL::percent_decode(url.serialize_path()));
|
||||
Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir);
|
||||
Vector<ByteString> names;
|
||||
while (dt.has_next())
|
||||
|
|
|
@ -102,7 +102,7 @@ void Resource::did_load(Badge<ResourceLoader>, ReadonlyBytes data, HTTP::HeaderM
|
|||
if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) {
|
||||
m_mime_type = "text/plain";
|
||||
} else {
|
||||
m_mime_type = Core::guess_mime_type_based_on_filename(url().serialize_path());
|
||||
m_mime_type = Core::guess_mime_type_based_on_filename(URL::percent_decode(url().serialize_path()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -311,7 +311,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
|||
}
|
||||
|
||||
auto data = resource.value()->data();
|
||||
auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time());
|
||||
auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time());
|
||||
|
||||
log_success(request);
|
||||
success_callback(data, response_headers, {});
|
||||
|
@ -328,7 +328,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
|||
return;
|
||||
}
|
||||
|
||||
FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
|
||||
FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr<i32> file_or_error) {
|
||||
--m_pending_loads;
|
||||
if (on_load_counter_change)
|
||||
on_load_counter_change();
|
||||
|
@ -376,7 +376,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
|
|||
}
|
||||
|
||||
auto data = maybe_data.release_value();
|
||||
auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime);
|
||||
auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime);
|
||||
|
||||
log_success(request);
|
||||
success_callback(data, response_headers, {});
|
||||
|
|
|
@ -26,7 +26,7 @@ ByteString ConnectionInfo::resource_name() const
|
|||
// The "resource-name" can be constructed by concatenating the following:
|
||||
StringBuilder builder;
|
||||
// "/" if the path component is empty
|
||||
auto path = m_url.serialize_path();
|
||||
auto path = URL::percent_decode(m_url.serialize_path());
|
||||
if (path.is_empty())
|
||||
builder.append('/');
|
||||
// The path component
|
||||
|
|
|
@ -269,7 +269,7 @@ String CookieJar::default_path(const URL::URL& url)
|
|||
// https://tools.ietf.org/html/rfc6265#section-5.1.4
|
||||
|
||||
// 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise).
|
||||
auto uri_path = url.serialize_path();
|
||||
auto uri_path = URL::percent_decode(url.serialize_path());
|
||||
|
||||
// 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps.
|
||||
if (uri_path.is_empty() || (uri_path[0] != '/'))
|
||||
|
@ -283,6 +283,7 @@ String CookieJar::default_path(const URL::URL& url)
|
|||
return "/"_string;
|
||||
|
||||
// 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/").
|
||||
// FIXME: The path might not be valid UTF-8.
|
||||
return MUST(String::from_utf8(uri_path.substring_view(0, last_separator)));
|
||||
}
|
||||
|
||||
|
|
|
@ -380,7 +380,7 @@ static ErrorOr<TestResult> run_ref_test(HeadlessWebContentView& view, URL::URL c
|
|||
|
||||
if (dump_failed_ref_tests) {
|
||||
warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url);
|
||||
auto title = LexicalPath::title(url.serialize_path());
|
||||
auto title = LexicalPath::title(URL::percent_decode(url.serialize_path()));
|
||||
auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr<void> {
|
||||
auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write));
|
||||
auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap));
|
||||
|
|
|
@ -371,7 +371,7 @@ static auto parse(StringView contents)
|
|||
if (url.scheme() != "file")
|
||||
return Error::from_string_literal("NYI: Nonlocal entity");
|
||||
|
||||
auto file = TRY(Core::File::open(url.serialize_path(), Core::File::OpenMode::Read));
|
||||
auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
|
||||
return ByteString::copy(TRY(file->read_until_eof()));
|
||||
},
|
||||
},
|
||||
|
@ -440,7 +440,7 @@ static void do_run_tests(XML::Document& document)
|
|||
continue;
|
||||
}
|
||||
|
||||
auto file_path = url.serialize_path();
|
||||
auto file_path = URL::percent_decode(url.serialize_path());
|
||||
auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read);
|
||||
if (file_result.is_error()) {
|
||||
warnln("Read error for {}: {}", file_path, file_result.error());
|
||||
|
|
Loading…
Reference in a new issue