diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp
index 887ebd9e494..e57ba732dd1 100644
--- a/Tests/LibURL/TestURL.cpp
+++ b/Tests/LibURL/TestURL.cpp
@@ -159,7 +159,8 @@ TEST_CASE(file_url_with_encoded_characters)
URL::URL url("file:///my/file/test%23file.txt"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.scheme(), "file");
- EXPECT_EQ(url.serialize_path(), "/my/file/test#file.txt");
+ EXPECT_EQ(url.serialize_path(), "/my/file/test%23file.txt");
+ EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/my/file/test#file.txt");
EXPECT(!url.query().has_value());
EXPECT(!url.fragment().has_value());
}
@@ -330,7 +331,8 @@ TEST_CASE(unicode)
{
URL::URL url { "http://example.com/_ünicöde_téxt_©"sv };
EXPECT(url.is_valid());
- EXPECT_EQ(url.serialize_path(), "/_ünicöde_téxt_©");
+ EXPECT_EQ(url.serialize_path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
+ EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/_ünicöde_téxt_©");
EXPECT(!url.query().has_value());
EXPECT(!url.fragment().has_value());
}
diff --git a/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt b/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt
new file mode 100644
index 00000000000..9f10028bee2
--- /dev/null
+++ b/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt
@@ -0,0 +1 @@
+ /foo%C2%91%91
diff --git a/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html b/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html
new file mode 100644
index 00000000000..d9724b3fa0d
--- /dev/null
+++ b/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html
@@ -0,0 +1,8 @@
+
+
+
diff --git a/Userland/Libraries/LibURL/URL.cpp b/Userland/Libraries/LibURL/URL.cpp
index 352abcee80e..b22a3c025e3 100644
--- a/Userland/Libraries/LibURL/URL.cpp
+++ b/Userland/Libraries/LibURL/URL.cpp
@@ -236,12 +236,12 @@ bool is_special_scheme(StringView scheme)
}
// https://url.spec.whatwg.org/#url-path-serializer
-ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const
+String URL::serialize_path() const
{
// 1. If url has an opaque path, then return url’s path.
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
if (cannot_be_a_base_url())
- return m_data->paths[0].to_byte_string();
+ return m_data->paths[0];
// 2. Let output be the empty string.
StringBuilder output;
@@ -249,11 +249,11 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons
// 3. For each segment of url’s path: append U+002F (/) followed by segment to output.
for (auto const& segment : m_data->paths) {
output.append('/');
- output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string());
+ output.append(segment);
}
// 4. Return output.
- return output.to_byte_string();
+ return output.to_string_without_validation();
}
// https://url.spec.whatwg.org/#concept-url-serializer
diff --git a/Userland/Libraries/LibURL/URL.h b/Userland/Libraries/LibURL/URL.h
index fa4afcd9655..9590ade30be 100644
--- a/Userland/Libraries/LibURL/URL.h
+++ b/Userland/Libraries/LibURL/URL.h
@@ -53,11 +53,6 @@ using IPv6Address = Array;
// but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
using Host = Variant;
-enum class ApplyPercentDecoding {
- Yes,
- No
-};
-
// https://w3c.github.io/FileAPI/#blob-url-entry
// NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
struct BlobURLEntry {
@@ -161,7 +156,7 @@ public:
m_data->paths.append(String {});
}
- ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
+ String serialize_path() const;
ByteString serialize(ExcludeFragment = ExcludeFragment::No) const;
ByteString serialize_for_display() const;
ByteString to_byte_string() const { return serialize(); }
diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp
index ab2bf80943a..457712d7ed3 100644
--- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp
+++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp
@@ -373,12 +373,10 @@ void DOMURL::set_port(String const& port)
}
// https://url.spec.whatwg.org/#dom-url-pathname
-WebIDL::ExceptionOr DOMURL::pathname() const
+String DOMURL::pathname() const
{
- auto& vm = realm().vm();
-
// The pathname getter steps are to return the result of URL path serializing this’s URL.
- return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_path(URL::ApplyPercentDecoding::No)));
+ return m_url.serialize_path();
}
// https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0
diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h
index e12f47fb64b..a25c9e431a6 100644
--- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h
+++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h
@@ -55,7 +55,7 @@ public:
WebIDL::ExceptionOr port() const;
void set_port(String const&);
- WebIDL::ExceptionOr pathname() const;
+ String pathname() const;
void set_pathname(String const&);
Optional const& fragment() const { return m_url.fragment(); }
diff --git a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp
index 69bc15a1eb7..816fa17a3c4 100644
--- a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp
+++ b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp
@@ -291,10 +291,8 @@ String HTMLHyperlinkElementUtils::pathname() const
if (!m_url.has_value())
return String {};
- // 4. If url's cannot-be-a-base-URL is true, then return url's path[0].
- // 5. If url's path is empty, then return the empty string.
- // 6. Return "/", followed by the strings in url's path (including empty strings), separated from each other by "/".
- return MUST(String::from_byte_string(m_url->serialize_path()));
+ // 4. Return the result of URL path serializing url.
+ return m_url->serialize_path();
}
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname
diff --git a/Userland/Libraries/LibWeb/HTML/Location.cpp b/Userland/Libraries/LibWeb/HTML/Location.cpp
index f8feb67ed16..0b3f399ef0c 100644
--- a/Userland/Libraries/LibWeb/HTML/Location.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Location.cpp
@@ -248,15 +248,13 @@ WebIDL::ExceptionOr Location::set_port(String const&)
// https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname
WebIDL::ExceptionOr Location::pathname() const
{
- auto& vm = this->vm();
-
// 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException.
auto const relevant_document = this->relevant_document();
if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin()))
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
// 2. Return the result of URL path serializing this Location object's url.
- return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_path()));
+ return url().serialize_path();
}
WebIDL::ExceptionOr Location::set_pathname(String const&)
diff --git a/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp b/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp
index 418a20f1310..4e25c696188 100644
--- a/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp
+++ b/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp
@@ -92,11 +92,10 @@ WebIDL::ExceptionOr WorkerLocation::port() const
}
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname
-WebIDL::ExceptionOr WorkerLocation::pathname() const
+String WorkerLocation::pathname() const
{
- auto& vm = realm().vm();
// The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url.
- return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_path()));
+ return m_global_scope->url().serialize_path();
}
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search
diff --git a/Userland/Libraries/LibWeb/HTML/WorkerLocation.h b/Userland/Libraries/LibWeb/HTML/WorkerLocation.h
index 1d7bf75f7d0..c403a07db03 100644
--- a/Userland/Libraries/LibWeb/HTML/WorkerLocation.h
+++ b/Userland/Libraries/LibWeb/HTML/WorkerLocation.h
@@ -24,7 +24,7 @@ public:
WebIDL::ExceptionOr host() const;
WebIDL::ExceptionOr hostname() const;
WebIDL::ExceptionOr port() const;
- WebIDL::ExceptionOr pathname() const;
+ String pathname() const;
WebIDL::ExceptionOr search() const;
WebIDL::ExceptionOr hash() const;
diff --git a/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp b/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp
index 568dbd02ac0..f277fcd41c2 100644
--- a/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp
+++ b/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp
@@ -42,7 +42,7 @@ ErrorOr load_error_page(URL::URL const& url, StringView error_message)
ErrorOr load_file_directory_page(URL::URL const& url)
{
// Generate HTML contents entries table
- auto lexical_path = LexicalPath(url.serialize_path());
+ auto lexical_path = LexicalPath(URL::percent_decode(url.serialize_path()));
Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir);
Vector names;
while (dt.has_next())
diff --git a/Userland/Libraries/LibWeb/Loader/Resource.cpp b/Userland/Libraries/LibWeb/Loader/Resource.cpp
index 3d54f3c5b34..29a3c27afc7 100644
--- a/Userland/Libraries/LibWeb/Loader/Resource.cpp
+++ b/Userland/Libraries/LibWeb/Loader/Resource.cpp
@@ -102,7 +102,7 @@ void Resource::did_load(Badge, ReadonlyBytes data, HTTP::HeaderM
if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) {
m_mime_type = "text/plain";
} else {
- m_mime_type = Core::guess_mime_type_based_on_filename(url().serialize_path());
+ m_mime_type = Core::guess_mime_type_based_on_filename(URL::percent_decode(url().serialize_path()));
}
}
diff --git a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp
index 6a6f66ff29f..df6c30e50a1 100644
--- a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp
+++ b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp
@@ -311,7 +311,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
}
auto data = resource.value()->data();
- auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time());
+ auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time());
log_success(request);
success_callback(data, response_headers, {});
@@ -328,7 +328,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
return;
}
- FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr file_or_error) {
+ FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr file_or_error) {
--m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
@@ -376,7 +376,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback
}
auto data = maybe_data.release_value();
- auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime);
+ auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime);
log_success(request);
success_callback(data, response_headers, {});
diff --git a/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp b/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp
index 5a8acf1ed80..8082667f446 100644
--- a/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp
+++ b/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp
@@ -26,7 +26,7 @@ ByteString ConnectionInfo::resource_name() const
// The "resource-name" can be constructed by concatenating the following:
StringBuilder builder;
// "/" if the path component is empty
- auto path = m_url.serialize_path();
+ auto path = URL::percent_decode(m_url.serialize_path());
if (path.is_empty())
builder.append('/');
// The path component
diff --git a/Userland/Libraries/LibWebView/CookieJar.cpp b/Userland/Libraries/LibWebView/CookieJar.cpp
index 91c4bd44aa5..c5cdfeb04c8 100644
--- a/Userland/Libraries/LibWebView/CookieJar.cpp
+++ b/Userland/Libraries/LibWebView/CookieJar.cpp
@@ -269,7 +269,7 @@ String CookieJar::default_path(const URL::URL& url)
// https://tools.ietf.org/html/rfc6265#section-5.1.4
// 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise).
- auto uri_path = url.serialize_path();
+ auto uri_path = URL::percent_decode(url.serialize_path());
// 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps.
if (uri_path.is_empty() || (uri_path[0] != '/'))
@@ -283,6 +283,7 @@ String CookieJar::default_path(const URL::URL& url)
return "/"_string;
// 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/").
+ // FIXME: The path might not be valid UTF-8.
return MUST(String::from_utf8(uri_path.substring_view(0, last_separator)));
}
diff --git a/Userland/Utilities/headless-browser.cpp b/Userland/Utilities/headless-browser.cpp
index 060eac3929c..b236c2518c4 100644
--- a/Userland/Utilities/headless-browser.cpp
+++ b/Userland/Utilities/headless-browser.cpp
@@ -380,7 +380,7 @@ static ErrorOr run_ref_test(HeadlessWebContentView& view, URL::URL c
if (dump_failed_ref_tests) {
warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url);
- auto title = LexicalPath::title(url.serialize_path());
+ auto title = LexicalPath::title(URL::percent_decode(url.serialize_path()));
auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr {
auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write));
auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap));
diff --git a/Userland/Utilities/xml.cpp b/Userland/Utilities/xml.cpp
index 9b1eae56049..319c0bcfe2e 100644
--- a/Userland/Utilities/xml.cpp
+++ b/Userland/Utilities/xml.cpp
@@ -371,7 +371,7 @@ static auto parse(StringView contents)
if (url.scheme() != "file")
return Error::from_string_literal("NYI: Nonlocal entity");
- auto file = TRY(Core::File::open(url.serialize_path(), Core::File::OpenMode::Read));
+ auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
return ByteString::copy(TRY(file->read_until_eof()));
},
},
@@ -440,7 +440,7 @@ static void do_run_tests(XML::Document& document)
continue;
}
- auto file_path = url.serialize_path();
+ auto file_path = URL::percent_decode(url.serialize_path());
auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read);
if (file_result.is_error()) {
warnln("Read error for {}: {}", file_path, file_result.error());