diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index 887ebd9e494..e57ba732dd1 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -159,7 +159,8 @@ TEST_CASE(file_url_with_encoded_characters) URL::URL url("file:///my/file/test%23file.txt"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "file"); - EXPECT_EQ(url.serialize_path(), "/my/file/test#file.txt"); + EXPECT_EQ(url.serialize_path(), "/my/file/test%23file.txt"); + EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/my/file/test#file.txt"); EXPECT(!url.query().has_value()); EXPECT(!url.fragment().has_value()); } @@ -330,7 +331,8 @@ TEST_CASE(unicode) { URL::URL url { "http://example.com/_ünicöde_téxt_©"sv }; EXPECT(url.is_valid()); - EXPECT_EQ(url.serialize_path(), "/_ünicöde_téxt_©"); + EXPECT_EQ(url.serialize_path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9"); + EXPECT_EQ(URL::percent_decode(url.serialize_path()), "/_ünicöde_téxt_©"); EXPECT(!url.query().has_value()); EXPECT(!url.fragment().has_value()); } diff --git a/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt b/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt new file mode 100644 index 00000000000..9f10028bee2 --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/anchor-element-url-invalid-unicode-pathname.txt @@ -0,0 +1 @@ + /foo%C2%91%91 diff --git a/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html b/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html new file mode 100644 index 00000000000..d9724b3fa0d --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/anchor-element-url-invalid-unicode-pathname.html @@ -0,0 +1,8 @@ + + + diff --git a/Userland/Libraries/LibURL/URL.cpp b/Userland/Libraries/LibURL/URL.cpp index 352abcee80e..b22a3c025e3 100644 --- a/Userland/Libraries/LibURL/URL.cpp +++ b/Userland/Libraries/LibURL/URL.cpp @@ -236,12 +236,12 @@ bool is_special_scheme(StringView scheme) } // https://url.spec.whatwg.org/#url-path-serializer -ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) const +String URL::serialize_path() const { // 1. If url has an opaque path, then return url’s path. // FIXME: Reimplement this step once we modernize the URL implementation to meet the spec. if (cannot_be_a_base_url()) - return m_data->paths[0].to_byte_string(); + return m_data->paths[0]; // 2. Let output be the empty string. StringBuilder output; @@ -249,11 +249,11 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons // 3. For each segment of url’s path: append U+002F (/) followed by segment to output. for (auto const& segment : m_data->paths) { output.append('/'); - output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string()); + output.append(segment); } // 4. Return output. - return output.to_byte_string(); + return output.to_string_without_validation(); } // https://url.spec.whatwg.org/#concept-url-serializer diff --git a/Userland/Libraries/LibURL/URL.h b/Userland/Libraries/LibURL/URL.h index fa4afcd9655..9590ade30be 100644 --- a/Userland/Libraries/LibURL/URL.h +++ b/Userland/Libraries/LibURL/URL.h @@ -53,11 +53,6 @@ using IPv6Address = Array; // but it is sometimes used as opaque identifier in URLs where a network address is not necessary. using Host = Variant; -enum class ApplyPercentDecoding { - Yes, - No -}; - // https://w3c.github.io/FileAPI/#blob-url-entry // NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery). struct BlobURLEntry { @@ -161,7 +156,7 @@ public: m_data->paths.append(String {}); } - ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const; + String serialize_path() const; ByteString serialize(ExcludeFragment = ExcludeFragment::No) const; ByteString serialize_for_display() const; ByteString to_byte_string() const { return serialize(); } diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp index ab2bf80943a..457712d7ed3 100644 --- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -373,12 +373,10 @@ void DOMURL::set_port(String const& port) } // https://url.spec.whatwg.org/#dom-url-pathname -WebIDL::ExceptionOr DOMURL::pathname() const +String DOMURL::pathname() const { - auto& vm = realm().vm(); - // The pathname getter steps are to return the result of URL path serializing this’s URL. - return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_path(URL::ApplyPercentDecoding::No))); + return m_url.serialize_path(); } // https://url.spec.whatwg.org/#ref-for-dom-url-pathname%E2%91%A0 diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h index e12f47fb64b..a25c9e431a6 100644 --- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.h +++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.h @@ -55,7 +55,7 @@ public: WebIDL::ExceptionOr port() const; void set_port(String const&); - WebIDL::ExceptionOr pathname() const; + String pathname() const; void set_pathname(String const&); Optional const& fragment() const { return m_url.fragment(); } diff --git a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp index 69bc15a1eb7..816fa17a3c4 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp @@ -291,10 +291,8 @@ String HTMLHyperlinkElementUtils::pathname() const if (!m_url.has_value()) return String {}; - // 4. If url's cannot-be-a-base-URL is true, then return url's path[0]. - // 5. If url's path is empty, then return the empty string. - // 6. Return "/", followed by the strings in url's path (including empty strings), separated from each other by "/". - return MUST(String::from_byte_string(m_url->serialize_path())); + // 4. Return the result of URL path serializing url. + return m_url->serialize_path(); } // https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-pathname diff --git a/Userland/Libraries/LibWeb/HTML/Location.cpp b/Userland/Libraries/LibWeb/HTML/Location.cpp index f8feb67ed16..0b3f399ef0c 100644 --- a/Userland/Libraries/LibWeb/HTML/Location.cpp +++ b/Userland/Libraries/LibWeb/HTML/Location.cpp @@ -248,15 +248,13 @@ WebIDL::ExceptionOr Location::set_port(String const&) // https://html.spec.whatwg.org/multipage/history.html#dom-location-pathname WebIDL::ExceptionOr Location::pathname() const { - auto& vm = this->vm(); - // 1. If this's relevant Document is non-null and its origin is not same origin-domain with the entry settings object's origin, then throw a "SecurityError" DOMException. auto const relevant_document = this->relevant_document(); if (relevant_document && !relevant_document->origin().is_same_origin_domain(entry_settings_object().origin())) return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string); // 2. Return the result of URL path serializing this Location object's url. - return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_path())); + return url().serialize_path(); } WebIDL::ExceptionOr Location::set_pathname(String const&) diff --git a/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp b/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp index 418a20f1310..4e25c696188 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp +++ b/Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp @@ -92,11 +92,10 @@ WebIDL::ExceptionOr WorkerLocation::port() const } // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-pathname -WebIDL::ExceptionOr WorkerLocation::pathname() const +String WorkerLocation::pathname() const { - auto& vm = realm().vm(); // The pathname getter steps are to return the result of URL path serializing this's WorkerGlobalScope object's url. - return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_path())); + return m_global_scope->url().serialize_path(); } // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-search diff --git a/Userland/Libraries/LibWeb/HTML/WorkerLocation.h b/Userland/Libraries/LibWeb/HTML/WorkerLocation.h index 1d7bf75f7d0..c403a07db03 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerLocation.h +++ b/Userland/Libraries/LibWeb/HTML/WorkerLocation.h @@ -24,7 +24,7 @@ public: WebIDL::ExceptionOr host() const; WebIDL::ExceptionOr hostname() const; WebIDL::ExceptionOr port() const; - WebIDL::ExceptionOr pathname() const; + String pathname() const; WebIDL::ExceptionOr search() const; WebIDL::ExceptionOr hash() const; diff --git a/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp b/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp index 568dbd02ac0..f277fcd41c2 100644 --- a/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp +++ b/Userland/Libraries/LibWeb/Loader/GeneratedPagesLoader.cpp @@ -42,7 +42,7 @@ ErrorOr load_error_page(URL::URL const& url, StringView error_message) ErrorOr load_file_directory_page(URL::URL const& url) { // Generate HTML contents entries table - auto lexical_path = LexicalPath(url.serialize_path()); + auto lexical_path = LexicalPath(URL::percent_decode(url.serialize_path())); Core::DirIterator dt(lexical_path.string(), Core::DirIterator::Flags::SkipParentAndBaseDir); Vector names; while (dt.has_next()) diff --git a/Userland/Libraries/LibWeb/Loader/Resource.cpp b/Userland/Libraries/LibWeb/Loader/Resource.cpp index 3d54f3c5b34..29a3c27afc7 100644 --- a/Userland/Libraries/LibWeb/Loader/Resource.cpp +++ b/Userland/Libraries/LibWeb/Loader/Resource.cpp @@ -102,7 +102,7 @@ void Resource::did_load(Badge, ReadonlyBytes data, HTTP::HeaderM if (content_type_options.value_or("").equals_ignoring_ascii_case("nosniff"sv)) { m_mime_type = "text/plain"; } else { - m_mime_type = Core::guess_mime_type_based_on_filename(url().serialize_path()); + m_mime_type = Core::guess_mime_type_based_on_filename(URL::percent_decode(url().serialize_path())); } } diff --git a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp index 6a6f66ff29f..df6c30e50a1 100644 --- a/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp +++ b/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp @@ -311,7 +311,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback } auto data = resource.value()->data(); - auto response_headers = response_headers_for_file(url.serialize_path(), resource.value()->modified_time()); + auto response_headers = response_headers_for_file(URL::percent_decode(url.serialize_path()), resource.value()->modified_time()); log_success(request); success_callback(data, response_headers, {}); @@ -328,7 +328,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback return; } - FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr file_or_error) { + FileRequest file_request(URL::percent_decode(url.serialize_path()), [this, success_callback = move(success_callback), error_callback = move(error_callback), request, respond_directory_page](ErrorOr file_or_error) { --m_pending_loads; if (on_load_counter_change) on_load_counter_change(); @@ -376,7 +376,7 @@ void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback } auto data = maybe_data.release_value(); - auto response_headers = response_headers_for_file(request.url().serialize_path(), st_or_error.value().st_mtime); + auto response_headers = response_headers_for_file(URL::percent_decode(request.url().serialize_path()), st_or_error.value().st_mtime); log_success(request); success_callback(data, response_headers, {}); diff --git a/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp b/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp index 5a8acf1ed80..8082667f446 100644 --- a/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp +++ b/Userland/Libraries/LibWebSocket/ConnectionInfo.cpp @@ -26,7 +26,7 @@ ByteString ConnectionInfo::resource_name() const // The "resource-name" can be constructed by concatenating the following: StringBuilder builder; // "/" if the path component is empty - auto path = m_url.serialize_path(); + auto path = URL::percent_decode(m_url.serialize_path()); if (path.is_empty()) builder.append('/'); // The path component diff --git a/Userland/Libraries/LibWebView/CookieJar.cpp b/Userland/Libraries/LibWebView/CookieJar.cpp index 91c4bd44aa5..c5cdfeb04c8 100644 --- a/Userland/Libraries/LibWebView/CookieJar.cpp +++ b/Userland/Libraries/LibWebView/CookieJar.cpp @@ -269,7 +269,7 @@ String CookieJar::default_path(const URL::URL& url) // https://tools.ietf.org/html/rfc6265#section-5.1.4 // 1. Let uri-path be the path portion of the request-uri if such a portion exists (and empty otherwise). - auto uri_path = url.serialize_path(); + auto uri_path = URL::percent_decode(url.serialize_path()); // 2. If the uri-path is empty or if the first character of the uri-path is not a %x2F ("/") character, output %x2F ("/") and skip the remaining steps. if (uri_path.is_empty() || (uri_path[0] != '/')) @@ -283,6 +283,7 @@ String CookieJar::default_path(const URL::URL& url) return "/"_string; // 4. Output the characters of the uri-path from the first character up to, but not including, the right-most %x2F ("/"). + // FIXME: The path might not be valid UTF-8. return MUST(String::from_utf8(uri_path.substring_view(0, last_separator))); } diff --git a/Userland/Utilities/headless-browser.cpp b/Userland/Utilities/headless-browser.cpp index 060eac3929c..b236c2518c4 100644 --- a/Userland/Utilities/headless-browser.cpp +++ b/Userland/Utilities/headless-browser.cpp @@ -380,7 +380,7 @@ static ErrorOr run_ref_test(HeadlessWebContentView& view, URL::URL c if (dump_failed_ref_tests) { warnln("\033[33;1mRef test {} failed; dumping screenshots\033[0m", url); - auto title = LexicalPath::title(url.serialize_path()); + auto title = LexicalPath::title(URL::percent_decode(url.serialize_path())); auto dump_screenshot = [&](Gfx::Bitmap& bitmap, StringView path) -> ErrorOr { auto screenshot_file = TRY(Core::File::open(path, Core::File::OpenMode::Write)); auto encoded_data = TRY(Gfx::PNGWriter::encode(bitmap)); diff --git a/Userland/Utilities/xml.cpp b/Userland/Utilities/xml.cpp index 9b1eae56049..319c0bcfe2e 100644 --- a/Userland/Utilities/xml.cpp +++ b/Userland/Utilities/xml.cpp @@ -371,7 +371,7 @@ static auto parse(StringView contents) if (url.scheme() != "file") return Error::from_string_literal("NYI: Nonlocal entity"); - auto file = TRY(Core::File::open(url.serialize_path(), Core::File::OpenMode::Read)); + auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read)); return ByteString::copy(TRY(file->read_until_eof())); }, }, @@ -440,7 +440,7 @@ static void do_run_tests(XML::Document& document) continue; } - auto file_path = url.serialize_path(); + auto file_path = URL::percent_decode(url.serialize_path()); auto file_result = Core::File::open(file_path, Core::File::OpenMode::Read); if (file_result.is_error()) { warnln("Read error for {}: {}", file_path, file_result.error());