Browse Source

LibWeb+LibURL: Consolidate Origin parsing and serialization into LibURL

Because of the previous awkward factoring of Origin we had two
implementations of Origin serializing and creation. Move the
implementation of DOMURL::url_origin into URL::origin, and
instead use the implemenation of URL::Origin::serialize for
serialization (replacing URL::serialize_origin).

This happens to fix 8 URL subtests as the two implemenations had
diverged, and URL::serialize_origin was previously missing the spec
changes of: whatwg/url@eee49fd and whatwg/url@fff33c3
Shannon Booth 9 months ago
parent
commit
501f92b54e

+ 1 - 0
Tests/LibWeb/Text/expected/URL/url-origin-for-blob-url-with-inner-ftp-scheme.txt

@@ -0,0 +1 @@
+null

+ 7 - 0
Tests/LibWeb/Text/input/URL/url-origin-for-blob-url-with-inner-ftp-scheme.html

@@ -0,0 +1,7 @@
+<a id="a" href='blob:ftp://host/path'>
+<script src="../include.js"></script>
+<script>
+    test(() => {
+        println(document.getElementById("a").origin);
+    })
+</script>

+ 43 - 21
Userland/Libraries/LibURL/URL.cpp

@@ -352,32 +352,54 @@ ErrorOr<String> URL::to_string() const
     return String::from_byte_string(serialize());
 }
 
-// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
 // https://url.spec.whatwg.org/#concept-url-origin
-ByteString URL::serialize_origin() const
+Origin URL::origin() const
 {
-    VERIFY(m_data->valid);
+    // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
+    // -> "blob"
+    if (scheme() == "blob"sv) {
+        auto url_string = to_string().release_value_but_fixme_should_propagate_errors();
+
+        // 1. If url’s blob URL entry is non-null, then return url’s blob URL entry’s environment’s origin.
+        if (blob_url_entry().has_value())
+            return blob_url_entry()->environment_origin;
+
+        // 2. Let pathURL be the result of parsing the result of URL path serializing url.
+        auto path_url = Parser::basic_parse(serialize_path());
 
-    if (m_data->scheme == "blob"sv) {
-        // TODO: 1. If URL’s blob URL entry is non-null, then return URL’s blob URL entry’s environment’s origin.
-        // 2. Let url be the result of parsing URL’s path[0].
-        VERIFY(!m_data->paths.is_empty());
-        URL url = m_data->paths[0];
-        // 3. Return a new opaque origin, if url is failure, and url’s origin otherwise.
-        if (!url.is_valid())
-            return "null";
-        return url.serialize_origin();
-    } else if (!m_data->scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
-        return "null";
+        // 3. If pathURL is failure, then return a new opaque origin.
+        if (!path_url.is_valid())
+            return Origin {};
+
+        // 4. If pathURL’s scheme is "http", "https", or "file", then return pathURL’s origin.
+        if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
+            return path_url.origin();
+
+        // 5. Return a new opaque origin.
+        return Origin {};
     }
 
-    StringBuilder builder;
-    builder.append(m_data->scheme);
-    builder.append("://"sv);
-    builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
-    if (m_data->port.has_value())
-        builder.appendff(":{}", *m_data->port);
-    return builder.to_byte_string();
+    // -> "ftp"
+    // -> "http"
+    // -> "https"
+    // -> "ws"
+    // -> "wss"
+    if (scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
+        // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
+        return Origin(scheme().to_byte_string(), host(), port().value_or(0));
+    }
+
+    // -> "file"
+    // AD-HOC: Our resource:// is basically an alias to file://
+    if (scheme() == "file"sv || scheme() == "resource"sv) {
+        // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
+        // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
+        return Origin(scheme().to_byte_string(), String {}, 0);
+    }
+
+    // -> Otherwise
+    // Return a new opaque origin.
+    return Origin {};
 }
 
 bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const

+ 1 - 2
Userland/Libraries/LibURL/URL.h

@@ -122,8 +122,7 @@ public:
     ByteString to_byte_string() const { return serialize(); }
     ErrorOr<String> to_string() const;
 
-    // HTML origin
-    ByteString serialize_origin() const;
+    Origin origin() const;
 
     bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const;
 

+ 2 - 54
Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp

@@ -141,7 +141,7 @@ WebIDL::ExceptionOr<void> DOMURL::revoke_object_url(JS::VM& vm, StringView url)
         return {};
 
     // 3. Let origin be the origin of url record.
-    auto origin = url_origin(url_record);
+    auto origin = url_record.origin();
 
     // 4. Let settings be the current settings object.
     auto& settings = HTML::current_settings_object();
@@ -218,7 +218,7 @@ WebIDL::ExceptionOr<String> DOMURL::origin() const
     auto& vm = realm().vm();
 
     // The origin getter steps are to return the serialization of this’s URL’s origin. [HTML]
-    return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_origin()));
+    return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.origin().serialize()));
 }
 
 // https://url.spec.whatwg.org/#dom-url-protocol
@@ -478,58 +478,6 @@ void DOMURL::set_hash(String const& hash)
     (void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
 }
 
-// https://url.spec.whatwg.org/#concept-url-origin
-URL::Origin url_origin(URL::URL const& url)
-{
-    // FIXME: We should probably have an extended version of URL::URL for LibWeb instead of standalone functions like this.
-
-    // The origin of a URL url is the origin returned by running these steps, switching on url’s scheme:
-    // -> "blob"
-    if (url.scheme() == "blob"sv) {
-        auto url_string = url.to_string().release_value_but_fixme_should_propagate_errors();
-
-        // 1. If url’s blob URL entry is non-null, then return url’s blob URL entry’s environment’s origin.
-        if (url.blob_url_entry().has_value())
-            return url.blob_url_entry()->environment_origin;
-
-        // 2. Let pathURL be the result of parsing the result of URL path serializing url.
-        auto path_url = parse(url.serialize_path());
-
-        // 3. If pathURL is failure, then return a new opaque origin.
-        if (!path_url.is_valid())
-            return URL::Origin {};
-
-        // 4. If pathURL’s scheme is "http", "https", or "file", then return pathURL’s origin.
-        if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
-            return url_origin(path_url);
-
-        // 5. Return a new opaque origin.
-        return URL::Origin {};
-    }
-
-    // -> "ftp"
-    // -> "http"
-    // -> "https"
-    // -> "ws"
-    // -> "wss"
-    if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
-        // Return the tuple origin (url’s scheme, url’s host, url’s port, null).
-        return URL::Origin(url.scheme().to_byte_string(), url.host(), url.port().value_or(0));
-    }
-
-    // -> "file"
-    // AD-HOC: Our resource:// is basically an alias to file://
-    if (url.scheme() == "file"sv || url.scheme() == "resource"sv) {
-        // Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
-        // Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
-        return URL::Origin(url.scheme().to_byte_string(), String {}, 0);
-    }
-
-    // -> Otherwise
-    // Return a new opaque origin.
-    return URL::Origin {};
-}
-
 // https://url.spec.whatwg.org/#concept-domain
 bool host_is_domain(URL::Host const& host)
 {

+ 0 - 1
Userland/Libraries/LibWeb/DOMURL/DOMURL.h

@@ -92,7 +92,6 @@ private:
     JS::NonnullGCPtr<URLSearchParams> m_query;
 };
 
-URL::Origin url_origin(URL::URL const&);
 bool host_is_domain(URL::Host const&);
 
 // https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path

+ 1 - 1
Userland/Libraries/LibWeb/Fetch/Fetching/Checks.cpp

@@ -70,7 +70,7 @@ bool tao_check(Infrastructure::Request const& request, Infrastructure::Response
     //       information, but the container document would not.
     if (request.mode() == Infrastructure::Request::Mode::Navigate
         && request.origin().has<URL::Origin>()
-        && !DOMURL::url_origin(request.current_url()).is_same_origin(request.origin().get<URL::Origin>())) {
+        && !request.current_url().origin().is_same_origin(request.origin().get<URL::Origin>())) {
         return false;
     }
 

+ 4 - 4
Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp

@@ -353,7 +353,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> main_fetch(JS::Realm& realm, Inf
         // -> request’s current URL’s scheme is "data"
         // -> request’s mode is "navigate" or "websocket"
         else if (
-            (request->origin().has<URL::Origin>() && DOMURL::url_origin(request->current_url()).is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic)
+            (request->origin().has<URL::Origin>() && request->current_url().origin().is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic)
             || request->current_url().scheme() == "data"sv
             || (request->mode() == Infrastructure::Request::Mode::Navigate || request->mode() == Infrastructure::Request::Mode::WebSocket)) {
             // 1. Set request’s response tainting to "basic".
@@ -1201,7 +1201,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
     if (request->mode() == Infrastructure::Request::Mode::CORS
         && location_url.includes_credentials()
         && request->origin().has<URL::Origin>()
-        && !request->origin().get<URL::Origin>().is_same_origin(DOMURL::url_origin(location_url))) {
+        && !request->origin().get<URL::Origin>().is_same_origin(location_url.origin())) {
         return PendingResponse::create(vm, request, Infrastructure::Response::network_error(vm, "Request with 'cors' mode and different URL and request origin must not include credentials in redirect URL"sv));
     }
 
@@ -1244,7 +1244,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
     // 13. If request’s current URL’s origin is not same origin with locationURL’s origin, then for each headerName of
     //     CORS non-wildcard request-header name, delete headerName from request’s header list.
     // NOTE: I.e., the moment another origin is seen after the initial request, the `Authorization` header is removed.
-    if (!DOMURL::url_origin(request->current_url()).is_same_origin(DOMURL::url_origin(location_url))) {
+    if (!request->current_url().origin().is_same_origin(location_url.origin())) {
         static constexpr Array cors_non_wildcard_request_header_names {
             "Authorization"sv
         };
@@ -2578,7 +2578,7 @@ void set_sec_fetch_site_header(Infrastructure::Request& request)
     if (!header_value.equals_ignoring_ascii_case("none"sv)) {
         for (auto& url : request.url_list()) {
             // 1. If url is same origin with r’s origin, continue.
-            if (DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(request.current_url())))
+            if (url.origin().is_same_origin(request.current_url().origin()))
                 continue;
 
             // 2. Set header’s value to cross-site.

+ 4 - 4
Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.cpp

@@ -173,8 +173,8 @@ bool Request::has_redirect_tainted_origin() const
 
         // 2. If url’s origin is not same origin with lastURL’s origin and request’s origin is not same origin with lastURL’s origin, then return true.
         auto const* request_origin = m_origin.get_pointer<URL::Origin>();
-        if (!DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(*last_url))
-            && (request_origin == nullptr || !request_origin->is_same_origin(DOMURL::url_origin(*last_url)))) {
+        if (!url.origin().is_same_origin(last_url->origin())
+            && (request_origin == nullptr || !request_origin->is_same_origin(last_url->origin()))) {
             return true;
         }
 
@@ -328,7 +328,7 @@ void Request::add_origin_header()
             case ReferrerPolicy::ReferrerPolicy::SameOrigin:
                 // If request’s origin is not same origin with request’s current URL’s origin, then set serializedOrigin
                 // to `null`.
-                if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(DOMURL::url_origin(current_url())))
+                if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(current_url().origin()))
                     serialized_origin = MUST(ByteBuffer::copy("null"sv.bytes()));
                 break;
             // -> Otherwise
@@ -368,7 +368,7 @@ bool Request::cross_origin_embedder_policy_allows_credentials() const
     if (request_origin == nullptr)
         return false;
 
-    return request_origin->is_same_origin(DOMURL::url_origin(current_url())) && !has_redirect_tainted_origin();
+    return request_origin->is_same_origin(current_url().origin()) && !has_redirect_tainted_origin();
 }
 
 StringView request_destination_to_string(Request::Destination destination)

+ 1 - 1
Userland/Libraries/LibWeb/Fetch/Request.cpp

@@ -309,7 +309,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Request>> Request::construct_impl(JS::Realm
             // - parsedReferrer’s scheme is "about" and path is the string "client"
             // - parsedReferrer’s origin is not same origin with origin
             // then set request’s referrer to "client".
-            auto parsed_referrer_origin = DOMURL::url_origin(parsed_referrer);
+            auto parsed_referrer_origin = parsed_referrer.origin();
             if ((parsed_referrer.scheme() == "about"sv && parsed_referrer.paths().size() == 1 && parsed_referrer.paths()[0] == "client"sv)
                 || !parsed_referrer_origin.is_same_origin(origin)) {
                 request->set_referrer(Infrastructure::Request::Referrer::Client);

+ 1 - 1
Userland/Libraries/LibWeb/HTML/BrowsingContext.cpp

@@ -85,7 +85,7 @@ URL::Origin determine_the_origin(Optional<URL::URL> const& url, SandboxingFlagSe
         return source_origin.release_value();
 
     // 5. Return url's origin.
-    return DOMURL::url_origin(*url);
+    return url->origin();
 }
 
 // https://html.spec.whatwg.org/multipage/document-sequences.html#creating-a-new-auxiliary-browsing-context

+ 1 - 1
Userland/Libraries/LibWeb/HTML/EventSource.cpp

@@ -449,7 +449,7 @@ void EventSource::dispatch_the_event()
     //    the value of the event type buffer.
     MessageEventInit init {};
     init.data = JS::PrimitiveString::create(vm(), data_buffer);
-    init.origin = MUST(String::from_byte_string(m_url.serialize_origin()));
+    init.origin = MUST(String::from_byte_string(m_url.origin().serialize()));
     init.last_event_id = last_event_id;
 
     auto type = m_event_type.is_empty() ? HTML::EventNames::message : m_event_type;

+ 1 - 1
Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp

@@ -50,7 +50,7 @@ String HTMLHyperlinkElementUtils::origin() const
         return String {};
 
     // 3. Return the serialization of this element's url's origin.
-    return MUST(String::from_byte_string(m_url->serialize_origin()));
+    return MUST(String::from_byte_string(m_url->origin().serialize()));
 }
 
 // https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-protocol

+ 1 - 1
Userland/Libraries/LibWeb/HTML/Location.cpp

@@ -139,7 +139,7 @@ WebIDL::ExceptionOr<String> Location::origin() const
         return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
 
     // 2. Return the serialization of this's url's origin.
-    return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_origin()));
+    return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().origin().serialize()));
 }
 
 // https://html.spec.whatwg.org/multipage/history.html#dom-location-protocol

+ 1 - 1
Userland/Libraries/LibWeb/HTML/Window.cpp

@@ -1033,7 +1033,7 @@ WebIDL::ExceptionOr<void> Window::window_post_message_steps(JS::Value message, W
             return WebIDL::SyntaxError::create(target_realm, MUST(String::formatted("Invalid URL for targetOrigin: '{}'", options.target_origin)));
 
         // 3. Set targetOrigin to parsedURL's origin.
-        target_origin = DOMURL::url_origin(parsed_url);
+        target_origin = parsed_url.origin();
     }
 
     // 6. Let transfer be options["transfer"].

+ 1 - 1
Userland/Libraries/LibWeb/HTML/WorkerLocation.cpp

@@ -26,7 +26,7 @@ WebIDL::ExceptionOr<String> WorkerLocation::origin() const
 {
     auto& vm = realm().vm();
     // The origin getter steps are to return the serialization of this's WorkerGlobalScope object's url's origin.
-    return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_origin()));
+    return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().origin().serialize()));
 }
 
 // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-protocol

+ 1 - 1
Userland/Libraries/LibWeb/Internals/Internals.cpp

@@ -143,7 +143,7 @@ void Internals::spoof_current_url(String const& url_string)
 
     VERIFY(url.is_valid());
 
-    auto origin = DOMURL::url_origin(url);
+    auto origin = url.origin();
 
     auto& window = internals_window();
     window.associated_document().set_url(url);

+ 1 - 1
Userland/Libraries/LibWeb/PermissionsPolicy/AutoplayAllowlist.cpp

@@ -82,7 +82,7 @@ ErrorOr<void> AutoplayAllowlist::enable_for_origins(ReadonlySpan<String> origins
             continue;
         }
 
-        TRY(allowlist.try_append(DOMURL::url_origin(url)));
+        TRY(allowlist.try_append(url.origin()));
     }
 
     return {};

+ 3 - 3
Userland/Libraries/LibWeb/ReferrerPolicy/AbstractOperations.cpp

@@ -146,7 +146,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
     case ReferrerPolicy::StrictOriginWhenCrossOrigin:
         // 1. If the origin of referrerURL and the origin of request’s current URL are the same, then return
         //    referrerURL.
-        if (referrer_url.has_value() && DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url())))
+        if (referrer_url.has_value() && referrer_url->origin().is_same_origin(request.current_url().origin()))
             return referrer_url;
 
         // 2. If referrerURL is a potentially trustworthy URL and request’s current URL is not a potentially
@@ -164,7 +164,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
         // 1. If the origin of referrerURL and the origin of request’s current URL are the same, then return
         //    referrerURL.
         if (referrer_url.has_value()
-            && DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) {
+            && referrer_url->origin().is_same_origin(request.current_url().origin())) {
             return referrer_url;
         }
 
@@ -175,7 +175,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
         // 1. If the origin of referrerURL and the origin of request’s current URL are the same, then return
         //    referrerURL.
         if (referrer_url.has_value()
-            && DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) {
+            && referrer_url->origin().is_same_origin(request.current_url().origin())) {
             return referrer_url;
         }
 

+ 1 - 1
Userland/Libraries/LibWeb/SecureContexts/AbstractOperations.cpp

@@ -80,7 +80,7 @@ Trustworthiness is_url_potentially_trustworthy(URL::URL const& url)
         return Trustworthiness::PotentiallyTrustworthy;
 
     // 3. Return the result of executing § 3.1 Is origin potentially trustworthy? on url’s origin.
-    return is_origin_potentially_trustworthy(DOMURL::url_origin(url));
+    return is_origin_potentially_trustworthy(url.origin());
 }
 
 }

+ 1 - 1
Userland/Libraries/LibWeb/StorageAPI/StorageKey.cpp

@@ -36,7 +36,7 @@ StorageKey obtain_a_storage_key_for_non_storage_purposes(HTML::Environment const
         auto& mutable_settings = const_cast<HTML::EnvironmentSettingsObject&>(settings);
         return { mutable_settings.origin() };
     }
-    return { DOMURL::url_origin(environment.creation_url) };
+    return { environment.creation_url.origin() };
 }
 
 }