Selaa lähdekoodia

LibURL: Promote Host to a proper class

This lets us move a few Host-related functions (like serialization and
checks for what the Host is) into Host instead of having them dotted
around the codebase.

For now, the interface is still very Variant-like, to avoid having to
change quite so much in one go.
Sam Atkins 8 kuukautta sitten
vanhempi
commit
63688148b9

+ 7 - 0
Libraries/LibIPC/Decoder.cpp

@@ -111,6 +111,13 @@ ErrorOr<URL::Origin> decode(Decoder& decoder)
     return URL::Origin { move(scheme), move(host), port };
 }
 
+template<> // IPC decode specialization for URL::Host.
+ErrorOr<URL::Host> decode(Decoder& decoder)
+{
+    auto value = TRY(decoder.decode<URL::Host::VariantType>()); // Decode the underlying variant first; TRY hands any failure back to the caller.
+    return URL::Host { move(value) }; // Wrap the decoded variant in a Host.
+}
+
 template<>
 ErrorOr<File> decode(Decoder& decoder)
 {

+ 3 - 0
Libraries/LibIPC/Decoder.h

@@ -108,6 +108,9 @@ ErrorOr<URL::URL> decode(Decoder&);
 template<>
 ErrorOr<URL::Origin> decode(Decoder&);
 
+template<>
+ErrorOr<URL::Host> decode(Decoder&);
+
 template<>
 ErrorOr<File> decode(Decoder&);
 

+ 7 - 0
Libraries/LibIPC/Encoder.cpp

@@ -131,6 +131,13 @@ ErrorOr<void> encode(Encoder& encoder, URL::Origin const& origin)
     return {};
 }
 
+template<> // IPC encode specialization for URL::Host.
+ErrorOr<void> encode(Encoder& encoder, URL::Host const& host)
+{
+    TRY(encoder.encode(host.value())); // A Host is sent as its underlying variant, mirroring the decode side which reads a URL::Host::VariantType.
+    return {};
+}
+
 template<>
 ErrorOr<void> encode(Encoder& encoder, File const& file)
 {

+ 3 - 0
Libraries/LibIPC/Encoder.h

@@ -107,6 +107,9 @@ ErrorOr<void> encode(Encoder&, URL::URL const&);
 template<>
 ErrorOr<void> encode(Encoder&, URL::Origin const&);
 
+template<>
+ErrorOr<void> encode(Encoder&, URL::Host const&);
+
 template<>
 ErrorOr<void> encode(Encoder&, File const&);
 

+ 1 - 0
Libraries/LibURL/CMakeLists.txt

@@ -1,6 +1,7 @@
 include(public_suffix)
 
 set(SOURCES
+    Host.cpp
     Origin.cpp
     Parser.cpp
     URL.cpp

+ 155 - 0
Libraries/LibURL/Host.cpp

@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
+ * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
+ * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibURL/Host.h>
+
+namespace URL {
+
+Host::Host(VariantType&& value) // Constructs a Host from an already-built variant (IPv4Address, IPv6Address or String).
+    : m_value(move(value))
+{
+}
+
+Host::Host(String&& string) // Convenience constructor: initializes the stored variant directly from a String.
+    : m_value(move(string))
+{
+}
+
+// https://url.spec.whatwg.org/#concept-ipv4-serializer
+static String serialize_ipv4_address(IPv4Address address) // Formats the address as four '.'-separated byte values, most significant byte first.
+{
+    // 1. Let output be the empty string.
+    // NOTE: Instead of prepending to a string, each byte is written into a fixed-size array from the back.
+    Array<u8, 4> output;
+
+    // 2. Let n be the value of address.
+    u32 n = address;
+
+    // 3. For each i in the range 1 to 4, inclusive:
+    for (size_t i = 0; i <= 3; ++i) { // NOTE: Zero-based here (0 to 3), which is why the index below is 3 - i.
+        // 1. Prepend n % 256, serialized, to output.
+        output[3 - i] = n % 256;
+
+        // 2. If i is not 4, then prepend U+002E (.) to output.
+        // NOTE: The separators come from the format string in step 4 instead.
+
+        // 3. Set n to floor(n / 256).
+        n /= 256;
+    }
+
+    // 4. Return output.
+    return MUST(String::formatted("{}.{}.{}.{}", output[0], output[1], output[2], output[3]));
+}
+
+// https://url.spec.whatwg.org/#concept-ipv6-serializer
+static void serialize_ipv6_address(IPv6Address const& address, StringBuilder& output) // Appends the serialized address to output; the caller adds the enclosing '[' and ']'.
+{
+    // 1. Let output be the empty string.
+    // NOTE: output is supplied by the caller, so it is appended to rather than created and returned.
+    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
+    Optional<size_t> compress;
+    size_t longest_sequence_length = 0;
+    size_t current_sequence_length = 0;
+    size_t current_sequence_start = 0;
+    for (size_t i = 0; i < 8; ++i) {
+        if (address[i] == 0) {
+            if (current_sequence_length == 0)
+                current_sequence_start = i;
+            ++current_sequence_length;
+        } else {
+            if (current_sequence_length > longest_sequence_length) { // Strictly greater, so the first of several equally long runs wins.
+                longest_sequence_length = current_sequence_length;
+                compress = current_sequence_start;
+            }
+            current_sequence_length = 0;
+        }
+    }
+    // NOTE: A run of zero pieces that reaches the end of the address is never closed by the else branch above, so check it here.
+    if (current_sequence_length > longest_sequence_length) {
+        longest_sequence_length = current_sequence_length;
+        compress = current_sequence_start;
+    }
+
+    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
+    if (longest_sequence_length <= 1)
+        compress = {};
+
+    // 4. Let ignore0 be false.
+    auto ignore0 = false;
+
+    // 5. For each pieceIndex in the range 0 to 7, inclusive:
+    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
+        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
+        if (ignore0 && address[piece_index] == 0)
+            continue;
+
+        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
+        if (ignore0)
+            ignore0 = false;
+
+        // 3. If compress is pieceIndex, then:
+        if (compress == piece_index) {
+            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
+            auto separator = piece_index == 0 ? "::"sv : ":"sv; // A single ':' suffices mid-address because the previous piece already appended one.
+
+            // 2. Append separator to output.
+            output.append(separator);
+
+            // 3. Set ignore0 to true and continue.
+            ignore0 = true;
+            continue;
+        }
+
+        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
+        output.appendff("{:x}", address[piece_index]);
+
+        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
+        if (piece_index != 7)
+            output.append(':');
+    }
+
+    // 6. Return output.
+}
+
+// https://url.spec.whatwg.org/#concept-domain
+bool Host::is_domain() const
+{
+    // A domain is a non-empty ASCII string that identifies a realm within a network.
+    return m_value.has<String>() && !m_value.get<String>().is_empty(); // NOTE(review): opaque hosts appear to be stored as String too, so they would also pass this check; confirm with callers.
+}
+
+// https://url.spec.whatwg.org/#empty-host
+bool Host::is_empty_host() const
+{
+    // An empty host is the empty string.
+    return m_value.has<String>() && m_value.get<String>().is_empty(); // The has<String>() guard comes first, so get<String>() is only reached when a String is stored.
+}
+
+// https://url.spec.whatwg.org/#concept-host-serializer
+String Host::serialize() const
+{
+    return m_value.visit( // One lambda per alternative of VariantType; each returns a String.
+        // 1. If host is an IPv4 address, return the result of running the IPv4 serializer on host.
+        [](IPv4Address const& address) {
+            return serialize_ipv4_address(address);
+        },
+        // 2. Otherwise, if host is an IPv6 address, return U+005B ([), followed by the result of running the IPv6 serializer on host, followed by U+005D (]).
+        [](IPv6Address const& address) {
+            StringBuilder output;
+            output.append('[');
+            serialize_ipv6_address(address, output); // Appends into the same builder, between the brackets.
+            output.append(']');
+            return output.to_string_without_validation(); // Everything appended above is ASCII: '[', ':', lowercase hex digits and ']'.
+        },
+        // 3. Otherwise, host is a domain, opaque host, or empty host, return host.
+        [](String const& string) {
+            return string; // Returned by value, i.e. a copy of the stored String.
+        });
+}
+
+}

+ 25 - 1
Libraries/LibURL/Host.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2024, Shannon Booth <shannon@serenityos.org>
+ * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -26,6 +27,29 @@ using IPv6Address = Array<u16, 8>;
 // https://url.spec.whatwg.org/#concept-host
 // A host is a domain, an IP address, an opaque host, or an empty host. Typically a host serves as a network address,
 // but it is sometimes used as opaque identifier in URLs where a network address is not necessary.
-using Host = Variant<IPv4Address, IPv6Address, String>;
+class Host { // Wraps the host variant so that host-related helpers (checks, serialization) can live on the type itself.
+public:
+    using VariantType = Variant<IPv4Address, IPv6Address, String>; // Underlying storage; the String alternative covers domains, opaque hosts and the empty host.
+    Host(VariantType&&); // NOTE: The constructors are not explicit, so VariantType and String rvalues convert to Host implicitly.
+    Host(String&&);
+
+    bool is_domain() const; // True when a non-empty String is stored.
+    bool is_empty_host() const; // True when an empty String is stored.
+
+    template<typename T>
+    bool has() const { return m_value.template has<T>(); } // Forwards to the underlying variant's has<T>().
+
+    template<typename T>
+    T const& get() const { return m_value.template get<T>(); } // Forwards to the underlying variant's get<T>().
+
+    bool operator==(Host const& other) const = default; // Defaulted: compares the only member, m_value.
+
+    VariantType const& value() const { return m_value; } // Read-only access to the underlying variant (the IPC encoder sends this).
+
+    String serialize() const; // Implements https://url.spec.whatwg.org/#concept-host-serializer
+
+private:
+    VariantType m_value;
+};
 
 }

+ 2 - 2
Libraries/LibURL/Origin.cpp

@@ -24,7 +24,7 @@ String Origin::serialize() const
     result.append("://"sv);
 
     // 4. Append origin's host, serialized, to result.
-    result.append(MUST(Parser::serialize_host(host())));
+    result.append(host().serialize());
 
     // 5. If origin's port is non-null, append a U+003A COLON character (:), and origin's port, serialized, to result.
     if (port().has_value()) {
@@ -50,7 +50,7 @@ unsigned Traits<URL::Origin>::hash(URL::Origin const& origin)
     if (origin.port().has_value())
         hash = pair_int_hash(hash, *origin.port());
 
-    hash = pair_int_hash(hash, URL::Parser::serialize_host(origin.host()).release_value_but_fixme_should_propagate_errors().hash());
+    hash = pair_int_hash(hash, origin.host().serialize().hash());
 
     return hash;
 }

+ 3 - 121
Libraries/LibURL/Parser.cpp

@@ -236,102 +236,6 @@ static Optional<IPv4Address> parse_ipv4_address(StringView input)
     return ipv4;
 }
 
-// https://url.spec.whatwg.org/#concept-ipv4-serializer
-static ErrorOr<String> serialize_ipv4_address(IPv4Address address)
-{
-    // 1. Let output be the empty string.
-    // NOTE: Array to avoid prepend.
-    Array<u8, 4> output;
-
-    // 2. Let n be the value of address.
-    u32 n = address;
-
-    // 3. For each i in the range 1 to 4, inclusive:
-    for (size_t i = 0; i <= 3; ++i) {
-        // 1. Prepend n % 256, serialized, to output.
-        output[3 - i] = n % 256;
-
-        // 2. If i is not 4, then prepend U+002E (.) to output.
-        // NOTE: done at end
-
-        // 3. Set n to floor(n / 256).
-        n /= 256;
-    }
-
-    // 4. Return output.
-    return String::formatted("{}.{}.{}.{}", output[0], output[1], output[2], output[3]);
-}
-
-// https://url.spec.whatwg.org/#concept-ipv6-serializer
-static void serialize_ipv6_address(IPv6Address const& address, StringBuilder& output)
-{
-    // 1. Let output be the empty string.
-
-    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
-    Optional<size_t> compress;
-    size_t longest_sequence_length = 0;
-    size_t current_sequence_length = 0;
-    size_t current_sequence_start = 0;
-    for (size_t i = 0; i < 8; ++i) {
-        if (address[i] == 0) {
-            if (current_sequence_length == 0)
-                current_sequence_start = i;
-            ++current_sequence_length;
-        } else {
-            if (current_sequence_length > longest_sequence_length) {
-                longest_sequence_length = current_sequence_length;
-                compress = current_sequence_start;
-            }
-            current_sequence_length = 0;
-        }
-    }
-
-    if (current_sequence_length > longest_sequence_length) {
-        longest_sequence_length = current_sequence_length;
-        compress = current_sequence_start;
-    }
-
-    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
-    if (longest_sequence_length <= 1)
-        compress = {};
-
-    // 4. Let ignore0 be false.
-    auto ignore0 = false;
-
-    // 5. For each pieceIndex in the range 0 to 7, inclusive:
-    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
-        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
-        if (ignore0 && address[piece_index] == 0)
-            continue;
-
-        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
-        if (ignore0)
-            ignore0 = false;
-
-        // 3. If compress is pieceIndex, then:
-        if (compress == piece_index) {
-            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
-            auto separator = piece_index == 0 ? "::"sv : ":"sv;
-
-            // 2. Append separator to output.
-            output.append(separator);
-
-            // 3. Set ignore0 to true and continue.
-            ignore0 = true;
-            continue;
-        }
-
-        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
-        output.appendff("{:x}", address[piece_index]);
-
-        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
-        if (piece_index != 7)
-            output.append(':');
-    }
-
-    // 6. Return output.
-}
-
 // https://url.spec.whatwg.org/#concept-ipv6-parser
 static Optional<IPv6Address> parse_ipv6_address(StringView input)
 {
@@ -654,7 +558,7 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
         auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
         if (!address.has_value())
             return {};
-        return address.release_value();
+        return Host { address.release_value() };
     }
 
     // 2. If isOpaque is true, then return the result of opaque-host parsing input.
@@ -690,35 +594,13 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
         if (!ipv4_host.has_value())
             return {};
 
-        return ipv4_host.release_value();
+        return Host { ipv4_host.release_value() };
     }
 
     // 9. Return asciiDomain.
     return ascii_domain;
 }
 
-// https://url.spec.whatwg.org/#concept-host-serializer
-ErrorOr<String> Parser::serialize_host(Host const& host)
-{
-    // 1. If host is an IPv4 address, return the result of running the IPv4 serializer on host.
-    if (host.has<IPv4Address>())
-        return serialize_ipv4_address(host.get<IPv4Address>());
-
-    // 2. Otherwise, if host is an IPv6 address, return U+005B ([), followed by the result of running the IPv6 serializer on host, followed by U+005D (]).
-    if (host.has<IPv6Address>()) {
-        StringBuilder output;
-        TRY(output.try_append('['));
-        serialize_ipv6_address(host.get<IPv6Address>(), output);
-        TRY(output.try_append(']'));
-        return output.to_string();
-    }
-
-    // 3. Otherwise, host is a domain, opaque host, or empty host, return host.
-    if (host.has<String>())
-        return host.get<String>();
-    return String {};
-}
-
 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
 constexpr bool starts_with_windows_drive_letter(StringView input)
 {
@@ -953,7 +835,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
                         return *url;
 
                     // 4. If url’s scheme is "file" and its host is an empty host, then return.
-                    if (url->scheme() == "file"sv && url->host() == String {})
+                    if (url->scheme() == "file"sv && url->host().has_value() && url->host()->is_empty_host())
                         return *url;
                 }
 

+ 0 - 3
Libraries/LibURL/Parser.h

@@ -63,9 +63,6 @@ public:
     // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
     static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);
 
-    // https://url.spec.whatwg.org/#concept-host-serializer
-    static ErrorOr<String> serialize_host(Host const&);
-
     // https://url.spec.whatwg.org/#shorten-a-urls-path
     static void shorten_urls_path(URL&);
 };

+ 3 - 2
Libraries/LibURL/URL.cpp

@@ -88,7 +88,7 @@ void URL::set_host(Host host)
 // https://url.spec.whatwg.org/#concept-host-serializer
 ErrorOr<String> URL::serialized_host() const
 {
-    return Parser::serialize_host(m_data->host.value());
+    return m_data->host->serialize();
 }
 
 void URL::set_port(Optional<u16> port)
@@ -119,7 +119,8 @@ void URL::append_path(StringView path)
 bool URL::cannot_have_a_username_or_password_or_port() const
 {
     // A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file".
-    return !m_data->host.has_value() || m_data->host == String {} || m_data->scheme == "file"sv;
+
+    return !m_data->host.has_value() || m_data->host->is_empty_host() || m_data->scheme == "file"sv;
 }
 
 // FIXME: This is by no means complete.

+ 1 - 1
Libraries/LibWeb/DOM/Document.cpp

@@ -3194,7 +3194,7 @@ String Document::domain() const
         return String {};
 
     // 3. Return effectiveDomain, serialized.
-    return MUST(URL::Parser::serialize_host(effective_domain.release_value()));
+    return effective_domain->serialize();
 }
 
 void Document::set_domain(String const& domain)

+ 1 - 1
Libraries/LibWeb/Fetch/Fetching/Fetching.cpp

@@ -322,7 +322,7 @@ WebIDL::ExceptionOr<GC::Ptr<PendingResponse>> main_fetch(JS::Realm& realm, Infra
         // - request’s current URL’s scheme is "http"
         request->current_url().scheme() == "http"sv
         // - request’s current URL’s host is a domain
-        && request->current_url().host().has_value() && DOMURL::host_is_domain(request->current_url().host().value())
+        && request->current_url().host().has_value() && request->current_url().host()->is_domain()
         // FIXME: - Matching request’s current URL’s host per Known HSTS Host Domain Name Matching results in either a
         //          superdomain match with an asserted includeSubDomains directive or a congruent match (with or without an
         //          asserted includeSubDomains directive) [HSTS]; or DNS resolution for the request finds a matching HTTPS RR

+ 1 - 3
Libraries/LibWeb/HTML/WorkerLocation.cpp

@@ -60,8 +60,6 @@ WebIDL::ExceptionOr<String> WorkerLocation::host() const
 // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-hostname
 WebIDL::ExceptionOr<String> WorkerLocation::hostname() const
 {
-    auto& vm = realm().vm();
-
     // The hostname getter steps are:
     // 1. Let host be this's WorkerGlobalScope object's url's host.
     auto const& host = m_global_scope->url().host();
@@ -71,7 +69,7 @@ WebIDL::ExceptionOr<String> WorkerLocation::hostname() const
         return String {};
 
     // 3. Return host, serialized.
-    return TRY_OR_THROW_OOM(vm, URL::Parser::serialize_host(host.value()));
+    return host->serialize();
 }
 
 // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-port