Browse Source

LibURL+LibUnicode+LibWebView: Handle punycode directly in LibURL

Punycode handling was defined in LibUnicode because LibURL (part of AK at
the time) was unable to depend on LibUnicode. This is no longer the case.
Timothy Flynn 1 year ago
parent
commit
576c2f4f4d

+ 1 - 1
Meta/Lagom/CMakeLists.txt

@@ -357,6 +357,7 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "BSD$" OR HAIKU)
     target_link_libraries(AK PRIVATE execinfo)
 endif()
 
+add_serenity_subdirectory(Userland/Libraries/LibUnicode)
 add_serenity_subdirectory(Userland/Libraries/LibURL)
 
 # LibCore
@@ -481,7 +482,6 @@ if (BUILD_LAGOM)
         TextCodec
         Threading
         TLS
-        Unicode
         Video
         Wasm
         WebSocket

+ 4 - 1
Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn

@@ -8,5 +8,8 @@ shared_library("LibURL") {
     "URL.cpp",
     "URL.h",
   ]
-  deps = [ "//AK" ]
+  deps = [
+    "//AK",
+    "//Userland/Libraries/LibUnicode",
+  ]
 }

+ 1 - 5
Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn

@@ -177,13 +177,9 @@ source_set("LibUnicode") {
     "Punycode.cpp",
     "Segmentation.cpp",
     "String.cpp",
-    "URL.cpp",
     "UnicodeUtils.cpp",
   ]
-  deps = [
-    "//AK",
-    "//Userland/Libraries/LibURL",
-  ]
+  deps = [ "//AK" ]
 
   if (enable_unicode_database_download) {
     deps += [

+ 1 - 0
Userland/Libraries/LibURL/CMakeLists.txt

@@ -4,3 +4,4 @@ set(SOURCES
 )
 
 serenity_lib(LibURL url)
+target_link_libraries(LibURL PRIVATE LibUnicode)

+ 28 - 4
Userland/Libraries/LibURL/Parser.cpp

@@ -15,6 +15,7 @@
 #include <AK/StringUtils.h>
 #include <AK/Utf8View.h>
 #include <LibURL/Parser.h>
+#include <LibUnicode/IDNA.h>
 
 namespace URL {
 
@@ -574,6 +575,29 @@ static bool ends_in_a_number_checker(StringView input)
     return false;
 }
 
+// https://url.spec.whatwg.org/#concept-domain-to-ascii
+static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
+{
+    // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
+    // 2. If result is a failure value, domain-to-ASCII validation error, return failure.
+    Unicode::IDNA::ToAsciiOptions const options {
+        Unicode::IDNA::CheckHyphens::No,
+        Unicode::IDNA::CheckBidi::Yes,
+        Unicode::IDNA::CheckJoiners::Yes,
+        be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
+        Unicode::IDNA::TransitionalProcessing::No,
+        be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
+    };
+    auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
+
+    // 3. If result is the empty string, domain-to-ASCII validation error, return failure.
+    if (result.is_empty())
+        return Error::from_string_literal("Empty domain");
+
+    // 4. Return result.
+    return result;
+}
+
 // https://url.spec.whatwg.org/#concept-host-parser
 // NOTE: This is a very bare-bones implementation.
 static Optional<Host> parse_host(StringView input, bool is_opaque = false)
@@ -603,10 +627,10 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
     // FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
     auto domain = percent_decode(input);
 
-    // NOTE: This is handled in Unicode::create_unicode_url, to work around the fact that we can't call into LibUnicode here
-    // FIXME: 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
-    // FIXME: 6. If asciiDomain is failure, then return failure.
-    auto ascii_domain_or_error = String::from_byte_string(domain);
+    // 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
+    auto ascii_domain_or_error = domain_to_ascii(domain, false);
+
+    // 6. If asciiDomain is failure, then return failure.
     if (ascii_domain_or_error.is_error())
         return {};
 

+ 0 - 2
Userland/Libraries/LibUnicode/CMakeLists.txt

@@ -10,12 +10,10 @@ set(SOURCES
     Segmentation.cpp
     String.cpp
     UnicodeUtils.cpp
-    URL.cpp
     ${UNICODE_DATA_SOURCES}
 )
 set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
 
 serenity_lib(LibUnicode unicode)
-target_link_libraries(LibUnicode PRIVATE LibURL)
 
 target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)

+ 0 - 57
Userland/Libraries/LibUnicode/URL.cpp

@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <LibUnicode/IDNA.h>
-#include <LibUnicode/URL.h>
-
-namespace Unicode {
-
-// https://url.spec.whatwg.org/#concept-domain-to-ascii
-static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
-{
-    // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
-    // 2. If result is a failure value, domain-to-ASCII validation error, return failure.
-    Unicode::IDNA::ToAsciiOptions const options {
-        Unicode::IDNA::CheckHyphens::No,
-        Unicode::IDNA::CheckBidi::Yes,
-        Unicode::IDNA::CheckJoiners::Yes,
-        be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
-        Unicode::IDNA::TransitionalProcessing::No,
-        be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
-    };
-    auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
-
-    // 3. If result is the empty string, domain-to-ASCII validation error, return failure.
-    if (result.is_empty())
-        return Error::from_string_literal("Empty domain");
-
-    // 4. Return result.
-    return result;
-}
-
-// https://url.spec.whatwg.org/#concept-host-parser
-ErrorOr<URL::URL> create_unicode_url(String const& url_string)
-{
-    // NOTE: 1.-4. are implemented in URL::Parser::parse_host
-
-    URL::URL url = url_string;
-    if (!url.is_valid() || !url.host().has<String>())
-        return url;
-
-    auto& domain = url.host().get<String>();
-    if (domain.is_empty())
-        return url;
-
-    // 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
-    // 6. If asciiDomain is failure, then return failure.
-    auto ascii_domain = TRY(domain_to_ascii(domain.bytes_as_string_view(), false));
-
-    // FIXME: Reimplement 7. or call into URL::Parser::parse_host using ascii_domain (8. & 9. do not apply)
-    url.set_host(ascii_domain);
-    return url;
-}
-
-}

+ 0 - 16
Userland/Libraries/LibUnicode/URL.h

@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <AK/String.h>
-#include <LibURL/URL.h>
-
-namespace Unicode {
-
-ErrorOr<URL::URL> create_unicode_url(String const&);
-
-}

+ 2 - 20
Userland/Libraries/LibWebView/URL.cpp

@@ -5,11 +5,9 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include <AK/LexicalPath.h>
 #include <AK/String.h>
 #include <LibCore/System.h>
 #include <LibFileSystem/FileSystem.h>
-#include <LibUnicode/URL.h>
 #include <LibWebView/URL.h>
 
 #if defined(ENABLE_PUBLIC_SUFFIX)
@@ -18,32 +16,16 @@
 
 namespace WebView {
 
-static Optional<URL::URL> create_url_with_url_or_path(String const& url_or_path)
-{
-    auto url = Unicode::create_unicode_url(url_or_path);
-    if (!url.is_error() && url.value().is_valid())
-        return url.release_value();
-
-    auto path = LexicalPath::canonicalized_path(url_or_path.to_byte_string());
-    auto url_from_path = URL::create_with_file_scheme(path);
-    if (url_from_path.is_valid())
-        return url_from_path;
-
-    return {};
-}
-
 static Optional<URL::URL> query_public_suffix_list(StringView url_string)
 {
     auto out = MUST(String::from_utf8(url_string));
     if (!out.starts_with_bytes("about:"sv) && !out.contains("://"sv))
         out = MUST(String::formatted("https://{}"sv, out));
 
-    auto maybe_url = create_url_with_url_or_path(out);
-    if (!maybe_url.has_value())
+    auto url = URL::create_with_url_or_path(out.to_byte_string());
+    if (!url.is_valid())
         return {};
 
-    auto url = maybe_url.release_value();
-
     if (url.host().has<URL::IPv4Address>() || url.host().has<URL::IPv6Address>())
         return url;