Prechádzať zdrojové kódy

LibURL+LibUnicode+LibWebView: Handle punycode directly in LibURL

We had defined punycode handling in LibUnicode when LibURL (AK at the
time) was unable to depend on LibUnicode. This is no longer the case.
Timothy Flynn 1 rok pred
rodič
commit
576c2f4f4d

+ 1 - 1
Meta/Lagom/CMakeLists.txt

@@ -357,6 +357,7 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "BSD$" OR HAIKU)
     target_link_libraries(AK PRIVATE execinfo)
 endif()
 
+add_serenity_subdirectory(Userland/Libraries/LibUnicode)
 add_serenity_subdirectory(Userland/Libraries/LibURL)
 
 # LibCore
@@ -481,7 +482,6 @@ if (BUILD_LAGOM)
         TextCodec
         Threading
         TLS
-        Unicode
         Video
         Wasm
         WebSocket

+ 4 - 1
Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn

@@ -8,5 +8,8 @@ shared_library("LibURL") {
     "URL.cpp",
     "URL.h",
   ]
-  deps = [ "//AK" ]
+  deps = [
+    "//AK",
+    "//Userland/Libraries/LibUnicode",
+  ]
 }

+ 1 - 5
Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn

@@ -177,13 +177,9 @@ source_set("LibUnicode") {
     "Punycode.cpp",
     "Segmentation.cpp",
     "String.cpp",
-    "URL.cpp",
     "UnicodeUtils.cpp",
   ]
-  deps = [
-    "//AK",
-    "//Userland/Libraries/LibURL",
-  ]
+  deps = [ "//AK" ]
 
   if (enable_unicode_database_download) {
     deps += [

+ 1 - 0
Userland/Libraries/LibURL/CMakeLists.txt

@@ -4,3 +4,4 @@ set(SOURCES
 )
 
 serenity_lib(LibURL url)
+target_link_libraries(LibURL PRIVATE LibUnicode)

+ 28 - 4
Userland/Libraries/LibURL/Parser.cpp

@@ -15,6 +15,7 @@
 #include <AK/StringUtils.h>
 #include <AK/Utf8View.h>
 #include <LibURL/Parser.h>
+#include <LibUnicode/IDNA.h>
 
 namespace URL {
 
@@ -574,6 +575,29 @@ static bool ends_in_a_number_checker(StringView input)
     return false;
 }
 
+// https://url.spec.whatwg.org/#concept-domain-to-ascii
+static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
+{
+    // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
+    // 2. If result is a failure value, domain-to-ASCII validation error, return failure.
+    Unicode::IDNA::ToAsciiOptions const options {
+        Unicode::IDNA::CheckHyphens::No,
+        Unicode::IDNA::CheckBidi::Yes,
+        Unicode::IDNA::CheckJoiners::Yes,
+        be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
+        Unicode::IDNA::TransitionalProcessing::No,
+        be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
+    };
+    auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
+
+    // 3. If result is the empty string, domain-to-ASCII validation error, return failure.
+    if (result.is_empty())
+        return Error::from_string_literal("Empty domain");
+
+    // 4. Return result.
+    return result;
+}
+
 // https://url.spec.whatwg.org/#concept-host-parser
 // NOTE: This is a very bare-bones implementation.
 static Optional<Host> parse_host(StringView input, bool is_opaque = false)
@@ -603,10 +627,10 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
     // FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
     auto domain = percent_decode(input);
 
-    // NOTE: This is handled in Unicode::create_unicode_url, to work around the fact that we can't call into LibUnicode here
-    // FIXME: 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
-    // FIXME: 6. If asciiDomain is failure, then return failure.
-    auto ascii_domain_or_error = String::from_byte_string(domain);
+    // 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
+    auto ascii_domain_or_error = domain_to_ascii(domain, false);
+
+    // 6. If asciiDomain is failure, then return failure.
     if (ascii_domain_or_error.is_error())
         return {};
 

+ 0 - 2
Userland/Libraries/LibUnicode/CMakeLists.txt

@@ -10,12 +10,10 @@ set(SOURCES
     Segmentation.cpp
     String.cpp
     UnicodeUtils.cpp
-    URL.cpp
     ${UNICODE_DATA_SOURCES}
 )
 set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
 
 serenity_lib(LibUnicode unicode)
-target_link_libraries(LibUnicode PRIVATE LibURL)
 
 target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)

+ 0 - 57
Userland/Libraries/LibUnicode/URL.cpp

@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <LibUnicode/IDNA.h>
-#include <LibUnicode/URL.h>
-
-namespace Unicode {
-
-// https://url.spec.whatwg.org/#concept-domain-to-ascii
-static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
-{
-    // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
-    // 2. If result is a failure value, domain-to-ASCII validation error, return failure.
-    Unicode::IDNA::ToAsciiOptions const options {
-        Unicode::IDNA::CheckHyphens::No,
-        Unicode::IDNA::CheckBidi::Yes,
-        Unicode::IDNA::CheckJoiners::Yes,
-        be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
-        Unicode::IDNA::TransitionalProcessing::No,
-        be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
-    };
-    auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
-
-    // 3. If result is the empty string, domain-to-ASCII validation error, return failure.
-    if (result.is_empty())
-        return Error::from_string_literal("Empty domain");
-
-    // 4. Return result.
-    return result;
-}
-
-// https://url.spec.whatwg.org/#concept-host-parser
-ErrorOr<URL::URL> create_unicode_url(String const& url_string)
-{
-    // NOTE: 1.-4. are implemented in URL::Parser::parse_host
-
-    URL::URL url = url_string;
-    if (!url.is_valid() || !url.host().has<String>())
-        return url;
-
-    auto& domain = url.host().get<String>();
-    if (domain.is_empty())
-        return url;
-
-    // 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
-    // 6. If asciiDomain is failure, then return failure.
-    auto ascii_domain = TRY(domain_to_ascii(domain.bytes_as_string_view(), false));
-
-    // FIXME: Reimplement 7. or call into URL::Parser::parse_host using ascii_domain (8. & 9. do not apply)
-    url.set_host(ascii_domain);
-    return url;
-}
-
-}

+ 0 - 16
Userland/Libraries/LibUnicode/URL.h

@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <AK/String.h>
-#include <LibURL/URL.h>
-
-namespace Unicode {
-
-ErrorOr<URL::URL> create_unicode_url(String const&);
-
-}

+ 2 - 20
Userland/Libraries/LibWebView/URL.cpp

@@ -5,11 +5,9 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include <AK/LexicalPath.h>
 #include <AK/String.h>
 #include <LibCore/System.h>
 #include <LibFileSystem/FileSystem.h>
-#include <LibUnicode/URL.h>
 #include <LibWebView/URL.h>
 
 #if defined(ENABLE_PUBLIC_SUFFIX)
@@ -18,32 +16,16 @@
 
 namespace WebView {
 
-static Optional<URL::URL> create_url_with_url_or_path(String const& url_or_path)
-{
-    auto url = Unicode::create_unicode_url(url_or_path);
-    if (!url.is_error() && url.value().is_valid())
-        return url.release_value();
-
-    auto path = LexicalPath::canonicalized_path(url_or_path.to_byte_string());
-    auto url_from_path = URL::create_with_file_scheme(path);
-    if (url_from_path.is_valid())
-        return url_from_path;
-
-    return {};
-}
-
 static Optional<URL::URL> query_public_suffix_list(StringView url_string)
 {
     auto out = MUST(String::from_utf8(url_string));
     if (!out.starts_with_bytes("about:"sv) && !out.contains("://"sv))
         out = MUST(String::formatted("https://{}"sv, out));
 
-    auto maybe_url = create_url_with_url_or_path(out);
-    if (!maybe_url.has_value())
+    auto url = URL::create_with_url_or_path(out.to_byte_string());
+    if (!url.is_valid())
         return {};
 
-    auto url = maybe_url.release_value();
-
     if (url.host().has<URL::IPv4Address>() || url.host().has<URL::IPv6Address>())
         return url;