diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 78d5f95aac9..a65c5e4725d 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -357,6 +357,7 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "BSD$" OR HAIKU) target_link_libraries(AK PRIVATE execinfo) endif() +add_serenity_subdirectory(Userland/Libraries/LibUnicode) add_serenity_subdirectory(Userland/Libraries/LibURL) # LibCore @@ -481,7 +482,6 @@ if (BUILD_LAGOM) TextCodec Threading TLS - Unicode Video Wasm WebSocket diff --git a/Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn b/Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn index 510fd1a03e5..1ed238886bc 100644 --- a/Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn +++ b/Meta/gn/secondary/Userland/Libraries/LibURL/BUILD.gn @@ -8,5 +8,8 @@ shared_library("LibURL") { "URL.cpp", "URL.h", ] - deps = [ "//AK" ] + deps = [ + "//AK", + "//Userland/Libraries/LibUnicode", + ] } diff --git a/Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn b/Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn index 8859f2468a0..da3f9d8001a 100644 --- a/Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn +++ b/Meta/gn/secondary/Userland/Libraries/LibUnicode/BUILD.gn @@ -177,13 +177,9 @@ source_set("LibUnicode") { "Punycode.cpp", "Segmentation.cpp", "String.cpp", - "URL.cpp", "UnicodeUtils.cpp", ] - deps = [ - "//AK", - "//Userland/Libraries/LibURL", - ] + deps = [ "//AK" ] if (enable_unicode_database_download) { deps += [ diff --git a/Userland/Libraries/LibURL/CMakeLists.txt b/Userland/Libraries/LibURL/CMakeLists.txt index 547dc63fa6a..73802ab5de9 100644 --- a/Userland/Libraries/LibURL/CMakeLists.txt +++ b/Userland/Libraries/LibURL/CMakeLists.txt @@ -4,3 +4,4 @@ set(SOURCES ) serenity_lib(LibURL url) +target_link_libraries(LibURL PRIVATE LibUnicode) diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index 7ab1826b7a4..032db387e3c 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace URL { @@ -574,6 +575,29 @@ static bool ends_in_a_number_checker(StringView input) return false; } +// https://url.spec.whatwg.org/#concept-domain-to-ascii +static ErrorOr domain_to_ascii(StringView domain, bool be_strict) +{ + // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46] + // 2. If result is a failure value, domain-to-ASCII validation error, return failure. + Unicode::IDNA::ToAsciiOptions const options { + Unicode::IDNA::CheckHyphens::No, + Unicode::IDNA::CheckBidi::Yes, + Unicode::IDNA::CheckJoiners::Yes, + be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No, + Unicode::IDNA::TransitionalProcessing::No, + be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No + }; + auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options)); + + // 3. If result is the empty string, domain-to-ASCII validation error, return failure. + if (result.is_empty()) + return Error::from_string_literal("Empty domain"); + + // 4. Return result. + return result; +} + // https://url.spec.whatwg.org/#concept-host-parser // NOTE: This is a very bare-bones implementation. static Optional parse_host(StringView input, bool is_opaque = false) @@ -603,10 +627,10 @@ static Optional parse_host(StringView input, bool is_opaque = false) // FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input. auto domain = percent_decode(input); - // NOTE: This is handled in Unicode::create_unicode_url, to work around the fact that we can't call into LibUnicode here - // FIXME: 5. Let asciiDomain be the result of running domain to ASCII with domain and false. - // FIXME: 6. If asciiDomain is failure, then return failure. - auto ascii_domain_or_error = String::from_byte_string(domain); + // 5. Let asciiDomain be the result of running domain to ASCII with domain and false. + auto ascii_domain_or_error = domain_to_ascii(domain, false); + + // 6. If asciiDomain is failure, then return failure. if (ascii_domain_or_error.is_error()) return {}; diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 38b0cc989c2..81283485f70 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -10,12 +10,10 @@ set(SOURCES Segmentation.cpp String.cpp UnicodeUtils.cpp - URL.cpp ${UNICODE_DATA_SOURCES} ) set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED}) serenity_lib(LibUnicode unicode) -target_link_libraries(LibUnicode PRIVATE LibURL) target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$) diff --git a/Userland/Libraries/LibUnicode/URL.cpp b/Userland/Libraries/LibUnicode/URL.cpp deleted file mode 100644 index d9c382453bd..00000000000 --- a/Userland/Libraries/LibUnicode/URL.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2023, Simon Wanner - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include - -namespace Unicode { - -// https://url.spec.whatwg.org/#concept-domain-to-ascii -static ErrorOr domain_to_ascii(StringView domain, bool be_strict) -{ - // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46] - // 2. If result is a failure value, domain-to-ASCII validation error, return failure. - Unicode::IDNA::ToAsciiOptions const options { - Unicode::IDNA::CheckHyphens::No, - Unicode::IDNA::CheckBidi::Yes, - Unicode::IDNA::CheckJoiners::Yes, - be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No, - Unicode::IDNA::TransitionalProcessing::No, - be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No - }; - auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options)); - - // 3. If result is the empty string, domain-to-ASCII validation error, return failure. - if (result.is_empty()) - return Error::from_string_literal("Empty domain"); - - // 4. Return result. - return result; -} - -// https://url.spec.whatwg.org/#concept-host-parser -ErrorOr create_unicode_url(String const& url_string) -{ - // NOTE: 1.-4. are implemented in URL::Parser::parse_host - - URL::URL url = url_string; - if (!url.is_valid() || !url.host().has()) - return url; - - auto& domain = url.host().get(); - if (domain.is_empty()) - return url; - - // 5. Let asciiDomain be the result of running domain to ASCII with domain and false. - // 6. If asciiDomain is failure, then return failure. - auto ascii_domain = TRY(domain_to_ascii(domain.bytes_as_string_view(), false)); - - // FIXME: Reimplement 7. or call into URL::Parser::parse_host using ascii_domain (8. & 9. do not apply) - url.set_host(ascii_domain); - return url; -} - -} diff --git a/Userland/Libraries/LibUnicode/URL.h b/Userland/Libraries/LibUnicode/URL.h deleted file mode 100644 index 1e410da10e6..00000000000 --- a/Userland/Libraries/LibUnicode/URL.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2023, Simon Wanner - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#pragma once - -#include -#include - -namespace Unicode { - -ErrorOr create_unicode_url(String const&); - -} diff --git a/Userland/Libraries/LibWebView/URL.cpp b/Userland/Libraries/LibWebView/URL.cpp index 7788c6f3b36..1178bc4e16a 100644 --- a/Userland/Libraries/LibWebView/URL.cpp +++ b/Userland/Libraries/LibWebView/URL.cpp @@ -5,11 +5,9 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include #include #include #include -#include #include #if defined(ENABLE_PUBLIC_SUFFIX) @@ -18,32 +16,16 @@ namespace WebView { -static Optional create_url_with_url_or_path(String const& url_or_path) -{ - auto url = Unicode::create_unicode_url(url_or_path); - if (!url.is_error() && url.value().is_valid()) - return url.release_value(); - - auto path = LexicalPath::canonicalized_path(url_or_path.to_byte_string()); - auto url_from_path = URL::create_with_file_scheme(path); - if (url_from_path.is_valid()) - return url_from_path; - - return {}; -} - static Optional query_public_suffix_list(StringView url_string) { auto out = MUST(String::from_utf8(url_string)); if (!out.starts_with_bytes("about:"sv) && !out.contains("://"sv)) out = MUST(String::formatted("https://{}"sv, out)); - auto maybe_url = create_url_with_url_or_path(out); - if (!maybe_url.has_value()) + auto url = URL::create_with_url_or_path(out.to_byte_string()); + if (!url.is_valid()) return {}; - auto url = maybe_url.release_value(); - if (url.host().has() || url.host().has()) return url;