mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibURL+LibUnicode+LibWebView: Handle punycode directly in LibURL
Punycode handling was originally defined in LibUnicode because LibURL (which was part of AK at the time) was unable to depend on LibUnicode. This is no longer the case, so LibURL now handles punycode itself.
This commit is contained in:
parent
5ddfcfd07e
commit
576c2f4f4d
Notes:
sideshowbarker
2024-07-17 01:53:23 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/576c2f4f4d Pull-request: https://github.com/SerenityOS/serenity/pull/23717 Issue: https://github.com/SerenityOS/serenity/issues/23625 Reviewed-by: https://github.com/ADKaster ✅
9 changed files with 37 additions and 106 deletions
|
@ -357,6 +357,7 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "BSD$" OR HAIKU)
|
|||
target_link_libraries(AK PRIVATE execinfo)
|
||||
endif()
|
||||
|
||||
add_serenity_subdirectory(Userland/Libraries/LibUnicode)
|
||||
add_serenity_subdirectory(Userland/Libraries/LibURL)
|
||||
|
||||
# LibCore
|
||||
|
@ -481,7 +482,6 @@ if (BUILD_LAGOM)
|
|||
TextCodec
|
||||
Threading
|
||||
TLS
|
||||
Unicode
|
||||
Video
|
||||
Wasm
|
||||
WebSocket
|
||||
|
|
|
@ -8,5 +8,8 @@ shared_library("LibURL") {
|
|||
"URL.cpp",
|
||||
"URL.h",
|
||||
]
|
||||
deps = [ "//AK" ]
|
||||
deps = [
|
||||
"//AK",
|
||||
"//Userland/Libraries/LibUnicode",
|
||||
]
|
||||
}
|
||||
|
|
|
@ -177,13 +177,9 @@ source_set("LibUnicode") {
|
|||
"Punycode.cpp",
|
||||
"Segmentation.cpp",
|
||||
"String.cpp",
|
||||
"URL.cpp",
|
||||
"UnicodeUtils.cpp",
|
||||
]
|
||||
deps = [
|
||||
"//AK",
|
||||
"//Userland/Libraries/LibURL",
|
||||
]
|
||||
deps = [ "//AK" ]
|
||||
|
||||
if (enable_unicode_database_download) {
|
||||
deps += [
|
||||
|
|
|
@ -4,3 +4,4 @@ set(SOURCES
|
|||
)
|
||||
|
||||
serenity_lib(LibURL url)
|
||||
target_link_libraries(LibURL PRIVATE LibUnicode)
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <AK/StringUtils.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibURL/Parser.h>
|
||||
#include <LibUnicode/IDNA.h>
|
||||
|
||||
namespace URL {
|
||||
|
||||
|
@ -574,6 +575,29 @@ static bool ends_in_a_number_checker(StringView input)
|
|||
return false;
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-domain-to-ascii
|
||||
static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
|
||||
{
|
||||
// 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
|
||||
// 2. If result is a failure value, domain-to-ASCII validation error, return failure.
|
||||
Unicode::IDNA::ToAsciiOptions const options {
|
||||
Unicode::IDNA::CheckHyphens::No,
|
||||
Unicode::IDNA::CheckBidi::Yes,
|
||||
Unicode::IDNA::CheckJoiners::Yes,
|
||||
be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
|
||||
Unicode::IDNA::TransitionalProcessing::No,
|
||||
be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
|
||||
};
|
||||
auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
|
||||
|
||||
// 3. If result is the empty string, domain-to-ASCII validation error, return failure.
|
||||
if (result.is_empty())
|
||||
return Error::from_string_literal("Empty domain");
|
||||
|
||||
// 4. Return result.
|
||||
return result;
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-host-parser
|
||||
// NOTE: This is a very bare-bones implementation.
|
||||
static Optional<Host> parse_host(StringView input, bool is_opaque = false)
|
||||
|
@ -603,10 +627,10 @@ static Optional<Host> parse_host(StringView input, bool is_opaque = false)
|
|||
// FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
|
||||
auto domain = percent_decode(input);
|
||||
|
||||
// NOTE: This is handled in Unicode::create_unicode_url, to work around the fact that we can't call into LibUnicode here
|
||||
// FIXME: 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
|
||||
// FIXME: 6. If asciiDomain is failure, then return failure.
|
||||
auto ascii_domain_or_error = String::from_byte_string(domain);
|
||||
// 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
|
||||
auto ascii_domain_or_error = domain_to_ascii(domain, false);
|
||||
|
||||
// 6. If asciiDomain is failure, then return failure.
|
||||
if (ascii_domain_or_error.is_error())
|
||||
return {};
|
||||
|
||||
|
|
|
@ -10,12 +10,10 @@ set(SOURCES
|
|||
Segmentation.cpp
|
||||
String.cpp
|
||||
UnicodeUtils.cpp
|
||||
URL.cpp
|
||||
${UNICODE_DATA_SOURCES}
|
||||
)
|
||||
set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
|
||||
|
||||
serenity_lib(LibUnicode unicode)
|
||||
target_link_libraries(LibUnicode PRIVATE LibURL)
|
||||
|
||||
target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibUnicode/IDNA.h>
|
||||
#include <LibUnicode/URL.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-domain-to-ascii
|
||||
static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
|
||||
{
|
||||
// 1. Let result be the result of running Unicode ToASCII with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46]
|
||||
// 2. If result is a failure value, domain-to-ASCII validation error, return failure.
|
||||
Unicode::IDNA::ToAsciiOptions const options {
|
||||
Unicode::IDNA::CheckHyphens::No,
|
||||
Unicode::IDNA::CheckBidi::Yes,
|
||||
Unicode::IDNA::CheckJoiners::Yes,
|
||||
be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No,
|
||||
Unicode::IDNA::TransitionalProcessing::No,
|
||||
be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No
|
||||
};
|
||||
auto result = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), options));
|
||||
|
||||
// 3. If result is the empty string, domain-to-ASCII validation error, return failure.
|
||||
if (result.is_empty())
|
||||
return Error::from_string_literal("Empty domain");
|
||||
|
||||
// 4. Return result.
|
||||
return result;
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-host-parser
|
||||
ErrorOr<URL::URL> create_unicode_url(String const& url_string)
|
||||
{
|
||||
// NOTE: 1.-4. are implemented in URL::Parser::parse_host
|
||||
|
||||
URL::URL url = url_string;
|
||||
if (!url.is_valid() || !url.host().has<String>())
|
||||
return url;
|
||||
|
||||
auto& domain = url.host().get<String>();
|
||||
if (domain.is_empty())
|
||||
return url;
|
||||
|
||||
// 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
|
||||
// 6. If asciiDomain is failure, then return failure.
|
||||
auto ascii_domain = TRY(domain_to_ascii(domain.bytes_as_string_view(), false));
|
||||
|
||||
// FIXME: Reimplement 7. or call into URL::Parser::parse_host using ascii_domain (8. & 9. do not apply)
|
||||
url.set_host(ascii_domain);
|
||||
return url;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <LibURL/URL.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
ErrorOr<URL::URL> create_unicode_url(String const&);
|
||||
|
||||
}
|
|
@ -5,11 +5,9 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/String.h>
|
||||
#include <LibCore/System.h>
|
||||
#include <LibFileSystem/FileSystem.h>
|
||||
#include <LibUnicode/URL.h>
|
||||
#include <LibWebView/URL.h>
|
||||
|
||||
#if defined(ENABLE_PUBLIC_SUFFIX)
|
||||
|
@ -18,32 +16,16 @@
|
|||
|
||||
namespace WebView {
|
||||
|
||||
static Optional<URL::URL> create_url_with_url_or_path(String const& url_or_path)
|
||||
{
|
||||
auto url = Unicode::create_unicode_url(url_or_path);
|
||||
if (!url.is_error() && url.value().is_valid())
|
||||
return url.release_value();
|
||||
|
||||
auto path = LexicalPath::canonicalized_path(url_or_path.to_byte_string());
|
||||
auto url_from_path = URL::create_with_file_scheme(path);
|
||||
if (url_from_path.is_valid())
|
||||
return url_from_path;
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
static Optional<URL::URL> query_public_suffix_list(StringView url_string)
|
||||
{
|
||||
auto out = MUST(String::from_utf8(url_string));
|
||||
if (!out.starts_with_bytes("about:"sv) && !out.contains("://"sv))
|
||||
out = MUST(String::formatted("https://{}"sv, out));
|
||||
|
||||
auto maybe_url = create_url_with_url_or_path(out);
|
||||
if (!maybe_url.has_value())
|
||||
auto url = URL::create_with_url_or_path(out.to_byte_string());
|
||||
if (!url.is_valid())
|
||||
return {};
|
||||
|
||||
auto url = maybe_url.release_value();
|
||||
|
||||
if (url.host().has<URL::IPv4Address>() || url.host().has<URL::IPv6Address>())
|
||||
return url;
|
||||
|
||||
|
|
Loading…
Reference in a new issue