mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
AK+LibUnicode: Add Unicode::create_unicode_url
This is a workaround for the fact that AK::URLParser can't call into LibUnicode directly.
This commit is contained in:
parent
5bcb019106
commit
58f08107b0
Notes:
sideshowbarker
2024-07-17 07:38:17 +09:00
Author: https://github.com/skyrising Commit: https://github.com/SerenityOS/serenity/commit/58f08107b0 Pull-request: https://github.com/SerenityOS/serenity/pull/19414 Reviewed-by: https://github.com/AtkinsSJ Reviewed-by: https://github.com/nico Reviewed-by: https://github.com/shannonbooth Reviewed-by: https://github.com/trflynn89
4 changed files with 76 additions and 1 deletions
|
@ -603,7 +603,8 @@ static Optional<URL::Host> parse_host(StringView input, bool is_opaque = false)
|
|||
// FIXME: 4. Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input.
|
||||
auto domain = URL::percent_decode(input);
|
||||
|
||||
// FIXME: 5. Let asciiDomain be the result of running domain to ASCII on domain.
|
||||
// NOTE: This is handled in Unicode::create_unicode_url, to work around the fact that we can't call into LibUnicode here
|
||||
// FIXME: 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
|
||||
// FIXME: 6. If asciiDomain is failure, then return failure.
|
||||
auto ascii_domain_or_error = String::from_deprecated_string(domain);
|
||||
if (ascii_domain_or_error.is_error())
|
||||
|
|
|
@ -10,6 +10,7 @@ set(SOURCES
|
|||
Segmentation.cpp
|
||||
String.cpp
|
||||
UnicodeUtils.cpp
|
||||
URL.cpp
|
||||
${UNICODE_DATA_SOURCES}
|
||||
)
|
||||
set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
|
||||
|
|
57
Userland/Libraries/LibUnicode/URL.cpp
Normal file
57
Userland/Libraries/LibUnicode/URL.cpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibUnicode/IDNA.h>
|
||||
#include <LibUnicode/URL.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-domain-to-ascii
|
||||
static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
{
    // Runs Unicode ToASCII [UTS46] on `domain` and rejects an empty result.
    // Per the spec, UseSTD3ASCIIRules and VerifyDnsLength both follow beStrict.
    auto const use_std3_ascii_rules = be_strict ? Unicode::IDNA::UseStd3AsciiRules::Yes : Unicode::IDNA::UseStd3AsciiRules::No;
    auto const verify_dns_length = be_strict ? Unicode::IDNA::VerifyDnsLength::Yes : Unicode::IDNA::VerifyDnsLength::No;

    // 1. Let result be the result of running Unicode ToASCII with domain_name set to domain,
    //    UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true,
    //    CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength
    //    set to beStrict. [UTS46]
    Unicode::IDNA::ToAsciiOptions const to_ascii_options {
        Unicode::IDNA::CheckHyphens::No,
        Unicode::IDNA::CheckBidi::Yes,
        Unicode::IDNA::CheckJoiners::Yes,
        use_std3_ascii_rules,
        Unicode::IDNA::TransitionalProcessing::No,
        verify_dns_length
    };

    // 2. If result is a failure value, domain-to-ASCII validation error, return failure.
    //    (TRY propagates the error from to_ascii to our caller.)
    auto ascii = TRY(Unicode::IDNA::to_ascii(Utf8View(domain), to_ascii_options));

    // 4. Return result.
    if (!ascii.is_empty())
        return ascii;

    // 3. If result is the empty string, domain-to-ASCII validation error, return failure.
    return Error::from_string_literal("Empty domain");
}
|
||||
|
||||
// https://url.spec.whatwg.org/#concept-host-parser
|
||||
ErrorOr<URL> create_unicode_url(String const& url_string)
{
    // Builds a URL from `url_string` and, when its host is a non-empty domain,
    // replaces that host with the domain's ASCII form. Any other URL is handed
    // back exactly as constructed.

    // NOTE: Steps 1.-4. of the host parser are implemented in URLParser::parse_host.
    URL parsed_url = url_string;

    // Only valid URLs whose host is stored as a String (i.e. a domain) need the conversion.
    bool const has_domain_host = parsed_url.is_valid() && parsed_url.host().has<String>();
    if (!has_domain_host)
        return parsed_url;

    auto& unicode_domain = parsed_url.host().get<String>();
    if (unicode_domain.is_empty())
        return parsed_url;

    // 5. Let asciiDomain be the result of running domain to ASCII with domain and false.
    // 6. If asciiDomain is failure, then return failure.
    auto converted_domain = TRY(domain_to_ascii(unicode_domain.bytes_as_string_view(), false));

    // FIXME: Reimplement 7. or call into URLParser::parse_host using ascii_domain (8. & 9. do not apply)
    parsed_url.set_host(converted_domain);

    return parsed_url;
}
|
||||
|
||||
}
|
16
Userland/Libraries/LibUnicode/URL.h
Normal file
16
Userland/Libraries/LibUnicode/URL.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <AK/URL.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Constructs a URL from the given string and, if the URL is valid and its host is a
// non-empty domain (held as a String), replaces the host with the result of the WHATWG
// "domain to ASCII" algorithm (https://url.spec.whatwg.org/#concept-domain-to-ascii).
// URLs that are invalid, have a non-String host, or have an empty domain are returned
// as constructed. Returns an error if the domain-to-ASCII conversion fails or yields
// an empty string.
ErrorOr<URL> create_unicode_url(String const&);
|
||||
|
||||
}
|
Loading…
Reference in a new issue