ladybird/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp
Timothy Flynn e8dd2eea74 LibJS: Implement the Intl.DisplayNames constructor
There are notably FIXME notations in this commit regarding Unicode locale
extensions. We are not parsing extensions (or private use extensions) at
all yet.
2021-08-26 22:04:09 +01:00

380 lines
17 KiB
C++

/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/AllOf.h>
#include <AK/CharacterTypes.h>
#include <AK/QuickSort.h>
#include <AK/TypeCasts.h>
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/Intl/AbstractOperations.h>
#include <LibUnicode/Locale.h>
namespace JS::Intl {
// 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
//
// Instead of a plain boolean, this yields the parsed locale ID when the tag is structurally valid, and an
// empty Optional when it is not.
// Note: The duplicate-variant check sorts the variants in place, so the returned locale ID carries its
//       variant subtags in sorted order.
static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale)
{
    // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:

    // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
    auto parsed_locale = Unicode::parse_unicode_locale_id(locale);
    if (!parsed_locale.has_value())
        return {};

    auto& language_id = parsed_locale->language_id;

    // locale does not use any of the backwards compatibility syntax described in Unicode Technical Standard #35 LDML § 3.3 BCP 47 Conformance;
    // https://unicode.org/reports/tr35/#BCP_47_Conformance
    if (locale.contains('_'))
        return {};
    if (language_id.is_root)
        return {};
    if (!language_id.language.has_value())
        return {};

    // the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and
    auto& variants = language_id.variants;
    if (!variants.is_empty()) {
        // Once sorted, equal subtags sit next to each other, so comparing neighbours is sufficient.
        quick_sort(variants);

        for (size_t i = 1; i < variants.size(); ++i) {
            if (variants[i - 1] == variants[i])
                return {};
        }
    }

    // FIXME: Handle extensions.
    // if locale contains an extensions* component, that component
    //     does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
    //     contains at most one unicode_locale_extensions component,
    //     contains at most one transformed_extensions component, and
    //     if a transformed_extensions component that contains a tlang component is present, then
    //         the tlang component contains no duplicate unicode_variant_subtag subtags.

    return parsed_locale;
}
// 6.2.3 CanonicalizeUnicodeLocaleId ( locale ), https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
//
// Takes an already parsed locale ID (rather than the locale string of the specification) and returns its
// canonical string form. Canonicalization is delegated to LibUnicode and is asserted to succeed.
static String canonicalize_unicode_locale_id(Unicode::LocaleID& locale)
{
    // 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers.
    // 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form.
    auto locale_id = Unicode::canonicalize_unicode_locale_id(locale);
    VERIFY(locale_id.has_value());

    // FIXME: Handle extensions.
    // 3. If localeId contains a substring extension that is a Unicode locale extension sequence, then
    //     a. Let components be ! UnicodeExtensionComponents(extension).
    //     b. Let attributes be components.[[Attributes]].
    //     c. Let keywords be components.[[Keywords]].
    //     d. Let newExtension be "u".
    //     e. For each element attr of attributes, do
    //         i. Append "-" to newExtension.
    //         ii. Append attr to newExtension.
    //     f. For each Record { [[Key]], [[Value]] } keyword in keywords, do
    //         i. Append "-" to newExtension.
    //         ii. Append keyword.[[Key]] to newExtension.
    //         iii. If keyword.[[Value]] is not the empty String, then
    //             1. Append "-" to newExtension.
    //             2. Append keyword.[[Value]] to newExtension.
    //     g. Assert: newExtension is not equal to "u".
    //     h. Let localeId be localeId with the substring corresponding to extension replaced by the string newExtension.

    // 4. Return localeId.
    String canonical_locale = locale_id.release_value();
    return canonical_locale;
}
// 6.3.1 IsWellFormedCurrencyCode ( currency ), https://tc39.es/ecma402/#sec-iswellformedcurrencycode
//
// Returns true when the given currency code consists of exactly three characters, all of which satisfy
// is_ascii_alpha.
static bool is_well_formed_currency_code(StringView currency)
{
    // 1. Let normalized be the result of mapping currency to upper case as described in 6.1.
    // NOTE: No explicit case mapping is performed here; the checks below are applied to the input as given,
    //       which relies on is_ascii_alpha accepting letters of either case.

    // 2. If the number of elements in normalized is not 3, return false.
    bool const has_three_characters = currency.length() == 3;

    // 3. If normalized contains any character that is not in the range "A" to "Z" (U+0041 to U+005A), return false.
    // 4. Return true.
    return has_three_characters && all_of(currency, is_ascii_alpha);
}
// 9.2.1 CanonicalizeLocaleList ( locales ), https://tc39.es/ecma402/#sec-canonicalizelocalelist
//
// Converts the `locales` argument into a list of canonicalized language tags without duplicates, in order of
// first appearance. If an exception is pending on the VM when this returns, the returned list is empty and
// must not be used.
Vector<String> canonicalize_locale_list(GlobalObject& global_object, Value locales)
{
    auto& vm = global_object.vm();

    // 1. If locales is undefined, then
    //     a. Return a new empty List.
    if (locales.is_undefined())
        return {};

    // 2. Let seen be a new empty List.
    Vector<String> seen;

    Object* locale_list = nullptr;

    // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then
    // FIXME: When we have an Intl.Locale object, handle it here.
    if (locales.is_string()) {
        // a. Let O be CreateArrayFromList(« locales »).
        locale_list = Array::create_from(global_object, { locales });
    }
    // 4. Else,
    else {
        // a. Let O be ? ToObject(locales).
        locale_list = locales.to_object(global_object);
        if (vm.exception())
            return {};
    }

    // 5. Let len be ? ToLength(? Get(O, "length")).
    auto length_value = locale_list->get(vm.names.length);
    if (vm.exception())
        return {};

    auto length = length_value.to_length(global_object);
    if (vm.exception())
        return {};

    // 6. Let k be 0.
    // 7. Repeat, while k < len,
    //     d. Increase k by 1. (Performed by the loop header.)
    for (size_t k = 0; k < length; ++k) {
        // a. Let Pk be ToString(k).
        auto index_key = PropertyName { k };

        // b. Let kPresent be ? HasProperty(O, Pk).
        auto is_present = locale_list->has_property(index_key);
        if (vm.exception())
            return {};

        // c. If kPresent is true, then
        if (!is_present)
            continue;

        // i. Let kValue be ? Get(O, Pk).
        auto element = locale_list->get(index_key);
        if (vm.exception())
            return {};

        // ii. If Type(kValue) is not String or Object, throw a TypeError exception.
        if (!(element.is_string() || element.is_object())) {
            vm.throw_exception<TypeError>(global_object, ErrorType::NotAnObjectOrString, element.to_string_without_side_effects());
            return {};
        }

        // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
        //     1. Let tag be kValue.[[Locale]].
        // iv. Else,
        //     1. Let tag be ? ToString(kValue).
        // FIXME: When we have an Intl.Locale object, handle it here.
        auto tag = element.to_string(global_object);
        if (vm.exception())
            return {};

        // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
        auto parsed_tag = is_structurally_valid_language_tag(tag);
        if (!parsed_tag.has_value()) {
            vm.throw_exception<RangeError>(global_object, ErrorType::IntlInvalidLanguageTag, tag);
            return {};
        }

        // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
        auto canonical_tag = JS::Intl::canonicalize_unicode_locale_id(*parsed_tag);

        // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
        if (!seen.contains_slow(canonical_tag))
            seen.append(move(canonical_tag));
    }

    // 8. Return seen.
    return seen;
}
// 9.2.2 BestAvailableLocale ( availableLocales, locale ), https://tc39.es/ecma402/#sec-bestavailablelocale
//
// There is no availableLocales parameter here; availability is queried through Unicode::is_locale_available.
// Returns the longest available prefix of `locale` obtained by repeatedly dropping trailing subtags, or an
// empty Optional when no prefix is available.
static Optional<String> best_available_locale(StringView const& locale)
{
    // 1. Let candidate be locale.
    StringView candidate = locale;

    // 2. Repeat,
    for (;;) {
        // a. If availableLocales contains an element equal to candidate, return candidate.
        if (Unicode::is_locale_available(candidate))
            return candidate;

        // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined.
        auto last_separator = candidate.find_last('-');
        if (!last_separator.has_value())
            return {};

        auto truncated_length = *last_separator;

        // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
        //    (This also drops a single-character subtag directly preceding the removed one.)
        if ((truncated_length >= 2) && (candidate[truncated_length - 2] == '-'))
            truncated_length -= 2;

        // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
        candidate = candidate.substring_view(0, truncated_length);
    }
}
// 9.2.3 LookupMatcher ( availableLocales, requestedLocales ), https://tc39.es/ecma402/#sec-lookupmatcher
//
// Picks the first requested locale for which best_available_locale finds a match; if none of them match,
// the result holds Unicode::default_locale() instead.
static LocaleResult lookup_matcher(Vector<String> const& requested_locales)
{
    // 1. Let result be a new Record.
    LocaleResult result {};

    // 2. For each element locale of requestedLocales, do
    for (auto const& requested_locale : requested_locales) {
        // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
        auto const& no_extensions_locale = requested_locale; // FIXME: Handle extensions.

        // b. Let availableLocale be BestAvailableLocale(availableLocales, noExtensionsLocale).
        auto match = best_available_locale(no_extensions_locale);

        // c. If availableLocale is not undefined, then
        if (!match.has_value())
            continue;

        // i. Set result.[[locale]] to availableLocale.
        result.locale = match.release_value();

        // FIXME: Handle extensions.
        // ii. If locale and noExtensionsLocale are not the same String value, then
        //     1. Let extension be the String value consisting of the substring of the Unicode locale extension sequence within locale.
        //     2. Set result.[[extension]] to extension.

        // iii. Return result.
        return result;
    }

    // 3. Let defLocale be DefaultLocale().
    // 4. Set result.[[locale]] to defLocale.
    result.locale = Unicode::default_locale();

    // 5. Return result.
    return result;
}
// 9.2.4 BestFitMatcher ( availableLocales, requestedLocales ), https://tc39.es/ecma402/#sec-bestfitmatcher
//
// Currently this simply forwards to lookup_matcher.
static LocaleResult best_fit_matcher(Vector<String> const& requested_locales)
{
    // The algorithm is implementation dependent, but should produce results that a typical user of the requested locales would
    // perceive as at least as good as those produced by the LookupMatcher abstract operation.
    auto lookup_result = lookup_matcher(requested_locales);
    return lookup_result;
}
// 9.2.7 ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData ), https://tc39.es/ecma402/#sec-resolvelocale
//
// Runs the locale matcher selected by options.locale_matcher over the requested locales and returns the
// matcher's result as-is. Unicode extension handling (steps 7 through 10) is not implemented yet, which is
// why relevant_extension_keys is currently unused.
LocaleResult resolve_locale(Vector<String> const& requested_locales, LocaleOptions const& options, [[maybe_unused]] Vector<StringView> relevant_extension_keys)
{
    // 1. Let matcher be options.[[localeMatcher]].
    auto const& matcher = options.locale_matcher;
    bool const wants_lookup = matcher.is_string() && (matcher.as_string().string() == "lookup"sv);

    // 2. If matcher is "lookup", then
    //     a. Let r be LookupMatcher(availableLocales, requestedLocales).
    // 3. Else,
    //     a. Let r be BestFitMatcher(availableLocales, requestedLocales).
    LocaleResult result = wants_lookup ? lookup_matcher(requested_locales) : best_fit_matcher(requested_locales);

    // 4. Let foundLocale be r.[[locale]].
    // 5. Let result be a new Record.
    // 6. Set result.[[dataLocale]] to foundLocale.

    // FIXME: Handle extensions.
    // 7. If r has an [[extension]] field, then
    //     a. Let components be ! UnicodeExtensionComponents(r.[[extension]]).
    //     b. Let keywords be components.[[Keywords]].
    // 8. Let supportedExtension be "-u".
    // 9. For each element key of relevantExtensionKeys, do
    //     a. Let foundLocaleData be localeData.[[<foundLocale>]].
    //     b. Assert: Type(foundLocaleData) is Record.
    //     c. Let keyLocaleData be foundLocaleData.[[<key>]].
    //     d. Assert: Type(keyLocaleData) is List.
    //     e. Let value be keyLocaleData[0].
    //     f. Assert: Type(value) is either String or Null.
    //     g. Let supportedExtensionAddition be "".
    //     h. If r has an [[extension]] field, then
    //         i. If keywords contains an element whose [[Key]] is the same as key, then
    //             1. Let entry be the element of keywords whose [[Key]] is the same as key.
    //             2. Let requestedValue be entry.[[Value]].
    //             3. If requestedValue is not the empty String, then
    //                 a. If keyLocaleData contains requestedValue, then
    //                     i. Let value be requestedValue.
    //                     ii. Let supportedExtensionAddition be the string-concatenation of "-", key, "-", and value.
    //             4. Else if keyLocaleData contains "true", then
    //                 a. Let value be "true".
    //                 b. Let supportedExtensionAddition be the string-concatenation of "-" and key.
    //     i. If options has a field [[<key>]], then
    //         i. Let optionsValue be options.[[<key>]].
    //         ii. Assert: Type(optionsValue) is either String, Undefined, or Null.
    //         iii. If Type(optionsValue) is String, then
    //             1. Let optionsValue be the string optionsValue after performing the algorithm steps to transform Unicode extension values to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
    //             2. Let optionsValue be the string optionsValue after performing the algorithm steps to replace Unicode extension values with their canonical form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
    //             3. If optionsValue is the empty String, then
    //                 a. Let optionsValue be "true".
    //         iv. If keyLocaleData contains optionsValue, then
    //             1. If SameValue(optionsValue, value) is false, then
    //                 a. Let value be optionsValue.
    //                 b. Let supportedExtensionAddition be "".
    //     j. Set result.[[<key>]] to value.
    //     k. Append supportedExtensionAddition to supportedExtension.
    // 10. If the number of elements in supportedExtension is greater than 2, then
    //     a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).

    // 11. Set result.[[locale]] to foundLocale.
    // 12. Return result.
    return result;
}
// 9.2.13 GetOption ( options, property, type, values, fallback ), https://tc39.es/ecma402/#sec-getoption
//
// Reads `property` from the `options` object and coerces it to the requested type (boolean or string).
// An undefined property yields the fallback. A non-empty `values` list restricts the permitted string
// values; anything else results in a RangeError. If an exception is pending on the VM when this returns,
// the returned value is empty.
Value get_option(GlobalObject& global_object, Value options, PropertyName const& property, Value::Type type, Vector<StringView> const& values, Fallback fallback)
{
    auto& vm = global_object.vm();

    // 1. Assert: Type(options) is Object.
    VERIFY(options.is_object());

    // 2. Let value be ? Get(options, property).
    auto value = options.get(global_object, property);
    if (vm.exception())
        return {};

    // 3. If value is undefined, return fallback.
    if (value.is_undefined()) {
        // The fallback may hold nothing, a boolean, or a string; each alternative maps to a matching Value.
        return fallback.visit(
            [](Empty) { return js_undefined(); },
            [](bool fallback_flag) { return Value(fallback_flag); },
            [&vm](StringView fallback_string) { return Value(js_string(vm, fallback_string)); });
    }

    // 4. Assert: type is "boolean" or "string".
    VERIFY((type == Value::Type::Boolean) || (type == Value::Type::String));

    // 6. If type is "string", then
    if (type == Value::Type::String) {
        // a. Let value be ? ToString(value).
        value = value.to_primitive_string(global_object);
        if (vm.exception())
            return {};
    }
    // 5. If type is "boolean", then
    else {
        // a. Let value be ! ToBoolean(value).
        value = Value(value.to_boolean());
    }

    // 7. If values is not undefined and values does not contain an element equal to value, throw a RangeError exception.
    // Note: An empty `values` list stands in for the specification's "undefined".
    if (values.is_empty()) {
        // 8. Return value.
        return value;
    }

    // Note: Every location in the spec that invokes GetOption with type=boolean also has values=undefined.
    VERIFY(value.is_string());

    if (!values.contains_slow(value.as_string().string())) {
        vm.throw_exception<RangeError>(global_object, ErrorType::OptionIsNotValidValue, value.to_string_without_side_effects(), property.as_string());
        return {};
    }

    // 8. Return value.
    return value;
}
}