mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-04 21:40:33 +00:00
LibUnicode: Canonicalize locale extensions
This commit is contained in:
parent
671eaa0c59
commit
6f0cb52dc4
Notes:
sideshowbarker
2024-07-18 05:04:25 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/6f0cb52dc48 Pull-request: https://github.com/SerenityOS/serenity/pull/9668 Reviewed-by: https://github.com/linusg
2 changed files with 104 additions and 2 deletions
|
@ -277,7 +277,7 @@ TEST_CASE(canonicalize_unicode_locale_id)
|
|||
VERIFY(locale_id.has_value());
|
||||
|
||||
auto canonical_locale = Unicode::canonicalize_unicode_locale_id(*locale_id);
|
||||
EXPECT_EQ(canonical_locale, expected_canonical_locale);
|
||||
EXPECT_EQ(*canonical_locale, expected_canonical_locale);
|
||||
};
|
||||
|
||||
test("aaa"sv, "aaa"sv);
|
||||
|
@ -287,4 +287,44 @@ TEST_CASE(canonicalize_unicode_locale_id)
|
|||
test("aaa-bBBB-cC"sv, "aaa-Bbbb-CC"sv);
|
||||
test("aaa-bbbb-cc-1234"sv, "aaa-Bbbb-CC-1234"sv);
|
||||
test("aaa-bbbb-cc-ABCDE"sv, "aaa-Bbbb-CC-abcde"sv);
|
||||
|
||||
test("en-u-aa"sv, "en-u-aa"sv);
|
||||
test("EN-U-AA"sv, "en-u-aa"sv);
|
||||
test("en-u-aa-bbb"sv, "en-u-aa-bbb"sv);
|
||||
test("EN-U-AA-BBB"sv, "en-u-aa-bbb"sv);
|
||||
test("en-u-aa-ccc-bbb"sv, "en-u-aa-ccc-bbb"sv);
|
||||
test("EN-U-AA-CCC-BBB"sv, "en-u-aa-ccc-bbb"sv);
|
||||
test("en-u-ddd-bbb-ccc"sv, "en-u-bbb-ccc-ddd"sv);
|
||||
test("EN-U-DDD-BBB-CCC"sv, "en-u-bbb-ccc-ddd"sv);
|
||||
test("en-u-2k-aaa-1k-bbb"sv, "en-u-1k-bbb-2k-aaa"sv);
|
||||
test("EN-U-2K-AAA-1K-BBB"sv, "en-u-1k-bbb-2k-aaa"sv);
|
||||
test("en-u-ccc-bbb-2k-aaa-1k-bbb"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv);
|
||||
test("EN-U-CCC-BBB-2K-AAA-1K-BBB"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv);
|
||||
test("en-u-1k-true"sv, "en-u-1k"sv);
|
||||
test("EN-U-1K-TRUE"sv, "en-u-1k"sv);
|
||||
|
||||
test("en-t-en"sv, "en-t-en"sv);
|
||||
test("EN-T-EN"sv, "en-t-en"sv);
|
||||
test("en-latn-t-en-latn"sv, "en-Latn-t-en-latn"sv);
|
||||
test("EN-LATN-T-EN-LATN"sv, "en-Latn-t-en-latn"sv);
|
||||
test("en-us-t-en-us"sv, "en-US-t-en-us"sv);
|
||||
test("EN-US-T-EN-US"sv, "en-US-t-en-us"sv);
|
||||
test("en-latn-us-t-en-latn-us"sv, "en-Latn-US-t-en-latn-us"sv);
|
||||
test("EN-LATN-US-T-EN-LATN-US"sv, "en-Latn-US-t-en-latn-us"sv);
|
||||
test("en-t-en-k2-bbb-k1-aaa"sv, "en-t-en-k1-aaa-k2-bbb"sv);
|
||||
test("EN-T-EN-K2-BBB-K1-AAA"sv, "en-t-en-k1-aaa-k2-bbb"sv);
|
||||
test("en-t-k1-true"sv, "en-t-k1-true"sv);
|
||||
test("EN-T-K1-TRUE"sv, "en-t-k1-true"sv);
|
||||
|
||||
test("en-0-aaa"sv, "en-0-aaa"sv);
|
||||
test("EN-0-AAA"sv, "en-0-aaa"sv);
|
||||
test("en-0-bbb-aaa"sv, "en-0-bbb-aaa"sv);
|
||||
test("EN-0-BBB-AAA"sv, "en-0-bbb-aaa"sv);
|
||||
test("en-z-bbb-0-aaa"sv, "en-0-aaa-z-bbb"sv);
|
||||
test("EN-Z-BBB-0-AAA"sv, "en-0-aaa-z-bbb"sv);
|
||||
|
||||
test("en-u-aa-t-en"sv, "en-t-en-u-aa"sv);
|
||||
test("EN-U-AA-T-EN"sv, "en-t-en-u-aa"sv);
|
||||
test("en-z-bbb-u-aa-t-en-0-aaa"sv, "en-0-aaa-t-en-u-aa-z-bbb"sv);
|
||||
test("EN-Z-BBB-U-AA-T-EN-0-AAA"sv, "en-0-aaa-t-en-u-aa-z-bbb"sv);
|
||||
}
|
||||
|
|
|
@ -518,7 +518,69 @@ Optional<String> canonicalize_unicode_locale_id(LocaleID& locale_id)
|
|||
for (auto const& variant : locale_id.language_id.variants)
|
||||
append_sep_and_string(variant);
|
||||
|
||||
// FIXME: Handle extensions and pu_extensions.
|
||||
// Sort the extensions by a single-character key so they are emitted in a stable order below.
quick_sort(locale_id.extensions, [](auto const& left, auto const& right) {
|
||||
// Maps each extension alternative to the character it is ordered by: 'u' for a LocaleExtension,
// 't' for a TransformedExtension, and the lowercased `key` member for an OtherExtension.
auto key = [](auto const& extension) {
|
||||
return extension.visit(
|
||||
[](LocaleExtension const&) { return 'u'; },
|
||||
[](TransformedExtension const&) { return 't'; },
|
||||
[](OtherExtension const& ext) { return static_cast<char>(to_ascii_lowercase(ext.key)); });
|
||||
};
|
||||
|
||||
// Plain character comparison of the two keys.
return key(left) < key(right);
|
||||
});
|
||||
|
||||
// Appends `key` and then every entry of `values` to the output via append_sep_and_string.
// When `remove_true_values` is set, entries equal to "true" (compared case-insensitively) are skipped,
// so only the key is emitted for them.
auto append_key_value_list = [&](auto const& key, auto const& values, bool remove_true_values) {
|
||||
append_sep_and_string(key);
|
||||
|
||||
for (auto const& type : values) {
|
||||
// Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec
|
||||
// because, for tvalues, that would result in invalid syntax:
|
||||
// https://unicode-org.atlassian.net/browse/CLDR-14318
|
||||
// This has also been noted by test262:
|
||||
// https://github.com/tc39/test262/blob/18bb955771669541c56c28748603f6afdb2e25ff/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js
|
||||
if (remove_true_values && type.equals_ignoring_case("true"sv))
|
||||
continue;
|
||||
append_sep_and_string(type);
|
||||
}
|
||||
};
|
||||
|
||||
// Serialize every extension (already ordered above) into the builder, dispatching on its alternative.
// Each branch sorts the extension's own sub-lists in place before emitting them.
for (auto& extension : locale_id.extensions) {
|
||||
extension.visit(
|
||||
// LocaleExtension: "-u", then the sorted attributes, then the keywords sorted by key.
[&](LocaleExtension& ext) {
|
||||
quick_sort(ext.attributes);
|
||||
quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
|
||||
builder.append("-u"sv);
|
||||
|
||||
for (auto const& attribute : ext.attributes)
|
||||
append_sep_and_string(attribute);
|
||||
for (auto const& keyword : ext.keywords)
|
||||
// `true` here: keyword types equal to "true" are dropped from the output.
append_key_value_list(keyword.key, keyword.types, true);
|
||||
},
|
||||
// TransformedExtension: "-t", then the optional language (language, script, region, sorted variants),
// then the fields sorted by key.
[&](TransformedExtension& ext) {
|
||||
quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
|
||||
builder.append("-t"sv);
|
||||
|
||||
if (ext.language.has_value()) {
|
||||
append_sep_and_string(ext.language->language);
|
||||
append_sep_and_string(ext.language->script);
|
||||
append_sep_and_string(ext.language->region);
|
||||
|
||||
quick_sort(ext.language->variants);
|
||||
for (auto const& variant : ext.language->variants)
|
||||
append_sep_and_string(variant);
|
||||
}
|
||||
|
||||
for (auto const& field : ext.fields)
|
||||
// `false` here: field values equal to "true" are kept as-is.
append_key_value_list(field.key, field.values, false);
|
||||
},
|
||||
// OtherExtension: "-" plus the lowercased key character, then the values in their existing order
// (they are not sorted here).
[&](OtherExtension& ext) {
|
||||
builder.appendff("-{:c}", to_ascii_lowercase(ext.key));
|
||||
for (auto const& value : ext.values)
|
||||
append_sep_and_string(value);
|
||||
});
|
||||
}
|
||||
|
||||
// FIXME: Handle pu_extensions.
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue