mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-26 09:30:24 +00:00
LibUnicode: Do not limit language display names to known locales
Currently, the UnicodeLocale generator collects a list of known locales from the CLDR before processing language display names. For each locale, the identifier is broken into language, script, and region subtags, and we create a list of seen languages. When processing display names, we skip languages we hadn't seen in that first step. This is insufficient for language display names like "en-GB", which do not have a locale entry in the CLDR, and thus are skipped. So instead, create the list of known languages by actually reading through the list of languages which have a display name.
This commit is contained in:
parent
b0671ceb74
commit
c86f7a675d
Notes:
sideshowbarker
2024-07-17 20:55:55 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/c86f7a675dc Pull-request: https://github.com/SerenityOS/serenity/pull/11873
2 changed files with 35 additions and 7 deletions
|
@ -21,8 +21,8 @@
|
||||||
#include <LibCore/DirIterator.h>
|
#include <LibCore/DirIterator.h>
|
||||||
#include <LibCore/File.h>
|
#include <LibCore/File.h>
|
||||||
|
|
||||||
using StringIndexType = u16;
|
using StringIndexType = u32;
|
||||||
constexpr auto s_string_index_type = "u16"sv;
|
constexpr auto s_string_index_type = "u32"sv;
|
||||||
|
|
||||||
using DisplayPatternIndexType = u8;
|
using DisplayPatternIndexType = u8;
|
||||||
constexpr auto s_display_pattern_index_type = "u8"sv;
|
constexpr auto s_display_pattern_index_type = "u8"sv;
|
||||||
|
@ -319,8 +319,6 @@ static ErrorOr<void> parse_identity(String locale_path, UnicodeLocaleData& local
|
||||||
auto const& variant_string = identity_object.as_object().get("variant"sv);
|
auto const& variant_string = identity_object.as_object().get("variant"sv);
|
||||||
|
|
||||||
locale.language = language_string.as_string();
|
locale.language = language_string.as_string();
|
||||||
if (!locale_data.languages.contains_slow(locale.language))
|
|
||||||
locale_data.languages.append(locale.language);
|
|
||||||
|
|
||||||
if (territory_string.is_string()) {
|
if (territory_string.is_string()) {
|
||||||
locale.territory = territory_string.as_string();
|
locale.territory = territory_string.as_string();
|
||||||
|
@ -366,6 +364,27 @@ static ErrorOr<void> parse_locale_display_patterns(String locale_path, UnicodeLo
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ErrorOr<void> preprocess_languages(String locale_path, UnicodeLocaleData& locale_data)
|
||||||
|
{
|
||||||
|
LexicalPath languages_path(move(locale_path));
|
||||||
|
languages_path = languages_path.append("languages.json"sv);
|
||||||
|
|
||||||
|
auto languages_file = TRY(Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly));
|
||||||
|
auto locale_languages = TRY(JsonValue::from_string(languages_file->read_all()));
|
||||||
|
|
||||||
|
auto const& main_object = locale_languages.as_object().get("main"sv);
|
||||||
|
auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
|
||||||
|
auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
|
||||||
|
auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
|
||||||
|
|
||||||
|
languages_object.as_object().for_each_member([&](auto const& key, auto const&) {
|
||||||
|
if (!key.contains("-alt-"sv) && !locale_data.languages.contains_slow(key))
|
||||||
|
locale_data.languages.append(key);
|
||||||
|
});
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
static ErrorOr<void> parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
|
static ErrorOr<void> parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
|
||||||
{
|
{
|
||||||
LexicalPath languages_path(move(locale_path));
|
LexicalPath languages_path(move(locale_path));
|
||||||
|
@ -383,8 +402,11 @@ static ErrorOr<void> parse_locale_languages(String locale_path, UnicodeLocaleDat
|
||||||
languages.resize(locale_data.languages.size());
|
languages.resize(locale_data.languages.size());
|
||||||
|
|
||||||
languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
|
languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
|
||||||
if (auto index = locale_data.languages.find_first_index(key); index.has_value())
|
if (key.contains("-alt-"sv))
|
||||||
languages[*index] = locale_data.unique_strings.ensure(value.as_string());
|
return;
|
||||||
|
|
||||||
|
auto index = locale_data.languages.find_first_index(key).value();
|
||||||
|
languages[index] = locale_data.unique_strings.ensure(value.as_string());
|
||||||
});
|
});
|
||||||
|
|
||||||
locale.languages = locale_data.unique_language_lists.ensure(move(languages));
|
locale.languages = locale_data.unique_language_lists.ensure(move(languages));
|
||||||
|
@ -802,6 +824,7 @@ static ErrorOr<void> define_aliases_without_scripts(UnicodeLocaleData& locale_da
|
||||||
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data)
|
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data)
|
||||||
{
|
{
|
||||||
auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
|
auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
|
||||||
|
auto preprocess_iterator = TRY(path_to_dir_iterator(locale_names_path));
|
||||||
auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
|
auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
|
||||||
auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
|
auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
|
||||||
auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
|
auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
|
||||||
|
@ -835,6 +858,11 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
|
||||||
TRY(parse_identity(locale_path, locale_data, locale));
|
TRY(parse_identity(locale_path, locale_data, locale));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (preprocess_iterator.has_next()) {
|
||||||
|
auto locale_path = TRY(next_path_from_dir_iterator(preprocess_iterator));
|
||||||
|
TRY(preprocess_languages(locale_path, locale_data));
|
||||||
|
}
|
||||||
|
|
||||||
quick_sort(locale_data.languages);
|
quick_sort(locale_data.languages);
|
||||||
quick_sort(locale_data.territories);
|
quick_sort(locale_data.territories);
|
||||||
quick_sort(locale_data.scripts);
|
quick_sort(locale_data.scripts);
|
||||||
|
|
|
@ -25,7 +25,7 @@ enum class GeneralCategory : u8;
|
||||||
enum class HourCycle : u8;
|
enum class HourCycle : u8;
|
||||||
enum class HourCycleRegion : u8;
|
enum class HourCycleRegion : u8;
|
||||||
enum class Key : u8;
|
enum class Key : u8;
|
||||||
enum class Language : u8;
|
enum class Language : u16;
|
||||||
enum class ListPatternStyle : u8;
|
enum class ListPatternStyle : u8;
|
||||||
enum class ListPatternType : u8;
|
enum class ListPatternType : u8;
|
||||||
enum class Locale : u16;
|
enum class Locale : u16;
|
||||||
|
|
Loading…
Reference in a new issue