mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibUnicode: Port generator utility methods to ErrorOr
Most of these were VERIFY-ing for success, but propagating an error message up to serenity_main() is much nicer than just a SIGABRT.
This commit is contained in:
parent
7f780e43a6
commit
0aa3e5c2ea
Notes:
sideshowbarker
2024-07-18 00:46:21 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/0aa3e5c2ea7 Pull-request: https://github.com/SerenityOS/serenity/pull/11043
3 changed files with 87 additions and 109 deletions
|
@ -77,17 +77,21 @@ struct UnicodeLocaleData {
|
|||
size_t max_variant_size { 0 };
|
||||
};
|
||||
|
||||
static Optional<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
|
||||
// Some parsing is expected to fail. For example, the CLDR contains language mappings
// with locales such as "en-GB-oed" that are canonically invalid locale IDs.
//
// TRY_OR_DISCARD evaluates `expression` exactly once. If the result reports an error,
// the error is dropped and the enclosing function or lambda returns immediately;
// otherwise the macro yields the released value.
//
// Because the failure path is a bare `return;`, this may only be used inside a callable
// whose return type is void. It relies on the statement-expression `({ ... })` extension.
#define TRY_OR_DISCARD(expression)             \
    ({                                         \
        auto _temporary_result = (expression); \
        if (_temporary_result.is_error())      \
            return;                            \
        _temporary_result.release_value();     \
    })
|
||||
|
||||
static ErrorOr<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
|
||||
{
|
||||
auto parsed_key = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, key);
|
||||
if (!parsed_key.has_value())
|
||||
return {};
|
||||
|
||||
auto parsed_alias = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, alias);
|
||||
if (!parsed_alias.has_value())
|
||||
return {};
|
||||
|
||||
return LanguageMapping { parsed_key.release_value(), parsed_alias.release_value() };
|
||||
auto parsed_key = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, key));
|
||||
auto parsed_alias = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, alias));
|
||||
return LanguageMapping { move(parsed_key), move(parsed_alias) };
|
||||
}
|
||||
|
||||
static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
|
||||
|
@ -107,13 +111,10 @@ static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLo
|
|||
auto alias = value.as_object().get("_replacement"sv).as_string();
|
||||
|
||||
if (key.contains('-')) {
|
||||
auto mapping = parse_language_mapping(locale_data, key, alias);
|
||||
if (!mapping.has_value())
|
||||
return;
|
||||
|
||||
locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.complex_mappings.append(mapping.release_value());
|
||||
auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, alias));
|
||||
locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.complex_mappings.append(move(mapping));
|
||||
} else {
|
||||
alias_map.set(key, locale_data.unique_strings.ensure(alias));
|
||||
}
|
||||
|
@ -141,13 +142,10 @@ static ErrorOr<void> parse_likely_subtags(String core_supplemental_path, Unicode
|
|||
auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
|
||||
|
||||
likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
|
||||
auto mapping = parse_language_mapping(locale_data, key, value.as_string());
|
||||
if (!mapping.has_value())
|
||||
return;
|
||||
|
||||
locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.likely_subtags.append(mapping.release_value());
|
||||
auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, value.as_string()));
|
||||
locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
|
||||
locale_data.likely_subtags.append(move(mapping));
|
||||
});
|
||||
|
||||
return {};
|
||||
|
@ -417,7 +415,7 @@ static ErrorOr<void> parse_default_content_locales(String core_path, UnicodeLoca
|
|||
return {};
|
||||
}
|
||||
|
||||
static void define_aliases_without_scripts(UnicodeLocaleData& locale_data)
|
||||
static ErrorOr<void> define_aliases_without_scripts(UnicodeLocaleData& locale_data)
|
||||
{
|
||||
// From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots
|
||||
//
|
||||
|
@ -431,40 +429,42 @@ static void define_aliases_without_scripts(UnicodeLocaleData& locale_data)
|
|||
return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; });
|
||||
};
|
||||
|
||||
auto append_alias_without_script = [&](auto const& locale) {
|
||||
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale);
|
||||
VERIFY(parsed_locale.has_value());
|
||||
|
||||
if ((parsed_locale->language == 0) || (parsed_locale->script == 0) || (parsed_locale->region == 0))
|
||||
return;
|
||||
auto append_alias_without_script = [&](auto const& locale) -> ErrorOr<void> {
|
||||
auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale));
|
||||
if ((parsed_locale.language == 0) || (parsed_locale.script == 0) || (parsed_locale.region == 0))
|
||||
return {};
|
||||
|
||||
auto locale_without_script = String::formatted("{}-{}",
|
||||
locale_data.unique_strings.get(parsed_locale->language),
|
||||
locale_data.unique_strings.get(parsed_locale->region));
|
||||
locale_data.unique_strings.get(parsed_locale.language),
|
||||
locale_data.unique_strings.get(parsed_locale.region));
|
||||
|
||||
if (locale_data.locales.contains(locale_without_script))
|
||||
return;
|
||||
return {};
|
||||
if (find_alias(locale_without_script) != locale_data.locale_aliases.end())
|
||||
return;
|
||||
return {};
|
||||
|
||||
if (auto it = find_alias(locale); it != locale_data.locale_aliases.end())
|
||||
locale_data.locale_aliases.append({ it->name, locale_without_script });
|
||||
else
|
||||
locale_data.locale_aliases.append({ locale, locale_without_script });
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
for (auto const& locale : locale_data.locales)
|
||||
append_alias_without_script(locale.key);
|
||||
TRY(append_alias_without_script(locale.key));
|
||||
for (auto const& locale : locale_data.locale_aliases)
|
||||
append_alias_without_script(locale.alias);
|
||||
TRY(append_alias_without_script(locale.alias));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
auto identity_iterator = path_to_dir_iterator(locale_names_path);
|
||||
auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path));
|
||||
auto misc_iterator = path_to_dir_iterator(move(misc_path));
|
||||
auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
|
||||
auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
|
||||
auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
|
||||
auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
|
||||
auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
|
||||
|
||||
LexicalPath core_supplemental_path(core_path);
|
||||
core_supplemental_path = core_supplemental_path.append("supplemental"sv);
|
||||
|
@ -473,74 +473,56 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
|
|||
TRY(parse_core_aliases(core_supplemental_path.string(), locale_data));
|
||||
TRY(parse_likely_subtags(core_supplemental_path.string(), locale_data));
|
||||
|
||||
auto remove_variants_from_path = [&](String path) -> Optional<String> {
|
||||
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path));
|
||||
if (!parsed_locale.has_value())
|
||||
return {};
|
||||
auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
|
||||
auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
|
||||
|
||||
StringBuilder builder;
|
||||
builder.append(locale_data.unique_strings.get(parsed_locale->language));
|
||||
if (auto script = locale_data.unique_strings.get(parsed_locale->script); !script.is_empty())
|
||||
builder.append(locale_data.unique_strings.get(parsed_locale.language));
|
||||
if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
|
||||
builder.appendff("-{}", script);
|
||||
if (auto region = locale_data.unique_strings.get(parsed_locale->region); !region.is_empty())
|
||||
if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
|
||||
builder.appendff("-{}", region);
|
||||
|
||||
return builder.build();
|
||||
};
|
||||
|
||||
while (identity_iterator.has_next()) {
|
||||
auto locale_path = identity_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(locale_path));
|
||||
auto locale_path = TRY(next_path_from_dir_iterator(identity_iterator));
|
||||
auto language = TRY(remove_variants_from_path(locale_path));
|
||||
|
||||
auto language = remove_variants_from_path(locale_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_identity(locale_path, locale_data, locale));
|
||||
}
|
||||
|
||||
while (locale_names_iterator.has_next()) {
|
||||
auto locale_path = locale_names_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(locale_path));
|
||||
auto locale_path = TRY(next_path_from_dir_iterator(locale_names_iterator));
|
||||
auto language = TRY(remove_variants_from_path(locale_path));
|
||||
|
||||
auto language = remove_variants_from_path(locale_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_locale_languages(locale_path, locale_data, locale));
|
||||
TRY(parse_locale_territories(locale_path, locale_data, locale));
|
||||
TRY(parse_locale_scripts(locale_path, locale_data, locale));
|
||||
}
|
||||
|
||||
while (misc_iterator.has_next()) {
|
||||
auto misc_path = misc_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(misc_path));
|
||||
auto misc_path = TRY(next_path_from_dir_iterator(misc_iterator));
|
||||
auto language = TRY(remove_variants_from_path(misc_path));
|
||||
|
||||
auto language = remove_variants_from_path(misc_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_locale_list_patterns(misc_path, locale_data, locale));
|
||||
}
|
||||
|
||||
while (numbers_iterator.has_next()) {
|
||||
auto numbers_path = numbers_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(numbers_path));
|
||||
auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
|
||||
auto language = TRY(remove_variants_from_path(numbers_path));
|
||||
|
||||
auto language = remove_variants_from_path(numbers_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_locale_currencies(numbers_path, locale_data, locale));
|
||||
TRY(parse_numeric_keywords(numbers_path, locale_data, locale));
|
||||
}
|
||||
|
||||
TRY(parse_default_content_locales(move(core_path), locale_data));
|
||||
define_aliases_without_scripts(locale_data);
|
||||
TRY(define_aliases_without_scripts(locale_data));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -445,45 +445,35 @@ static ErrorOr<void> parse_units(String locale_units_path, UnicodeLocaleData& lo
|
|||
|
||||
static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
|
||||
auto units_iterator = path_to_dir_iterator(move(units_path));
|
||||
auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
|
||||
auto units_iterator = TRY(path_to_dir_iterator(move(units_path)));
|
||||
|
||||
auto remove_variants_from_path = [&](String path) -> Optional<String> {
|
||||
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path));
|
||||
if (!parsed_locale.has_value())
|
||||
return {};
|
||||
auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
|
||||
auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
|
||||
|
||||
StringBuilder builder;
|
||||
builder.append(locale_data.unique_strings.get(parsed_locale->language));
|
||||
if (auto script = locale_data.unique_strings.get(parsed_locale->script); !script.is_empty())
|
||||
builder.append(locale_data.unique_strings.get(parsed_locale.language));
|
||||
if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
|
||||
builder.appendff("-{}", script);
|
||||
if (auto region = locale_data.unique_strings.get(parsed_locale->region); !region.is_empty())
|
||||
if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
|
||||
builder.appendff("-{}", region);
|
||||
|
||||
return builder.build();
|
||||
};
|
||||
|
||||
while (numbers_iterator.has_next()) {
|
||||
auto numbers_path = numbers_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(numbers_path));
|
||||
auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
|
||||
auto language = TRY(remove_variants_from_path(numbers_path));
|
||||
|
||||
auto language = remove_variants_from_path(numbers_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_number_systems(numbers_path, locale_data, locale));
|
||||
}
|
||||
|
||||
while (units_iterator.has_next()) {
|
||||
auto units_path = units_iterator.next_full_path();
|
||||
VERIFY(Core::File::is_directory(units_path));
|
||||
auto units_path = TRY(next_path_from_dir_iterator(units_iterator));
|
||||
auto language = TRY(remove_variants_from_path(units_path));
|
||||
|
||||
auto language = remove_variants_from_path(units_path);
|
||||
if (!language.has_value())
|
||||
continue;
|
||||
|
||||
auto& locale = locale_data.locales.ensure(*language);
|
||||
auto& locale = locale_data.locales.ensure(language);
|
||||
TRY(parse_units(units_path, locale_data, locale));
|
||||
}
|
||||
|
||||
|
|
|
@ -94,7 +94,7 @@ struct Alias {
|
|||
|
||||
template<typename StringIndexType>
|
||||
struct CanonicalLanguageID {
|
||||
static Optional<CanonicalLanguageID> parse(UniqueStringStorage<StringIndexType>& unique_strings, StringView language)
|
||||
static ErrorOr<CanonicalLanguageID> parse(UniqueStringStorage<StringIndexType>& unique_strings, StringView language)
|
||||
{
|
||||
CanonicalLanguageID language_id {};
|
||||
|
||||
|
@ -107,7 +107,7 @@ struct CanonicalLanguageID {
|
|||
if (segments.size() == ++index)
|
||||
return language_id;
|
||||
} else {
|
||||
return {};
|
||||
return Error::from_string_literal("Expected language subtag"sv);
|
||||
}
|
||||
|
||||
if (Unicode::is_unicode_script_subtag(segments[index])) {
|
||||
|
@ -124,7 +124,7 @@ struct CanonicalLanguageID {
|
|||
|
||||
while (index < segments.size()) {
|
||||
if (!Unicode::is_unicode_variant_subtag(segments[index]))
|
||||
return {};
|
||||
return Error::from_string_literal("Expected variant subtag"sv);
|
||||
language_id.variants.append(unique_strings.ensure(segments[index++]));
|
||||
}
|
||||
|
||||
|
@ -137,21 +137,27 @@ struct CanonicalLanguageID {
|
|||
Vector<StringIndexType> variants {};
|
||||
};
|
||||
|
||||
inline Core::DirIterator path_to_dir_iterator(String path)
|
||||
inline ErrorOr<Core::DirIterator> path_to_dir_iterator(String path)
|
||||
{
|
||||
LexicalPath lexical_path(move(path));
|
||||
lexical_path = lexical_path.append("main"sv);
|
||||
VERIFY(Core::File::is_directory(lexical_path.string()));
|
||||
|
||||
Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir);
|
||||
if (iterator.has_error()) {
|
||||
warnln("{}: {}", lexical_path.string(), iterator.error_string());
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
if (iterator.has_error())
|
||||
return Error::from_string_literal(iterator.error_string());
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
// Advances `iterator` with next_full_path() and returns the path it produced.
//
// If the iterator reports an error after advancing, the path is discarded and an Error
// built from iterator.error_string() is returned instead, so callers can propagate the
// failure with TRY() rather than aborting.
//
// NOTE(review): error_string() is handed to Error::from_string_literal(); confirm that
// the returned text stays valid for as long as the Error may be inspected.
inline ErrorOr<String> next_path_from_dir_iterator(Core::DirIterator& iterator)
{
    auto next_path = iterator.next_full_path();
    if (iterator.has_error())
        return Error::from_string_literal(iterator.error_string());

    return next_path;
}
|
||||
|
||||
inline void ensure_from_string_types_are_generated(SourceGenerator& generator)
|
||||
{
|
||||
static bool generated_from_string_types = false;
|
||||
|
|
Loading…
Reference in a new issue