mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-11 17:00:37 +00:00
LibJS+LibUnicode: Support multiple identifiers within format pattern
This wasn't the case for compact patterns, but unit patterns can contain multiple (up to 2, really) identifiers that must each be recognized by LibJS. Each generated NumberFormat object now stores an array of identifiers parsed. The format pattern itself is encoded with the index into this array for that identifier, e.g. the compact format string "0K" will become "{number}{compactIdentifier:0}".
This commit is contained in:
parent
3b68370212
commit
04b8b87c17
Notes:
sideshowbarker
2024-07-18 01:04:14 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/04b8b87c17b Pull-request: https://github.com/SerenityOS/serenity/pull/10939 Reviewed-by: https://github.com/linusg ✅
3 changed files with 72 additions and 40 deletions
|
@ -57,7 +57,7 @@ struct NumberFormat : public Unicode::NumberFormat {
|
|||
StringIndexType zero_format_index { 0 };
|
||||
StringIndexType positive_format_index { 0 };
|
||||
StringIndexType negative_format_index { 0 };
|
||||
StringIndexType identifier_index { 0 };
|
||||
Vector<StringIndexType> identifier_indices {};
|
||||
};
|
||||
|
||||
struct NumberSystem {
|
||||
|
@ -88,46 +88,61 @@ struct UnicodeLocaleData {
|
|||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
HashMap<String, Locale> locales;
|
||||
Vector<String> numeric_symbols;
|
||||
size_t max_identifier_count { 0 };
|
||||
};
|
||||
|
||||
static String parse_identifier(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
|
||||
static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
|
||||
{
|
||||
static Utf8View whitespace { "\u0020\u00a0"sv };
|
||||
|
||||
Utf8View utf8_pattern { pattern };
|
||||
Optional<size_t> start_index;
|
||||
Optional<size_t> end_index;
|
||||
bool inside_replacement = false;
|
||||
while (true) {
|
||||
Utf8View utf8_pattern { pattern };
|
||||
Optional<size_t> start_index;
|
||||
Optional<size_t> end_index;
|
||||
bool inside_replacement = false;
|
||||
|
||||
for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
|
||||
if (*it == '{') {
|
||||
if (start_index.has_value()) {
|
||||
end_index = utf8_pattern.byte_offset_of(it);
|
||||
break;
|
||||
for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
|
||||
if (*it == '{') {
|
||||
if (start_index.has_value()) {
|
||||
end_index = utf8_pattern.byte_offset_of(it);
|
||||
break;
|
||||
}
|
||||
|
||||
inside_replacement = true;
|
||||
} else if (*it == '}') {
|
||||
inside_replacement = false;
|
||||
} else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
|
||||
start_index = utf8_pattern.byte_offset_of(it);
|
||||
}
|
||||
|
||||
inside_replacement = true;
|
||||
} else if (*it == '}') {
|
||||
inside_replacement = false;
|
||||
} else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
|
||||
start_index = utf8_pattern.byte_offset_of(it);
|
||||
}
|
||||
|
||||
if (!start_index.has_value())
|
||||
return pattern;
|
||||
|
||||
end_index = end_index.value_or(pattern.length());
|
||||
|
||||
utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
|
||||
utf8_pattern = utf8_pattern.trim(whitespace);
|
||||
|
||||
auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
|
||||
auto identifier_index = locale_data.unique_strings.ensure(move(identifier));
|
||||
size_t replacement_index = 0;
|
||||
|
||||
if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) {
|
||||
replacement_index = *index;
|
||||
} else {
|
||||
replacement_index = format.identifier_indices.size();
|
||||
format.identifier_indices.append(identifier_index);
|
||||
|
||||
locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size());
|
||||
}
|
||||
|
||||
pattern = String::formatted("{}{{{}:{}}}{}",
|
||||
*start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
|
||||
replacement,
|
||||
replacement_index,
|
||||
pattern.substring_view(*start_index + utf8_pattern.byte_length()));
|
||||
}
|
||||
|
||||
if (!start_index.has_value())
|
||||
return pattern;
|
||||
end_index = end_index.value_or(pattern.length());
|
||||
|
||||
utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
|
||||
utf8_pattern = utf8_pattern.trim(whitespace);
|
||||
|
||||
auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
|
||||
format.identifier_index = locale_data.unique_strings.ensure(move(identifier));
|
||||
|
||||
return String::formatted("{}{}{}",
|
||||
*start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
|
||||
replacement,
|
||||
pattern.substring_view(*start_index + utf8_pattern.byte_length()));
|
||||
}
|
||||
|
||||
static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
|
||||
|
@ -188,7 +203,7 @@ static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& loc
|
|||
}
|
||||
|
||||
if (type == NumberFormatType::Compact)
|
||||
return parse_identifier(move(pattern), "{compactIdentifier}"sv, locale_data, format);
|
||||
return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format);
|
||||
|
||||
return pattern;
|
||||
};
|
||||
|
@ -413,6 +428,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
|
|||
SourceGenerator generator { builder };
|
||||
generator.set("string_index_type"sv, s_string_index_type);
|
||||
generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size()));
|
||||
generator.set("identifier_count", String::number(locale_data.max_identifier_count));
|
||||
|
||||
generator.append(R"~~~(
|
||||
#include <AK/Array.h>
|
||||
|
@ -437,7 +453,10 @@ struct NumberFormat {
|
|||
number_format.zero_format = s_string_list[zero_format];
|
||||
number_format.positive_format = s_string_list[positive_format];
|
||||
number_format.negative_format = s_string_list[negative_format];
|
||||
number_format.identifier = s_string_list[identifier];
|
||||
|
||||
number_format.identifiers.ensure_capacity(identifiers.size());
|
||||
for (@string_index_type@ identifier : identifiers)
|
||||
number_format.identifiers.append(s_string_list[identifier]);
|
||||
|
||||
return number_format;
|
||||
}
|
||||
|
@ -448,7 +467,7 @@ struct NumberFormat {
|
|||
@string_index_type@ zero_format { 0 };
|
||||
@string_index_type@ positive_format { 0 };
|
||||
@string_index_type@ negative_format { 0 };
|
||||
@string_index_type@ identifier { 0 };
|
||||
Array<@string_index_type@, @identifier_count@> identifiers {};
|
||||
};
|
||||
|
||||
struct NumberSystem {
|
||||
|
@ -479,8 +498,18 @@ struct NumberSystem {
|
|||
generator.set("zero_format"sv, String::number(number_format.zero_format_index));
|
||||
generator.set("positive_format"sv, String::number(number_format.positive_format_index));
|
||||
generator.set("negative_format"sv, String::number(number_format.negative_format_index));
|
||||
generator.set("identifier"sv, String::number(number_format.identifier_index));
|
||||
generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @identifier@ },");
|
||||
generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, { ");
|
||||
|
||||
bool first = true;
|
||||
for (auto identifier_index : number_format.identifier_indices) {
|
||||
if (!first)
|
||||
generator.append(", ");
|
||||
|
||||
generator.append(String::number(identifier_index));
|
||||
first = false;
|
||||
}
|
||||
|
||||
generator.append(" } },");
|
||||
};
|
||||
|
||||
auto append_number_formats = [&](String name, auto const& number_formats) {
|
||||
|
|
|
@ -962,11 +962,14 @@ Vector<PatternPartition> partition_notation_sub_pattern(NumberFormat& number_for
|
|||
}
|
||||
// iv. Else if p is equal to "compactSymbol", then
|
||||
// v. Else if p is equal to "compactName", then
|
||||
else if (part == "compactIdentifier"sv) {
|
||||
else if (part.starts_with("compactIdentifier:"sv)) {
|
||||
// Note: Our implementation combines "compactSymbol" and "compactName" into one field, "compactIdentifier".
|
||||
|
||||
auto identifier_index = part.substring_view("compactIdentifier:"sv.length()).to_uint();
|
||||
VERIFY(identifier_index.has_value());
|
||||
|
||||
// 1. Let compactSymbol be an ILD string representing exponent in short form, which may depend on x in languages having different plural forms. The implementation must be able to provide this string, or else the pattern would not have a "{compactSymbol}" placeholder.
|
||||
auto compact_identifier = number_format.compact_format().identifier;
|
||||
auto compact_identifier = number_format.compact_format().identifiers[*identifier_index];
|
||||
|
||||
// 2. Append a new Record { [[Type]]: "compact", [[Value]]: compactSymbol } as the last element of result.
|
||||
result.append({ "compact"sv, compact_identifier });
|
||||
|
|
|
@ -122,7 +122,7 @@ struct NumberFormat {
|
|||
StringView zero_format {};
|
||||
StringView positive_format {};
|
||||
StringView negative_format {};
|
||||
StringView identifier {};
|
||||
Vector<StringView> identifiers {};
|
||||
};
|
||||
|
||||
struct ListPatterns {
|
||||
|
|
Loading…
Reference in a new issue