LibJS+LibUnicode: Fix computation of compact pattern exponents

The compact scale of each formatting rule was precomputed in commit:
be69eae651

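Using the formula: compact scale = magnitude - pattern scale, where
magnitude is log10 of the format key's number and pattern scale is the
number of "0" characters in the pattern.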

This computation was off-by-one.

For example, consider the format key "10000-count-one", which maps to
"00 thousand" in en-US. What we are really after is the exponent that
best represents the string "thousand" for values greater than or equal
to 10000 and less than 100000 (the next format key). We were previously
doing:

    log10(10000) - "00 thousand".count("0") = 2

This clearly isn't what we want, because "thousand" corresponds to an
exponent of 3 (10^3 = 1000). Instead, if we do:

    log10(10000) + 1 - "00 thousand".count("0") = 3

We get the correct exponent for each format key for each locale.

This commit also renames the generated variable from "compact_scale" to
"exponent" to match the terminology used in ECMA-402.
This commit is contained in:
Timothy Flynn 2021-11-15 07:56:20 -05:00 committed by Linus Groh
parent 48d5684780
commit 1f546476d5
Notes: sideshowbarker 2024-07-18 01:05:11 +09:00
3 changed files with 23 additions and 23 deletions

View file

@ -90,10 +90,12 @@ struct UnicodeLocaleData {
Vector<String> numeric_symbols;
};
static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
{
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns
// https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns
VERIFY((patterns.size() == 1) || (patterns.size() == 2));
auto replace_patterns = [&](String pattern) {
static HashMap<StringView, StringView> replacements = {
{ "{0}"sv, "{number}"sv },
@ -183,16 +185,6 @@ static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data,
return pattern;
};
auto patterns = pattern.split(';');
VERIFY((patterns.size() == 1) || (patterns.size() == 2));
if (format.magnitude != 0) {
auto number_of_zeroes_in_pattern = patterns[0].count("0"sv);
VERIFY(format.magnitude >= number_of_zeroes_in_pattern);
format.compact_scale = format.magnitude - number_of_zeroes_in_pattern;
}
auto zero_format = replace_patterns(move(patterns[0]));
format.positive_format_index = locale_data.unique_strings.ensure(String::formatted("{{plusSign}}{}", zero_format));
@ -238,17 +230,25 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData&
if (split_key.size() != 3)
return;
auto patterns = value.as_string().split(';');
NumberFormat format {};
if (auto type = split_key[0].template to_uint<u64>(); type.has_value()) {
VERIFY(*type % 10 == 0);
format.magnitude = static_cast<u8>(log10(*type));
if (patterns[0] != "0"sv) {
auto number_of_zeroes_in_pattern = patterns[0].count("0"sv);
VERIFY(format.magnitude >= number_of_zeroes_in_pattern);
format.exponent = format.magnitude + 1 - number_of_zeroes_in_pattern;
}
} else {
VERIFY(split_key[0] == "unitPattern"sv);
}
format.plurality = NumberFormat::plurality_from_string(split_key[2]);
parse_number_pattern(value.as_string(), locale_data, NumberFormatType::Compact, format);
parse_number_pattern(move(patterns), locale_data, NumberFormatType::Compact, format);
result.append(move(format));
});
@ -279,7 +279,7 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData&
auto& number_system = ensure_number_system(system);
auto format_object = value.as_object().get("standard"sv);
parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system);
parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system);
auto const& long_format = value.as_object().get("long"sv).as_object().get("decimalFormat"sv);
number_system.decimal_long_formats = parse_number_format(long_format.as_object());
@ -291,10 +291,10 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData&
auto& number_system = ensure_number_system(system);
auto format_object = value.as_object().get("standard"sv);
parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.currency_format);
parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.currency_format);
format_object = value.as_object().get("accounting"sv);
parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.accounting_format);
parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.accounting_format);
number_system.currency_unit_formats = parse_number_format(value.as_object());
@ -307,13 +307,13 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData&
auto& number_system = ensure_number_system(system);
auto format_object = value.as_object().get("standard"sv);
parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.percent_format);
parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.percent_format);
} else if (key.starts_with(scientific_formats_prefix)) {
auto system = key.substring(scientific_formats_prefix.length());
auto& number_system = ensure_number_system(system);
auto format_object = value.as_object().get("standard"sv);
parse_number_pattern(format_object.as_string(), locale_data, NumberFormatType::Standard, number_system.scientific_format);
parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.scientific_format);
}
});
}
@ -424,7 +424,7 @@ struct NumberFormat {
Unicode::NumberFormat number_format {};
number_format.magnitude = magnitude;
number_format.compact_scale = compact_scale;
number_format.exponent = exponent;
number_format.plurality = static_cast<Unicode::NumberFormat::Plurality>(plurality);
number_format.zero_format = s_string_list[zero_format];
number_format.positive_format = s_string_list[positive_format];
@ -435,7 +435,7 @@ struct NumberFormat {
}
u8 magnitude { 0 };
u8 compact_scale { 0 };
u8 exponent { 0 };
u8 plurality { 0 };
@string_index_type@ zero_format { 0 };
@string_index_type@ positive_format { 0 };
@ -466,13 +466,13 @@ struct NumberSystem {
auto append_number_format = [&](auto const& number_format) {
generator.set("magnitude"sv, String::number(number_format.magnitude));
generator.set("compact_scale"sv, String::number(number_format.compact_scale));
generator.set("exponent"sv, String::number(number_format.exponent));
generator.set("plurality"sv, String::number(static_cast<u8>(number_format.plurality)));
generator.set("zero_format"sv, String::number(number_format.zero_format_index));
generator.set("positive_format"sv, String::number(number_format.positive_format_index));
generator.set("negative_format"sv, String::number(number_format.negative_format_index));
generator.set("compact_identifier"sv, String::number(number_format.compact_identifier_index));
generator.append("{ @magnitude@, @compact_scale@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @compact_identifier@ },");
generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, @compact_identifier@ },");
};
auto append_number_formats = [&](String name, auto const& number_formats) {

View file

@ -1599,7 +1599,7 @@ int compute_exponent_for_magniude(NumberFormat& number_format, int magnitude)
best_number_format = &format_rule;
}
return best_number_format ? best_number_format->compact_scale : 0;
return best_number_format ? best_number_format->exponent : 0;
}
default:

View file

@ -117,7 +117,7 @@ struct NumberFormat {
};
u8 magnitude { 0 };
u8 compact_scale { 0 };
u8 exponent { 0 };
Plurality plurality { Plurality::Other };
StringView zero_format {};
StringView positive_format {};