LibJS+LibLocale: Replace number range formatting with ICU

This uses ICU to implement the Intl.NumberFormat `formatRange` and
`formatRangeToParts` prototype methods.

Note: All of the changes to the test files in this patch bring the
expected results in line with both Chrome and Safari.
This commit is contained in:
Timothy Flynn 2024-06-09 19:26:25 -04:00 committed by Andreas Kling
parent 67f3de2320
commit f6bee0f5a8
Notes: sideshowbarker 2024-07-17 02:22:23 +09:00
6 changed files with 148 additions and 195 deletions

View file

@ -229,7 +229,7 @@ ThrowCompletionOr<MathematicalValue> to_intl_mathematical_value(VM& vm, Value va
} }
// 15.5.19 PartitionNumberRangePattern ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-partitionnumberrangepattern // 15.5.19 PartitionNumberRangePattern ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-partitionnumberrangepattern
ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pattern(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end) ThrowCompletionOr<Vector<::Locale::NumberFormat::Partition>> partition_number_range_pattern(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{ {
// 1. If x is NaN or y is NaN, throw a RangeError exception. // 1. If x is NaN or y is NaN, throw a RangeError exception.
if (start.is_nan()) if (start.is_nan())
@ -237,132 +237,38 @@ ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pat
if (end.is_nan()) if (end.is_nan())
return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "end"sv); return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "end"sv);
// 2. Let result be a new empty List. return number_format.formatter().format_range_to_parts(start.to_value(), end.to_value());
Vector<PatternPartitionWithSource> result;
// 3. Let xResult be ? PartitionNumberPattern(numberFormat, x).
auto raw_start_result = partition_number_pattern(number_format, start);
auto start_result = PatternPartitionWithSource::create_from_parent_list(move(raw_start_result));
// 4. Let yResult be ? PartitionNumberPattern(numberFormat, y).
auto raw_end_result = partition_number_pattern(number_format, end);
auto end_result = PatternPartitionWithSource::create_from_parent_list(move(raw_end_result));
// 5. If ! FormatNumeric(numberFormat, x) is equal to ! FormatNumeric(numberFormat, y), then
auto formatted_start = format_numeric(number_format, start);
auto formatted_end = format_numeric(number_format, end);
if (formatted_start == formatted_end) {
// a. Let appxResult be ? FormatApproximately(numberFormat, xResult).
auto approximate_result = format_approximately(number_format, move(start_result));
// b. For each r in appxResult, do
for (auto& result : approximate_result) {
// i. Set r.[[Source]] to "shared".
result.source = "shared"sv;
}
// c. Return appxResult.
return approximate_result;
}
// 6. For each element r in xResult, do
result.ensure_capacity(start_result.size());
for (auto& start_part : start_result) {
// a. Append a new Record { [[Type]]: r.[[Type]], [[Value]]: r.[[Value]], [[Source]]: "startRange" } as the last element of result.
PatternPartitionWithSource part;
part.type = start_part.type;
part.value = move(start_part.value);
part.source = "startRange"sv;
result.unchecked_append(move(part));
}
// 7. Let rangeSeparator be an ILND String value used to separate two numbers.
auto range_separator_symbol = ::Locale::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), ::Locale::NumericSymbol::RangeSeparator).value_or("-"sv);
auto range_separator = ::Locale::augment_range_pattern(range_separator_symbol, result.last().value, end_result[0].value);
// 8. Append a new Record { [[Type]]: "literal", [[Value]]: rangeSeparator, [[Source]]: "shared" } element to result.
PatternPartitionWithSource part;
part.type = "literal"sv;
part.value = range_separator.has_value()
? range_separator.release_value()
: MUST(String::from_utf8(range_separator_symbol));
part.source = "shared"sv;
result.append(move(part));
// 9. For each element r in yResult, do
result.ensure_capacity(result.size() + end_result.size());
for (auto& end_part : end_result) {
// a. Append a new Record { [[Type]]: r.[[Type]], [[Value]]: r.[[Value]], [[Source]]: "endRange" } as the last element of result.
PatternPartitionWithSource part;
part.type = end_part.type;
part.value = move(end_part.value);
part.source = "endRange"sv;
result.unchecked_append(move(part));
}
// 10. Return ! CollapseNumberRange(result).
return collapse_number_range(move(result));
}
// 15.5.20 FormatApproximately ( numberFormat, result ), https://tc39.es/ecma402/#sec-formatapproximately
Vector<PatternPartitionWithSource> format_approximately(NumberFormat& number_format, Vector<PatternPartitionWithSource> result)
{
// 1. Let approximatelySign be an ILND String value used to signify that a number is approximate.
auto approximately_sign = ::Locale::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), ::Locale::NumericSymbol::ApproximatelySign);
// 2. If approximatelySign is not empty, insert a new Record { [[Type]]: "approximatelySign", [[Value]]: approximatelySign } at an ILND index in result. For example, if numberFormat has [[Locale]] "en-US" and [[NumberingSystem]] "latn" and [[Style]] "decimal", the new Record might be inserted before the first element of result.
if (approximately_sign.has_value() && !approximately_sign->is_empty()) {
PatternPartitionWithSource partition;
partition.type = "approximatelySign"sv;
partition.value = MUST(String::from_utf8(*approximately_sign));
result.insert_before_matching(move(partition), [](auto const& part) {
return part.type.is_one_of("integer"sv, "decimal"sv, "plusSign"sv, "minusSign"sv, "percentSign"sv, "currency"sv);
});
}
// 3. Return result.
return result;
}
// 15.5.21 CollapseNumberRange ( result ), https://tc39.es/ecma402/#sec-collapsenumberrange
Vector<PatternPartitionWithSource> collapse_number_range(Vector<PatternPartitionWithSource> result)
{
// Returning result unmodified is guaranteed to be a correct implementation of CollapseNumberRange.
return result;
} }
// 15.5.22 FormatNumericRange ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrange // 15.5.22 FormatNumericRange ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrange
ThrowCompletionOr<String> format_numeric_range(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end) ThrowCompletionOr<String> format_numeric_range(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{ {
// 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y). // 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y).
auto parts = TRY(partition_number_range_pattern(vm, number_format, move(start), move(end))); {
// NOTE: We short-circuit PartitionNumberRangePattern as we do not need individual partitions. But we must still
// perform the NaN sanity checks from its first step.
// 2. Let result be the empty String. // 1. If x is NaN or y is NaN, throw a RangeError exception.
StringBuilder result; if (start.is_nan())
return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "start"sv);
// 3. For each part in parts, do if (end.is_nan())
for (auto& part : parts) { return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "end"sv);
// a. Set result to the string-concatenation of result and part.[[Value]].
result.append(part.value);
} }
// 2. Let result be the empty String.
// 3. For each part in parts, do
// a. Set result to the string-concatenation of result and part.[[Value]].
// 4. Return result. // 4. Return result.
return MUST(result.to_string()); return number_format.formatter().format_range(start.to_value(), end.to_value());
} }
// 15.5.23 FormatNumericRangeToParts ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrangetoparts // 15.5.23 FormatNumericRangeToParts ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrangetoparts
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end) ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{ {
auto& realm = *vm.current_realm(); auto& realm = *vm.current_realm();
// 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y). // 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y).
auto parts = TRY(partition_number_range_pattern(vm, number_format, move(start), move(end))); auto parts = TRY(partition_number_range_pattern(vm, number_format, start, end));
// 2. Let result be ! ArrayCreate(0). // 2. Let result be ! ArrayCreate(0).
auto result = MUST(Array::create(realm, 0)); auto result = MUST(Array::create(realm, 0));

View file

@ -194,10 +194,8 @@ Vector<::Locale::NumberFormat::Partition> partition_number_pattern(NumberFormat
String format_numeric(NumberFormat const&, MathematicalValue const& number); String format_numeric(NumberFormat const&, MathematicalValue const& number);
NonnullGCPtr<Array> format_numeric_to_parts(VM&, NumberFormat const&, MathematicalValue const& number); NonnullGCPtr<Array> format_numeric_to_parts(VM&, NumberFormat const&, MathematicalValue const& number);
ThrowCompletionOr<MathematicalValue> to_intl_mathematical_value(VM&, Value value); ThrowCompletionOr<MathematicalValue> to_intl_mathematical_value(VM&, Value value);
ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pattern(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end); ThrowCompletionOr<Vector<::Locale::NumberFormat::Partition>> partition_number_range_pattern(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
Vector<PatternPartitionWithSource> format_approximately(NumberFormat&, Vector<PatternPartitionWithSource> result); ThrowCompletionOr<String> format_numeric_range(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
Vector<PatternPartitionWithSource> collapse_number_range(Vector<PatternPartitionWithSource> result); ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
ThrowCompletionOr<String> format_numeric_range(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end);
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end);
} }

View file

@ -16,10 +16,7 @@
#include <math.h> #include <math.h>
#include <unicode/numberformatter.h> #include <unicode/numberformatter.h>
#include <unicode/numberrangeformatter.h>
#if ENABLE_UNICODE_DATA
# include <LibUnicode/UnicodeData.h>
#endif
namespace Locale { namespace Locale {
@ -534,6 +531,11 @@ static constexpr StringView icu_number_format_field_to_string(i32 field, NumberF
} }
struct Range { struct Range {
// Returns true when `position` lies inside the half-open interval [start, end).
constexpr bool contains(i32 position) const
{
    return position >= start && end > position;
}
constexpr bool operator<(Range const& other) const constexpr bool operator<(Range const& other) const
{ {
if (start < other.start) if (start < other.start)
@ -604,8 +606,9 @@ static void flatten_partitions(Vector<Range>& partitions)
class NumberFormatImpl : public NumberFormat { class NumberFormatImpl : public NumberFormat {
public: public:
NumberFormatImpl(icu::number::LocalizedNumberFormatter formatter, bool is_unit) NumberFormatImpl(icu::Locale& locale, icu::number::LocalizedNumberFormatter formatter, bool is_unit)
: m_formatter(move(formatter)) : m_locale(locale)
, m_formatter(move(formatter))
, m_is_unit(is_unit) , m_is_unit(is_unit)
{ {
} }
@ -648,10 +651,46 @@ public:
if (!formatted.has_value()) if (!formatted.has_value())
return {}; return {};
return format_to_parts_impl(formatted, value); return format_to_parts_impl(formatted, value, value);
}
// Formats the range from `start` to `end` as one string.
// Returns a default-constructed String when the range could not be formatted or when ICU reports a
// failure while extracting the formatted text.
virtual String format_range(Value const& start, Value const& end) const override
{
    auto formatted_range = format_range_impl(start, end);
    if (!formatted_range.has_value())
        return {};

    UErrorCode status = U_ZERO_ERROR;
    auto formatted_text = formatted_range->toTempString(status);

    if (icu_success(status))
        return icu_string_to_string(formatted_text);

    return {};
}
virtual Vector<Partition> format_range_to_parts(Value const& start, Value const& end) const override
{
auto formatted = format_range_impl(start, end);
if (!formatted.has_value())
return {};
return format_to_parts_impl(formatted, start, end);
} }
private: private:
// Converts a Value (either a double or a String) into an icu::Formattable.
// A double is wrapped directly; a String is handed to ICU as a string piece for ICU to parse.
// ICU rejecting the string is treated as a programming error and trips the VERIFY below.
static icu::Formattable value_to_formattable(Value const& value)
{
    UErrorCode status = U_ZERO_ERROR;

    auto converted = value.visit(
        [](double as_double) { return icu::Formattable { as_double }; },
        [&status](String const& as_string) {
            return icu::Formattable(icu_string_piece(as_string), status);
        });

    VERIFY(icu_success(status));
    return converted;
}
Optional<icu::number::FormattedNumber> format_impl(Value const& value) const Optional<icu::number::FormattedNumber> format_impl(Value const& value) const
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -670,8 +709,34 @@ private:
return formatted; return formatted;
} }
// Formats the range from `start` to `end` with ICU's number range formatter.
//
// The range formatter is built on first use from the skeleton of m_formatter and the stored locale,
// then cached in the mutable m_range_formatter member so later calls reuse it.
//
// Returns an empty Optional if ICU reports a failure while building the formatter or while formatting.
Optional<icu::number::FormattedNumberRange> format_range_impl(Value const& start, Value const& end) const
{
    UErrorCode status = U_ZERO_ERROR;

    if (!m_range_formatter.has_value()) {
        auto skeleton = icu::number::NumberFormatter::forSkeleton(m_formatter.toSkeleton(status), status);
        if (icu_failure(status))
            return {};

        auto formatter = icu::number::UnlocalizedNumberRangeFormatter().numberFormatterBoth(move(skeleton)).locale(m_locale);

        // The fluent builder calls above do not accept a UErrorCode; any error they hit is stored inside
        // the formatter. Pull it into `status` so the check below can actually observe a failure, and so
        // that a broken formatter is never cached.
        formatter.copyErrorTo(status);
        if (icu_failure(status))
            return {};

        m_range_formatter = move(formatter);
    }

    auto formattable_start = value_to_formattable(start);
    auto formattable_end = value_to_formattable(end);

    auto formatted = m_range_formatter->formatFormattableRange(formattable_start, formattable_end, status);
    if (icu_failure(status))
        return {};

    return formatted;
}
template<typename Formatted> template<typename Formatted>
Vector<Partition> format_to_parts_impl(Formatted const& formatted, Value const& value) const Vector<Partition> format_to_parts_impl(Formatted const& formatted, Value const& start, Value const& end) const
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -683,22 +748,48 @@ private:
ranges.empend(LITERAL_FIELD, 0, formatted_number.length()); ranges.empend(LITERAL_FIELD, 0, formatted_number.length());
icu::ConstrainedFieldPosition position; icu::ConstrainedFieldPosition position;
Optional<Range> start_range;
Optional<Range> end_range;
while (static_cast<bool>(formatted->nextPosition(position, status)) && icu_success(status)) { while (static_cast<bool>(formatted->nextPosition(position, status)) && icu_success(status)) {
if (position.getCategory() == UFIELD_CATEGORY_NUMBER_RANGE_SPAN) {
if (position.getField() == 0)
start_range.emplace(position.getField(), position.getStart(), position.getLimit());
else
end_range.emplace(position.getField(), position.getStart(), position.getLimit());
} else {
ranges.empend(position.getField(), position.getStart(), position.getLimit()); ranges.empend(position.getField(), position.getStart(), position.getLimit());
} }
}
flatten_partitions(ranges); flatten_partitions(ranges);
auto apply_to_partition = [&](Partition& partition, auto field, auto index) {
if (start_range.has_value() && start_range->contains(index)) {
partition.type = icu_number_format_field_to_string(field, start, m_is_unit);
partition.source = "startRange"sv;
return;
}
if (end_range.has_value() && end_range->contains(index)) {
partition.type = icu_number_format_field_to_string(field, end, m_is_unit);
partition.source = "endRange"sv;
return;
}
partition.type = icu_number_format_field_to_string(field, end, m_is_unit);
partition.source = "shared"sv;
};
Vector<Partition> result; Vector<Partition> result;
result.ensure_capacity(ranges.size()); result.ensure_capacity(ranges.size());
for (auto const& range : ranges) { for (auto const& range : ranges) {
auto string = formatted_number.tempSubStringBetween(range.start, range.end); auto value = formatted_number.tempSubStringBetween(range.start, range.end);
Partition partition; Partition partition;
partition.type = icu_number_format_field_to_string(range.field, value, m_is_unit); partition.value = icu_string_to_string(value);
partition.value = icu_string_to_string(string); apply_to_partition(partition, range.field, range.start);
result.unchecked_append(move(partition)); result.unchecked_append(move(partition));
} }
@ -706,7 +797,9 @@ private:
return result; return result;
} }
icu::Locale& m_locale;
icu::number::LocalizedNumberFormatter m_formatter; icu::number::LocalizedNumberFormatter m_formatter;
mutable Optional<icu::number::LocalizedNumberRangeFormatter> m_range_formatter;
bool m_is_unit { false }; bool m_is_unit { false };
}; };
@ -731,7 +824,7 @@ NonnullOwnPtr<NumberFormat> NumberFormat::create(
} }
bool is_unit = display_options.style == NumberFormatStyle::Unit; bool is_unit = display_options.style == NumberFormatStyle::Unit;
return adopt_own(*new NumberFormatImpl(move(formatter), is_unit)); return adopt_own(*new NumberFormatImpl(locale_data->locale(), move(formatter), is_unit));
} }
Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; } Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
@ -764,50 +857,4 @@ String replace_digits_for_number_system(StringView system, StringView number)
return MUST(builder.to_string()); return MUST(builder.to_string());
} }
#if ENABLE_UNICODE_DATA
static u32 last_code_point(StringView string)
{
Utf8View utf8_string { string };
u32 code_point = 0;
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
code_point = *it;
return code_point;
}
#endif
// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper)
{
#if ENABLE_UNICODE_DATA
auto range_pattern_with_spacing = [&]() {
return MUST(String::formatted(" {} ", range_separator));
};
Utf8View utf8_range_separator { range_separator };
Utf8View utf8_upper { upper };
// NOTE: Our implementation does the prescribed checks backwards for simplicity.
// To determine whether to add spacing, the currently recommended heuristic is:
// 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space))
return {};
}
// 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number))
return range_pattern_with_spacing();
}
if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number))
return range_pattern_with_spacing();
#endif
return {};
}
} }

View file

@ -151,6 +151,7 @@ public:
struct Partition { struct Partition {
StringView type; StringView type;
String value; String value;
StringView source;
}; };
using Value = Variant<double, String>; using Value = Variant<double, String>;
@ -159,6 +160,9 @@ public:
virtual String format_to_decimal(Value const&) const = 0; virtual String format_to_decimal(Value const&) const = 0;
virtual Vector<Partition> format_to_parts(Value const&) const = 0; virtual Vector<Partition> format_to_parts(Value const&) const = 0;
virtual String format_range(Value const&, Value const&) const = 0;
virtual Vector<Partition> format_range_to_parts(Value const&, Value const&) const = 0;
protected: protected:
NumberFormat() = default; NumberFormat() = default;
}; };
@ -181,6 +185,4 @@ Optional<StringView> get_number_system_symbol(StringView locale, StringView syst
Optional<ReadonlySpan<u32>> get_digits_for_number_system(StringView system); Optional<ReadonlySpan<u32>> get_digits_for_number_system(StringView system);
String replace_digits_for_number_system(StringView system, StringView number); String replace_digits_for_number_system(StringView system, StringView number);
Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper);
} }