LibJS+LibUnicode: Make the collation sensitivity default locale-aware

Note that the locale-provided default currently happens to be 'variant' for every locale.
This commit is contained in:
Timothy Flynn 2024-08-14 16:18:06 -04:00 committed by Andreas Kling
parent 78625c746d
commit ca1257c6f9
Notes: github-actions[bot] 2024-08-15 11:45:25 +00:00
5 changed files with 53 additions and 14 deletions

View file

@ -37,7 +37,7 @@ public:
StringView usage_string() const { return Unicode::usage_to_string(m_usage); }
Unicode::Sensitivity sensitivity() const { return m_sensitivity; }
void set_sensitivity(StringView sensitivity) { m_sensitivity = Unicode::sensitivity_from_string(sensitivity); }
void set_sensitivity(Unicode::Sensitivity sensitivity) { m_sensitivity = sensitivity; }
StringView sensitivity_string() const { return Unicode::sensitivity_to_string(m_sensitivity); }
Unicode::CaseFirst case_first() const { return m_case_first; }

View file

@ -152,24 +152,26 @@ ThrowCompletionOr<NonnullGCPtr<Object>> CollatorConstructor::construct(FunctionO
// 31. Let resolvedLocaleData be r.[[LocaleData]].
// 32. Let sensitivity be ? GetOption(options, "sensitivity", string, « "base", "accent", "case", "variant" », undefined).
auto sensitivity = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {}));
auto sensitivity_value = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {}));
// 33. If sensitivity is undefined, then
if (sensitivity.is_undefined()) {
if (sensitivity_value.is_undefined()) {
// a. If usage is "sort", then
if (collator->usage() == Unicode::Usage::Sort) {
// i. Set sensitivity to "variant".
sensitivity = PrimitiveString::create(vm, "variant"_string);
sensitivity_value = PrimitiveString::create(vm, "variant"_string);
}
// b. Else,
else {
// FIXME: i. Set sensitivity to resolvedLocaleData.[[sensitivity]].
sensitivity = PrimitiveString::create(vm, "base"_string);
// i. Set sensitivity to resolvedLocaleData.[[sensitivity]].
// NOTE: We do not acquire the default [[sensitivity]] here. Instead, we default the option to null,
// and let LibUnicode fill in the default value if an override was not provided here.
}
}
// 34. Set collator.[[Sensitivity]] to sensitivity.
collator->set_sensitivity(sensitivity.as_string().utf8_string_view());
Optional<Unicode::Sensitivity> sensitivity;
if (!sensitivity_value.is_undefined())
sensitivity = Unicode::sensitivity_from_string(sensitivity_value.as_string().utf8_string_view());
// 35. Let defaultIgnorePunctuation be resolvedLocaleData.[[ignorePunctuation]].
// NOTE: We do not acquire the default [[ignorePunctuation]] here. Instead, we default the option to null,
@ -187,12 +189,15 @@ ThrowCompletionOr<NonnullGCPtr<Object>> CollatorConstructor::construct(FunctionO
collator->locale(),
collator->usage(),
collator->collation(),
collator->sensitivity(),
sensitivity,
collator->case_first(),
collator->numeric(),
ignore_punctuation);
collator->set_collator(move(icu_collator));
// 34. Set collator.[[Sensitivity]] to sensitivity.
collator->set_sensitivity(collator->collator().sensitivity());
// 37. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
collator->set_ignore_punctuation(collator->collator().ignore_punctuation());

View file

@ -28,9 +28,12 @@ describe("correct behavior", () => {
const en1 = new Intl.Collator("en");
expect(en1.resolvedOptions().sensitivity).toBe("variant");
const en2 = new Intl.Collator("en", { usage: "search" });
expect(en2.resolvedOptions().sensitivity).toBe("variant");
["base", "accent", "case", "variant"].forEach(sensitivity => {
const en2 = new Intl.Collator("en", { sensitivity: sensitivity });
expect(en2.resolvedOptions().sensitivity).toBe(sensitivity);
const en3 = new Intl.Collator("en", { sensitivity: sensitivity });
expect(en3.resolvedOptions().sensitivity).toBe(sensitivity);
});
});

View file

@ -92,6 +92,28 @@ static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
VERIFY_NOT_REACHED();
}
// Derives the Sensitivity that corresponds to the given collator's current
// UCOL_STRENGTH attribute (and, for primary strength, its UCOL_CASE_LEVEL attribute).
static Sensitivity sensitivity_for_collator(icu::Collator const& collator)
{
    UErrorCode status = U_ZERO_ERROR;

    auto const strength = collator.getAttribute(UCOL_STRENGTH, status);
    VERIFY(icu_success(status));

    if (strength == UCOL_SECONDARY)
        return Sensitivity::Accent;

    // Every strength other than primary or secondary is reported as "variant".
    if (strength != UCOL_PRIMARY)
        return Sensitivity::Variant;

    // Primary strength alone is ambiguous: the case-level attribute tells "case" apart from "base".
    auto const case_level = collator.getAttribute(UCOL_CASE_LEVEL, status);
    VERIFY(icu_success(status));

    if (case_level == UCOL_ON)
        return Sensitivity::Case;

    return Sensitivity::Base;
}
CaseFirst case_first_from_string(StringView case_first)
{
if (case_first == "upper"sv)
@ -165,6 +187,11 @@ public:
VERIFY_NOT_REACHED();
}
// Reports the sensitivity by reading it back from the underlying ICU collator (m_collator)
// rather than from a separately stored value.
virtual Sensitivity sensitivity() const override
{
return sensitivity_for_collator(*m_collator);
}
virtual bool ignore_punctuation() const override
{
return ignore_punctuation_for_collator(*m_collator);
@ -178,7 +205,7 @@ NonnullOwnPtr<Collator> Collator::create(
StringView locale,
Usage usage,
StringView collation,
Sensitivity sensitivity,
Optional<Sensitivity> sensitivity,
CaseFirst case_first,
bool numeric,
Optional<bool> ignore_punctuation)
@ -198,10 +225,13 @@ NonnullOwnPtr<Collator> Collator::create(
VERIFY(icu_success(status));
};
if (!sensitivity.has_value())
sensitivity = sensitivity_for_collator(*collator);
if (!ignore_punctuation.has_value())
ignore_punctuation = ignore_punctuation_for_collator(*collator);
set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
set_attribute(UCOL_STRENGTH, icu_sensitivity(*sensitivity));
set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);

View file

@ -41,7 +41,7 @@ public:
StringView locale,
Usage,
StringView collation,
Sensitivity,
Optional<Sensitivity>,
CaseFirst,
bool numeric,
Optional<bool> ignore_punctuation);
@ -55,6 +55,7 @@ public:
};
virtual Order compare(StringView, StringView) const = 0;
// Returns the sensitivity this collator is using; implemented by concrete collators.
virtual Sensitivity sensitivity() const = 0;
virtual bool ignore_punctuation() const = 0;
protected: