Browse Source

LibJS+LibUnicode: Make the collation sensitivity default locale-aware

Note this happens to be 'variant' for every locale currently.
Timothy Flynn 11 months ago
parent
commit
ca1257c6f9

+ 1 - 1
Userland/Libraries/LibJS/Runtime/Intl/Collator.h

@@ -37,7 +37,7 @@ public:
     StringView usage_string() const { return Unicode::usage_to_string(m_usage); }
 
     Unicode::Sensitivity sensitivity() const { return m_sensitivity; }
-    void set_sensitivity(StringView sensitivity) { m_sensitivity = Unicode::sensitivity_from_string(sensitivity); }
+    void set_sensitivity(Unicode::Sensitivity sensitivity) { m_sensitivity = sensitivity; }
     StringView sensitivity_string() const { return Unicode::sensitivity_to_string(m_sensitivity); }
 
     Unicode::CaseFirst case_first() const { return m_case_first; }

+ 13 - 8
Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp

@@ -152,24 +152,26 @@ ThrowCompletionOr<NonnullGCPtr<Object>> CollatorConstructor::construct(FunctionO
     // 31. Let resolvedLocaleData be r.[[LocaleData]].
 
     // 32. Let sensitivity be ? GetOption(options, "sensitivity", string, « "base", "accent", "case", "variant" », undefined).
-    auto sensitivity = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {}));
+    auto sensitivity_value = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {}));
 
     // 33. If sensitivity is undefined, then
-    if (sensitivity.is_undefined()) {
+    if (sensitivity_value.is_undefined()) {
         // a. If usage is "sort", then
         if (collator->usage() == Unicode::Usage::Sort) {
             // i. Set sensitivity to "variant".
-            sensitivity = PrimitiveString::create(vm, "variant"_string);
+            sensitivity_value = PrimitiveString::create(vm, "variant"_string);
         }
         // b. Else,
         else {
-            // FIXME: i. Set sensitivity to resolvedLocaleData.[[sensitivity]].
-            sensitivity = PrimitiveString::create(vm, "base"_string);
+            // i. Set sensitivity to resolvedLocaleData.[[sensitivity]].
+            // NOTE: We do not acquire the default [[sensitivity]] here. Instead, we default the option to null,
+            //       and let LibUnicode fill in the default value if an override was not provided here.
         }
     }
 
-    // 34. Set collator.[[Sensitivity]] to sensitivity.
-    collator->set_sensitivity(sensitivity.as_string().utf8_string_view());
+    Optional<Unicode::Sensitivity> sensitivity;
+    if (!sensitivity_value.is_undefined())
+        sensitivity = Unicode::sensitivity_from_string(sensitivity_value.as_string().utf8_string_view());
 
     // 35. Let defaultIgnorePunctuation be resolvedLocaleData.[[ignorePunctuation]].
     // NOTE: We do not acquire the default [[ignorePunctuation]] here. Instead, we default the option to null,
@@ -187,12 +189,15 @@ ThrowCompletionOr<NonnullGCPtr<Object>> CollatorConstructor::construct(FunctionO
         collator->locale(),
         collator->usage(),
         collator->collation(),
-        collator->sensitivity(),
+        sensitivity,
         collator->case_first(),
         collator->numeric(),
         ignore_punctuation);
     collator->set_collator(move(icu_collator));
 
+    // 34. Set collator.[[Sensitivity]] to sensitivity.
+    collator->set_sensitivity(collator->collator().sensitivity());
+
     // 37. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
     collator->set_ignore_punctuation(collator->collator().ignore_punctuation());
 

+ 5 - 2
Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.resolvedOptions.js

@@ -28,9 +28,12 @@ describe("correct behavior", () => {
         const en1 = new Intl.Collator("en");
         expect(en1.resolvedOptions().sensitivity).toBe("variant");
 
+        const en2 = new Intl.Collator("en", { usage: "search" });
+        expect(en2.resolvedOptions().sensitivity).toBe("variant");
+
         ["base", "accent", "case", "variant"].forEach(sensitivity => {
-            const en2 = new Intl.Collator("en", { sensitivity: sensitivity });
-            expect(en2.resolvedOptions().sensitivity).toBe(sensitivity);
+            const en3 = new Intl.Collator("en", { sensitivity: sensitivity });
+            expect(en3.resolvedOptions().sensitivity).toBe(sensitivity);
         });
     });
 

+ 32 - 2
Userland/Libraries/LibUnicode/Collator.cpp

@@ -92,6 +92,28 @@ static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
     VERIFY_NOT_REACHED();
 }
 
+static Sensitivity sensitivity_for_collator(icu::Collator const& collator)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    auto attribute = collator.getAttribute(UCOL_STRENGTH, status);
+    VERIFY(icu_success(status));
+
+    switch (attribute) {
+    case UCOL_PRIMARY:
+        attribute = collator.getAttribute(UCOL_CASE_LEVEL, status);
+        VERIFY(icu_success(status));
+
+        return attribute == UCOL_ON ? Sensitivity::Case : Sensitivity::Base;
+
+    case UCOL_SECONDARY:
+        return Sensitivity::Accent;
+
+    default:
+        return Sensitivity::Variant;
+    }
+}
+
 CaseFirst case_first_from_string(StringView case_first)
 {
     if (case_first == "upper"sv)
@@ -165,6 +187,11 @@ public:
         VERIFY_NOT_REACHED();
     }
 
+    virtual Sensitivity sensitivity() const override
+    {
+        return sensitivity_for_collator(*m_collator);
+    }
+
     virtual bool ignore_punctuation() const override
     {
         return ignore_punctuation_for_collator(*m_collator);
@@ -178,7 +205,7 @@ NonnullOwnPtr<Collator> Collator::create(
     StringView locale,
     Usage usage,
     StringView collation,
-    Sensitivity sensitivity,
+    Optional<Sensitivity> sensitivity,
     CaseFirst case_first,
     bool numeric,
     Optional<bool> ignore_punctuation)
@@ -198,10 +225,13 @@ NonnullOwnPtr<Collator> Collator::create(
         VERIFY(icu_success(status));
     };
 
+    if (!sensitivity.has_value())
+        sensitivity = sensitivity_for_collator(*collator);
+
     if (!ignore_punctuation.has_value())
         ignore_punctuation = ignore_punctuation_for_collator(*collator);
 
-    set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
+    set_attribute(UCOL_STRENGTH, icu_sensitivity(*sensitivity));
     set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
     set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
     set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);

+ 2 - 1
Userland/Libraries/LibUnicode/Collator.h

@@ -41,7 +41,7 @@ public:
         StringView locale,
         Usage,
         StringView collation,
-        Sensitivity,
+        Optional<Sensitivity>,
         CaseFirst,
         bool numeric,
         Optional<bool> ignore_punctuation);
@@ -55,6 +55,7 @@ public:
     };
     virtual Order compare(StringView, StringView) const = 0;
 
+    virtual Sensitivity sensitivity() const = 0;
     virtual bool ignore_punctuation() const = 0;
 
 protected: