Browse Source

LibJS+LibUnicode: Fully implement Intl.Collator with ICU

We were never able to implement anything other than a basic, locale-
unaware collator with the JSON export of the CLDR as it did not have
collation data. We can now use ICU to implement collation.
Timothy Flynn 11 months ago
parent
commit
eb7e3583c9

+ 1 - 78
Userland/Libraries/LibJS/Runtime/Intl/Collator.cpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -16,83 +16,6 @@ Collator::Collator(Object& prototype)
 {
 }
 
-void Collator::set_usage(StringView type)
-{
-    if (type == "sort"sv)
-        m_usage = Usage::Sort;
-    else if (type == "search"sv)
-        m_usage = Usage::Search;
-    else
-        VERIFY_NOT_REACHED();
-}
-
-StringView Collator::usage_string() const
-{
-    switch (m_usage) {
-    case Usage::Sort:
-        return "sort"sv;
-    case Usage::Search:
-        return "search"sv;
-    default:
-        VERIFY_NOT_REACHED();
-    }
-}
-
-void Collator::set_sensitivity(StringView type)
-{
-    if (type == "base"sv)
-        m_sensitivity = Sensitivity::Base;
-    else if (type == "accent"sv)
-        m_sensitivity = Sensitivity::Accent;
-    else if (type == "case"sv)
-        m_sensitivity = Sensitivity::Case;
-    else if (type == "variant"sv)
-        m_sensitivity = Sensitivity::Variant;
-    else
-        VERIFY_NOT_REACHED();
-}
-
-StringView Collator::sensitivity_string() const
-{
-    switch (m_sensitivity) {
-    case Sensitivity::Base:
-        return "base"sv;
-    case Sensitivity::Accent:
-        return "accent"sv;
-    case Sensitivity::Case:
-        return "case"sv;
-    case Sensitivity::Variant:
-        return "variant"sv;
-    default:
-        VERIFY_NOT_REACHED();
-    }
-}
-
-void Collator::set_case_first(StringView case_first)
-{
-    if (case_first == "upper"sv)
-        m_case_first = CaseFirst::Upper;
-    else if (case_first == "lower"sv)
-        m_case_first = CaseFirst::Lower;
-    else if (case_first == "false"sv)
-        m_case_first = CaseFirst::False;
-    else
-        VERIFY_NOT_REACHED();
-}
-
-StringView Collator::case_first_string() const
-{
-    switch (m_case_first) {
-    case CaseFirst::Upper:
-        return "upper"sv;
-    case CaseFirst::Lower:
-        return "lower"sv;
-    case CaseFirst::False:
-        return "false"sv;
-    default:
-        VERIFY_NOT_REACHED();
-    }
-}
 void Collator::visit_edges(Visitor& visitor)
 {
     Base::visit_edges(visitor);

+ 25 - 36
Userland/Libraries/LibJS/Runtime/Intl/Collator.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -11,6 +11,7 @@
 #include <AK/StringView.h>
 #include <LibJS/Runtime/Intl/CollatorCompareFunction.h>
 #include <LibJS/Runtime/Object.h>
+#include <LibUnicode/Collator.h>
 
 namespace JS::Intl {
 
@@ -19,24 +20,6 @@ class Collator final : public Object {
     JS_DECLARE_ALLOCATOR(Collator);
 
 public:
-    enum class Usage {
-        Sort,
-        Search,
-    };
-
-    enum class Sensitivity {
-        Base,
-        Accent,
-        Case,
-        Variant,
-    };
-
-    enum class CaseFirst {
-        Upper,
-        Lower,
-        False,
-    };
-
     static constexpr auto relevant_extension_keys()
     {
         // 10.2.3 Internal slots, https://tc39.es/ecma402/#sec-intl-collator-internal-slots
@@ -49,17 +32,17 @@ public:
     String const& locale() const { return m_locale; }
     void set_locale(String locale) { m_locale = move(locale); }
 
-    Usage usage() const { return m_usage; }
-    void set_usage(StringView usage);
-    StringView usage_string() const;
+    Unicode::Usage usage() const { return m_usage; }
+    void set_usage(StringView usage) { m_usage = Unicode::usage_from_string(usage); }
+    StringView usage_string() const { return Unicode::usage_to_string(m_usage); }
 
-    Sensitivity sensitivity() const { return m_sensitivity; }
-    void set_sensitivity(StringView sensitivity);
-    StringView sensitivity_string() const;
+    Unicode::Sensitivity sensitivity() const { return m_sensitivity; }
+    void set_sensitivity(StringView sensitivity) { m_sensitivity = Unicode::sensitivity_from_string(sensitivity); }
+    StringView sensitivity_string() const { return Unicode::sensitivity_to_string(m_sensitivity); }
 
-    CaseFirst case_first() const { return m_case_first; }
-    void set_case_first(StringView case_first);
-    StringView case_first_string() const;
+    Unicode::CaseFirst case_first() const { return m_case_first; }
+    void set_case_first(StringView case_first) { m_case_first = Unicode::case_first_from_string(case_first); }
+    StringView case_first_string() const { return Unicode::case_first_to_string(m_case_first); }
 
     String const& collation() const { return m_collation; }
     void set_collation(String collation) { m_collation = move(collation); }
@@ -73,19 +56,25 @@ public:
     CollatorCompareFunction* bound_compare() const { return m_bound_compare; }
     void set_bound_compare(CollatorCompareFunction* bound_compare) { m_bound_compare = bound_compare; }
 
+    Unicode::Collator const& collator() const { return *m_collator; }
+    void set_collator(NonnullOwnPtr<Unicode::Collator> collator) { m_collator = move(collator); }
+
 private:
     explicit Collator(Object& prototype);
 
     virtual void visit_edges(Visitor&) override;
 
-    String m_locale;                                    // [[Locale]]
-    Usage m_usage { Usage::Sort };                      // [[Usage]]
-    Sensitivity m_sensitivity { Sensitivity::Variant }; // [[Sensitivity]]
-    CaseFirst m_case_first { CaseFirst::False };        // [[CaseFirst]]
-    String m_collation;                                 // [[Collation]]
-    bool m_ignore_punctuation { false };                // [[IgnorePunctuation]]
-    bool m_numeric { false };                           // [[Numeric]]
-    GCPtr<CollatorCompareFunction> m_bound_compare;     // [[BoundCompare]]
+    String m_locale;                                                      // [[Locale]]
+    Unicode::Usage m_usage { Unicode::Usage::Sort };                      // [[Usage]]
+    Unicode::Sensitivity m_sensitivity { Unicode::Sensitivity::Variant }; // [[Sensitivity]]
+    Unicode::CaseFirst m_case_first { Unicode::CaseFirst::False };        // [[CaseFirst]]
+    String m_collation;                                                   // [[Collation]]
+    bool m_ignore_punctuation { false };                                  // [[IgnorePunctuation]]
+    bool m_numeric { false };                                             // [[Numeric]]
+    GCPtr<CollatorCompareFunction> m_bound_compare;                       // [[BoundCompare]]
+
+    // Non-standard. Stores the ICU collator for the Intl object's collation options.
+    OwnPtr<Unicode::Collator> m_collator;
 };
 
 }

+ 22 - 19
Userland/Libraries/LibJS/Runtime/Intl/CollatorCompareFunction.cpp

@@ -31,22 +31,10 @@ void CollatorCompareFunction::initialize(Realm&)
     define_direct_property(vm.names.name, PrimitiveString::create(vm, String {}), Attribute::Configurable);
 }
 
-// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
-double compare_strings(Collator& collator, Utf8View const& x, Utf8View const& y)
+void CollatorCompareFunction::visit_edges(Visitor& visitor)
 {
-    // FIXME: Implement https://unicode.org/reports/tr10
-    (void)collator;
-    auto x_iterator = x.begin();
-    auto y_iterator = y.begin();
-    for (; x_iterator != x.end() && y_iterator != y.end(); ++x_iterator, ++y_iterator) {
-        if (*x_iterator != *y_iterator)
-            return static_cast<double>(*x_iterator) - static_cast<double>(*y_iterator);
-    }
-    if (x_iterator != x.end())
-        return 1.0;
-    if (y_iterator != y.end())
-        return -1.0;
-    return 0.0;
+    Base::visit_edges(visitor);
+    visitor.visit(m_collator);
 }
 
 // 10.3.3.1 Collator Compare Functions, https://tc39.es/ecma402/#sec-collator-compare-functions
@@ -61,17 +49,32 @@ ThrowCompletionOr<Value> CollatorCompareFunction::call()
 
     // 5. Let X be ? ToString(x).
     auto x = TRY(vm.argument(0).to_string(vm));
+
     // 6. Let Y be ? ToString(y).
     auto y = TRY(vm.argument(1).to_string(vm));
 
     // 7. Return CompareStrings(collator, X, Y).
-    return compare_strings(m_collator, x.code_points(), y.code_points());
+    return compare_strings(m_collator, x, y);
 }
 
-void CollatorCompareFunction::visit_edges(Visitor& visitor)
+// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
+int compare_strings(Collator const& collator, StringView x, StringView y)
 {
-    Base::visit_edges(visitor);
-    visitor.visit(m_collator);
+    auto result = collator.collator().compare(x, y);
+
+    // The result is intended to correspond with a sort order of String values according to the effective locale and
+    // collation options of collator, and will be negative when x is ordered before y, positive when x is ordered after
+    // y, and zero in all other cases (representing no relative ordering between x and y).
+    switch (result) {
+    case Unicode::Collator::Order::Before:
+        return -1;
+    case Unicode::Collator::Order::Equal:
+        return 0;
+    case Unicode::Collator::Order::After:
+        return 1;
+    }
+
+    VERIFY_NOT_REACHED();
 }
 
 }

+ 1 - 1
Userland/Libraries/LibJS/Runtime/Intl/CollatorCompareFunction.h

@@ -30,6 +30,6 @@ private:
     NonnullGCPtr<Collator> m_collator; // [[Collator]]
 };
 
-double compare_strings(Collator&, Utf8View const& x, Utf8View const& y);
+int compare_strings(Collator const&, StringView x, StringView y);
 
 }

+ 12 - 1
Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp

@@ -114,7 +114,7 @@ static ThrowCompletionOr<NonnullGCPtr<Collator>> initialize_collator(VM& vm, Col
     // 27. If sensitivity is undefined, then
     if (sensitivity.is_undefined()) {
         // a. If usage is "sort", then
-        if (collator.usage() == Collator::Usage::Sort) {
+        if (collator.usage() == Unicode::Usage::Sort) {
             // i. Let sensitivity be "variant".
             sensitivity = PrimitiveString::create(vm, "variant"_string);
         }
@@ -136,6 +136,17 @@ static ThrowCompletionOr<NonnullGCPtr<Collator>> initialize_collator(VM& vm, Col
     // 30. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
     collator.set_ignore_punctuation(ignore_punctuation.as_bool());
 
+    // Non-standard, create an ICU collator for this Intl object.
+    auto icu_collator = Unicode::Collator::create(
+        collator.locale(),
+        collator.usage(),
+        collator.collation(),
+        collator.sensitivity(),
+        collator.case_first(),
+        collator.numeric(),
+        collator.ignore_punctuation());
+    collator.set_collator(move(icu_collator));
+
     // 31. Return collator.
     return collator;
 }

+ 1 - 1
Userland/Libraries/LibJS/Runtime/StringPrototype.cpp

@@ -566,7 +566,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::locale_compare)
     auto collator = TRY(construct(vm, realm.intrinsics().intl_collator_constructor(), vm.argument(1), vm.argument(2)));
 
     // 5. Return CompareStrings(collator, S, thatValue).
-    return Intl::compare_strings(static_cast<Intl::Collator&>(*collator), string.code_points(), that_value.code_points());
+    return Intl::compare_strings(static_cast<Intl::Collator const&>(*collator), string, that_value);
 }
 
 // 22.1.3.13 String.prototype.match ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.match

+ 62 - 5
Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.compare.js

@@ -17,13 +17,13 @@ describe("correct behavior", () => {
             const aTob = collator.compare(a, b);
             const bToa = collator.compare(b, a);
 
-            expect(aTob > 0).toBeTrue();
-            expect(aTob).toBe(-bToa);
+            expect(aTob).toBe(1);
+            expect(bToa).toBe(-1);
         }
 
         compareBoth("a", "");
         compareBoth("1", "");
-        compareBoth("a", "A");
+        compareBoth("A", "a");
         compareBoth("7", "3");
         compareBoth("0000", "0");
 
@@ -31,8 +31,65 @@ describe("correct behavior", () => {
         expect(collator.compare("undefined", undefined)).toBe(0);
 
         expect(collator.compare("null", null)).toBe(0);
-        expect(collator.compare("null", undefined)).not.toBe(0);
-        expect(collator.compare("null") < 0).toBeTrue();
+        expect(collator.compare("null", undefined)).toBe(-1);
+        expect(collator.compare("null")).toBe(-1);
+    });
+
+    test("canonically equivalent strings", () => {
+        var tests = [
+            ["ä\u0306", "a\u0308\u0306"],
+            ["ă\u0308", "a\u0306\u0308"],
+            ["ạ\u0308", "a\u0323\u0308"],
+            ["a\u0308\u0323", "a\u0323\u0308"],
+            ["ä\u0323", "a\u0323\u0308"],
+            ["Å", "Å"],
+            ["Å", "A\u030A"],
+            ["Ç", "C\u0327"],
+            ["ḋ\u0323", "ḍ\u0307"],
+            ["ḋ\u0323", "d\u0323\u0307"],
+            ["ô", "o\u0302"],
+            ["ö", "o\u0308"],
+            ["q\u0307\u0323", "q\u0323\u0307"],
+            ["ṩ", "s\u0323\u0307"],
+            ["ự", "ụ\u031B"],
+            ["ự", "u\u031B\u0323"],
+            ["ự", "ư\u0323"],
+            ["ự", "u\u0323\u031B"],
+            ["Ω", "Ω"],
+            ["x\u031B\u0323", "x\u0323\u031B"],
+            ["퓛", "\u1111\u1171\u11B6"],
+            ["北", "\uD87E\uDC2B"],
+            ["가", "\u1100\u1161"],
+            ["\uD834\uDD5E", "\uD834\uDD57\uD834\uDD65"],
+        ];
+
+        const en = new Intl.Collator("en");
+        const ja = new Intl.Collator("ja");
+        const th = new Intl.Collator("th");
+
+        tests.forEach(test => {
+            expect(en.compare(test[0], test[1])).toBe(0);
+            expect(ja.compare(test[0], test[1])).toBe(0);
+            expect(th.compare(test[0], test[1])).toBe(0);
+        });
+    });
+
+    // For every locale and every ignorePunctuation setting, a string made only of whitespace or
+    // punctuation must compare equal to the empty string exactly when the collator reports (via
+    // resolvedOptions) that punctuation is ignored; otherwise the empty string must sort first.
+    test("ignorePunctuation", () => {
+        [undefined, true, false].forEach(ignorePunctuation => {
+            ["en", "ja", "th"].forEach(locale => {
+                const collator = new Intl.Collator(locale, { ignorePunctuation });
+
+                // Read the resolved value rather than the requested one, so the `undefined` case
+                // follows whatever default the collator picked.
+                const expected = collator.resolvedOptions().ignorePunctuation ? 0 : -1;
+
+                expect(collator.compare("", " ")).toBe(expected);
+                expect(collator.compare("", ",")).toBe(expected);
+            });
+        });
+    });
 
     test("UTF-16", () => {

+ 1 - 1
Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.localeCompare.js

@@ -15,7 +15,7 @@ test("basic functionality", () => {
 
     compareBoth("a", "");
     compareBoth("1", "");
-    compareBoth("a", "A");
+    compareBoth("A", "a");
     compareBoth("7", "3");
     compareBoth("0000", "0");
 

+ 1 - 0
Userland/Libraries/LibUnicode/CMakeLists.txt

@@ -2,6 +2,7 @@ include(${SerenityOS_SOURCE_DIR}/Meta/CMake/unicode_data.cmake)
 
 set(SOURCES
     CharacterTypes.cpp
+    Collator.cpp
     CurrencyCode.cpp
     DateTimeFormat.cpp
     DisplayNames.cpp

+ 196 - 0
Userland/Libraries/LibUnicode/Collator.cpp

@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibUnicode/Collator.h>
+#include <LibUnicode/ICU.h>
+
+#include <unicode/coll.h>
+
+namespace Unicode {
+
+// Converts the textual usage option into a Usage value.
+// Only "sort" and "search" are recognized; any other input is treated as unreachable.
+Usage usage_from_string(StringView usage)
+{
+    if (usage == "search"sv)
+        return Usage::Search;
+    if (usage == "sort"sv)
+        return Usage::Sort;
+
+    VERIFY_NOT_REACHED();
+}
+
+// Returns the textual form ("sort" / "search") of a Usage value.
+// The switch is kept exhaustive so that a value outside the enum falls through to the assertion.
+StringView usage_to_string(Usage usage)
+{
+    switch (usage) {
+    case Usage::Search:
+        return "search"sv;
+    case Usage::Sort:
+        return "sort"sv;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Returns a copy of `locale` whose Unicode "co" keyword reflects the requested usage:
+// for Usage::Sort the caller-supplied collation is applied, for Usage::Search the keyword is
+// forced to "search". The input locale itself is left untouched.
+static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    auto locale_copy = adopt_own(*locale.clone());
+
+    switch (usage) {
+    case Usage::Search:
+        locale_copy->setUnicodeKeywordValue("co", "search", status);
+        break;
+    case Usage::Sort:
+        locale_copy->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
+        break;
+    }
+
+    // Setting the keyword is not expected to fail; treat any ICU error as a programming error.
+    VERIFY(icu_success(status));
+    return locale_copy;
+}
+
+// Converts the textual sensitivity option into a Sensitivity value.
+// Recognized inputs are "base", "accent", "case" and "variant"; anything else is treated as
+// unreachable.
+Sensitivity sensitivity_from_string(StringView sensitivity)
+{
+    struct Entry {
+        StringView name;
+        Sensitivity value;
+    };
+
+    Entry const entries[] = {
+        { "base"sv, Sensitivity::Base },
+        { "accent"sv, Sensitivity::Accent },
+        { "case"sv, Sensitivity::Case },
+        { "variant"sv, Sensitivity::Variant },
+    };
+
+    for (auto const& entry : entries) {
+        if (sensitivity == entry.name)
+            return entry.value;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Returns the textual form ("base" / "accent" / "case" / "variant") of a Sensitivity value.
+// The switch is kept exhaustive so that a value outside the enum falls through to the assertion.
+StringView sensitivity_to_string(Sensitivity sensitivity)
+{
+    switch (sensitivity) {
+    case Sensitivity::Variant:
+        return "variant"sv;
+    case Sensitivity::Case:
+        return "case"sv;
+    case Sensitivity::Accent:
+        return "accent"sv;
+    case Sensitivity::Base:
+        return "base"sv;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Maps a Sensitivity onto the ICU collation strength used for UCOL_STRENGTH.
+static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
+{
+    switch (sensitivity) {
+    // Base and Case share the primary strength; Collator::create() tells them apart by turning
+    // UCOL_CASE_LEVEL on only for Case.
+    case Sensitivity::Base:
+    case Sensitivity::Case:
+        return UCOL_PRIMARY;
+    case Sensitivity::Accent:
+        return UCOL_SECONDARY;
+    case Sensitivity::Variant:
+        return UCOL_TERTIARY;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Converts the textual caseFirst option into a CaseFirst value.
+// Recognized inputs are "upper", "lower" and "false"; anything else is treated as unreachable.
+CaseFirst case_first_from_string(StringView case_first)
+{
+    if (case_first == "false"sv)
+        return CaseFirst::False;
+    if (case_first == "lower"sv)
+        return CaseFirst::Lower;
+    if (case_first == "upper"sv)
+        return CaseFirst::Upper;
+
+    VERIFY_NOT_REACHED();
+}
+
+// Returns the textual form ("upper" / "lower" / "false") of a CaseFirst value.
+// The switch is kept exhaustive so that a value outside the enum falls through to the assertion.
+StringView case_first_to_string(CaseFirst case_first)
+{
+    switch (case_first) {
+    case CaseFirst::False:
+        return "false"sv;
+    case CaseFirst::Lower:
+        return "lower"sv;
+    case CaseFirst::Upper:
+        return "upper"sv;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Maps a CaseFirst onto the ICU value used for the UCOL_CASE_FIRST attribute.
+// CaseFirst::False disables the attribute (UCOL_OFF).
+static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
+{
+    switch (case_first) {
+    case CaseFirst::False:
+        return UCOL_OFF;
+    case CaseFirst::Lower:
+        return UCOL_LOWER_FIRST;
+    case CaseFirst::Upper:
+        return UCOL_UPPER_FIRST;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+// Concrete Collator that owns an icu::Collator and forwards comparisons to it.
+class CollatorImpl : public Collator {
+public:
+    explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
+        : m_collator(move(collator))
+    {
+    }
+
+    // Compares two UTF-8 strings with the wrapped ICU collator and translates ICU's
+    // UCOL_LESS / UCOL_EQUAL / UCOL_GREATER into Collator::Order. An ICU failure is asserted on.
+    virtual Collator::Order compare(StringView lhs, StringView rhs) const override
+    {
+        UErrorCode status = U_ZERO_ERROR;
+
+        auto const lhs_piece = icu_string_piece(lhs);
+        auto const rhs_piece = icu_string_piece(rhs);
+
+        auto const comparison = m_collator->compareUTF8(lhs_piece, rhs_piece, status);
+        VERIFY(icu_success(status));
+
+        switch (comparison) {
+        case UCOL_GREATER:
+            return Order::After;
+        case UCOL_EQUAL:
+            return Order::Equal;
+        case UCOL_LESS:
+            return Order::Before;
+        }
+
+        VERIFY_NOT_REACHED();
+    }
+
+private:
+    NonnullOwnPtr<icu::Collator> m_collator;
+};
+
+// Builds an ICU-backed Collator for the given locale and collation options.
+//
+// The locale must be known to LocaleData::for_locale(), and every ICU call made here must
+// succeed; both conditions are enforced with VERIFY.
+NonnullOwnPtr<Collator> Collator::create(
+    StringView locale,
+    Usage usage,
+    StringView collation,
+    Sensitivity sensitivity,
+    CaseFirst case_first,
+    bool numeric,
+    bool ignore_punctuation)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    auto locale_data = LocaleData::for_locale(locale);
+    VERIFY(locale_data.has_value());
+
+    auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);
+
+    // Check the ICU status and the returned pointer *before* dereferencing it: a failed
+    // createInstance() does not hand back a usable collator.
+    auto* icu_collator = icu::Collator::createInstance(*locale_with_usage, status);
+    VERIFY(icu_success(status));
+    VERIFY(icu_collator != nullptr);
+    auto collator = adopt_own(*icu_collator);
+
+    auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
+        collator->setAttribute(attribute, value, status);
+        VERIFY(icu_success(status));
+    };
+
+    // "case" sensitivity is expressed as primary strength plus the case level; every other
+    // sensitivity leaves the case level off.
+    set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
+    set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
+    set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
+    set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
+    set_attribute(UCOL_ALTERNATE_HANDLING, ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
+
+    // Normalization is switched on for every collator, independent of the options passed in.
+    set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);
+
+    return adopt_own(*new CollatorImpl(move(collator)));
+}
+
+}

+ 62 - 0
Userland/Libraries/LibUnicode/Collator.h

@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/NonnullOwnPtr.h>
+#include <AK/StringView.h>
+
+namespace Unicode {
+
+enum class Usage { // What the collator is used for; decides the locale's "co" keyword.
+    Sort,   // "sort": the requested collation is applied as the "co" keyword.
+    Search, // "search": the "co" keyword is forced to "search".
+};
+Usage usage_from_string(StringView); // Accepts "sort" or "search"; any other input hits VERIFY_NOT_REACHED().
+StringView usage_to_string(Usage);   // Inverse of usage_from_string().
+
+enum class Sensitivity { // Which differences between strings count as unequal.
+    Base,    // "base": ICU primary strength.
+    Accent,  // "accent": ICU secondary strength.
+    Case,    // "case": ICU primary strength with the case level switched on.
+    Variant, // "variant": ICU tertiary strength.
+};
+Sensitivity sensitivity_from_string(StringView); // Accepts the four names above; any other input hits VERIFY_NOT_REACHED().
+StringView sensitivity_to_string(Sensitivity);   // Inverse of sensitivity_from_string().
+
+enum class CaseFirst { // Value applied to ICU's UCOL_CASE_FIRST attribute.
+    Upper, // "upper": UCOL_UPPER_FIRST.
+    Lower, // "lower": UCOL_LOWER_FIRST.
+    False, // "false": UCOL_OFF.
+};
+CaseFirst case_first_from_string(StringView); // Accepts "upper", "lower" or "false"; any other input hits VERIFY_NOT_REACHED().
+StringView case_first_to_string(CaseFirst);   // Inverse of case_first_from_string().
+
+class Collator { // Abstract string comparator; create() returns the ICU-backed implementation.
+public:
+    static NonnullOwnPtr<Collator> create( // Asserts (VERIFY) if the locale is unknown or any ICU call fails.
+        StringView locale,
+        Usage,
+        StringView collation, // Applied as the locale's "co" keyword when Usage is Sort.
+        Sensitivity,
+        CaseFirst,
+        bool numeric,             // true enables UCOL_NUMERIC_COLLATION.
+        bool ignore_punctuation); // true selects UCOL_SHIFTED alternate handling, false UCOL_NON_IGNORABLE.
+
+    virtual ~Collator() = default;
+
+    enum class Order { // Result of compare(lhs, rhs).
+        Before, // lhs sorts before rhs (ICU UCOL_LESS).
+        Equal,  // ICU UCOL_EQUAL.
+        After,  // lhs sorts after rhs (ICU UCOL_GREATER).
+    };
+    virtual Order compare(StringView, StringView) const = 0;
+
+protected:
+    Collator() = default; // Only subclasses may construct; callers go through create().
+};
+
+}