123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- /*
- * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <LibUnicode/Collator.h>
- #include <LibUnicode/ICU.h>
- #include <unicode/coll.h>
- namespace Unicode {
- Usage usage_from_string(StringView usage)
- {
- if (usage == "sort"sv)
- return Usage::Sort;
- if (usage == "search"sv)
- return Usage::Search;
- VERIFY_NOT_REACHED();
- }
- StringView usage_to_string(Usage usage)
- {
- switch (usage) {
- case Usage::Sort:
- return "sort"sv;
- case Usage::Search:
- return "search"sv;
- }
- VERIFY_NOT_REACHED();
- }
- static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
- {
- auto result = adopt_own(*locale.clone());
- UErrorCode status = U_ZERO_ERROR;
- switch (usage) {
- case Usage::Sort:
- result->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
- break;
- case Usage::Search:
- result->setUnicodeKeywordValue("co", "search", status);
- break;
- }
- VERIFY(icu_success(status));
- return result;
- }
- Sensitivity sensitivity_from_string(StringView sensitivity)
- {
- if (sensitivity == "base"sv)
- return Sensitivity::Base;
- if (sensitivity == "accent"sv)
- return Sensitivity::Accent;
- if (sensitivity == "case"sv)
- return Sensitivity::Case;
- if (sensitivity == "variant"sv)
- return Sensitivity::Variant;
- VERIFY_NOT_REACHED();
- }
- StringView sensitivity_to_string(Sensitivity sensitivity)
- {
- switch (sensitivity) {
- case Sensitivity::Base:
- return "base"sv;
- case Sensitivity::Accent:
- return "accent"sv;
- case Sensitivity::Case:
- return "case"sv;
- case Sensitivity::Variant:
- return "variant"sv;
- }
- VERIFY_NOT_REACHED();
- }
- static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
- {
- switch (sensitivity) {
- case Sensitivity::Base:
- return UCOL_PRIMARY;
- case Sensitivity::Accent:
- return UCOL_SECONDARY;
- case Sensitivity::Case:
- return UCOL_PRIMARY;
- case Sensitivity::Variant:
- return UCOL_TERTIARY;
- }
- VERIFY_NOT_REACHED();
- }
- CaseFirst case_first_from_string(StringView case_first)
- {
- if (case_first == "upper"sv)
- return CaseFirst::Upper;
- if (case_first == "lower"sv)
- return CaseFirst::Lower;
- if (case_first == "false"sv)
- return CaseFirst::False;
- VERIFY_NOT_REACHED();
- }
- StringView case_first_to_string(CaseFirst case_first)
- {
- switch (case_first) {
- case CaseFirst::Upper:
- return "upper"sv;
- case CaseFirst::Lower:
- return "lower"sv;
- case CaseFirst::False:
- return "false"sv;
- }
- VERIFY_NOT_REACHED();
- }
- static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
- {
- switch (case_first) {
- case CaseFirst::Upper:
- return UCOL_UPPER_FIRST;
- case CaseFirst::Lower:
- return UCOL_LOWER_FIRST;
- case CaseFirst::False:
- return UCOL_OFF;
- }
- VERIFY_NOT_REACHED();
- }
- class CollatorImpl : public Collator {
- public:
- explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
- : m_collator(move(collator))
- {
- }
- virtual Collator::Order compare(StringView lhs, StringView rhs) const override
- {
- UErrorCode status = U_ZERO_ERROR;
- auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
- VERIFY(icu_success(status));
- switch (result) {
- case UCOL_LESS:
- return Order::Before;
- case UCOL_EQUAL:
- return Order::Equal;
- case UCOL_GREATER:
- return Order::After;
- }
- VERIFY_NOT_REACHED();
- }
- private:
- NonnullOwnPtr<icu::Collator> m_collator;
- };
- NonnullOwnPtr<Collator> Collator::create(
- StringView locale,
- Usage usage,
- StringView collation,
- Sensitivity sensitivity,
- CaseFirst case_first,
- bool numeric,
- bool ignore_punctuation)
- {
- UErrorCode status = U_ZERO_ERROR;
- auto locale_data = LocaleData::for_locale(locale);
- VERIFY(locale_data.has_value());
- auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);
- auto collator = adopt_own(*icu::Collator::createInstance(*locale_with_usage, status));
- VERIFY(icu_success(status));
- auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
- collator->setAttribute(attribute, value, status);
- VERIFY(icu_success(status));
- };
- set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
- set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
- set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
- set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
- set_attribute(UCOL_ALTERNATE_HANDLING, ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
- set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);
- return adopt_own(*new CollatorImpl(move(collator)));
- }
- }
|