Collator.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
/*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #include <LibUnicode/Collator.h>
  7. #include <LibUnicode/ICU.h>
  8. #include <unicode/coll.h>
  9. namespace Unicode {
  10. Usage usage_from_string(StringView usage)
  11. {
  12. if (usage == "sort"sv)
  13. return Usage::Sort;
  14. if (usage == "search"sv)
  15. return Usage::Search;
  16. VERIFY_NOT_REACHED();
  17. }
  18. StringView usage_to_string(Usage usage)
  19. {
  20. switch (usage) {
  21. case Usage::Sort:
  22. return "sort"sv;
  23. case Usage::Search:
  24. return "search"sv;
  25. }
  26. VERIFY_NOT_REACHED();
  27. }
  28. static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
  29. {
  30. auto result = adopt_own(*locale.clone());
  31. UErrorCode status = U_ZERO_ERROR;
  32. switch (usage) {
  33. case Usage::Sort:
  34. result->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
  35. break;
  36. case Usage::Search:
  37. result->setUnicodeKeywordValue("co", "search", status);
  38. break;
  39. }
  40. VERIFY(icu_success(status));
  41. return result;
  42. }
  43. Sensitivity sensitivity_from_string(StringView sensitivity)
  44. {
  45. if (sensitivity == "base"sv)
  46. return Sensitivity::Base;
  47. if (sensitivity == "accent"sv)
  48. return Sensitivity::Accent;
  49. if (sensitivity == "case"sv)
  50. return Sensitivity::Case;
  51. if (sensitivity == "variant"sv)
  52. return Sensitivity::Variant;
  53. VERIFY_NOT_REACHED();
  54. }
  55. StringView sensitivity_to_string(Sensitivity sensitivity)
  56. {
  57. switch (sensitivity) {
  58. case Sensitivity::Base:
  59. return "base"sv;
  60. case Sensitivity::Accent:
  61. return "accent"sv;
  62. case Sensitivity::Case:
  63. return "case"sv;
  64. case Sensitivity::Variant:
  65. return "variant"sv;
  66. }
  67. VERIFY_NOT_REACHED();
  68. }
  69. static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
  70. {
  71. switch (sensitivity) {
  72. case Sensitivity::Base:
  73. return UCOL_PRIMARY;
  74. case Sensitivity::Accent:
  75. return UCOL_SECONDARY;
  76. case Sensitivity::Case:
  77. return UCOL_PRIMARY;
  78. case Sensitivity::Variant:
  79. return UCOL_TERTIARY;
  80. }
  81. VERIFY_NOT_REACHED();
  82. }
  83. CaseFirst case_first_from_string(StringView case_first)
  84. {
  85. if (case_first == "upper"sv)
  86. return CaseFirst::Upper;
  87. if (case_first == "lower"sv)
  88. return CaseFirst::Lower;
  89. if (case_first == "false"sv)
  90. return CaseFirst::False;
  91. VERIFY_NOT_REACHED();
  92. }
  93. StringView case_first_to_string(CaseFirst case_first)
  94. {
  95. switch (case_first) {
  96. case CaseFirst::Upper:
  97. return "upper"sv;
  98. case CaseFirst::Lower:
  99. return "lower"sv;
  100. case CaseFirst::False:
  101. return "false"sv;
  102. }
  103. VERIFY_NOT_REACHED();
  104. }
  105. static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
  106. {
  107. switch (case_first) {
  108. case CaseFirst::Upper:
  109. return UCOL_UPPER_FIRST;
  110. case CaseFirst::Lower:
  111. return UCOL_LOWER_FIRST;
  112. case CaseFirst::False:
  113. return UCOL_OFF;
  114. }
  115. VERIFY_NOT_REACHED();
  116. }
  117. class CollatorImpl : public Collator {
  118. public:
  119. explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
  120. : m_collator(move(collator))
  121. {
  122. }
  123. virtual Collator::Order compare(StringView lhs, StringView rhs) const override
  124. {
  125. UErrorCode status = U_ZERO_ERROR;
  126. auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
  127. VERIFY(icu_success(status));
  128. switch (result) {
  129. case UCOL_LESS:
  130. return Order::Before;
  131. case UCOL_EQUAL:
  132. return Order::Equal;
  133. case UCOL_GREATER:
  134. return Order::After;
  135. }
  136. VERIFY_NOT_REACHED();
  137. }
  138. private:
  139. NonnullOwnPtr<icu::Collator> m_collator;
  140. };
  141. NonnullOwnPtr<Collator> Collator::create(
  142. StringView locale,
  143. Usage usage,
  144. StringView collation,
  145. Sensitivity sensitivity,
  146. CaseFirst case_first,
  147. bool numeric,
  148. bool ignore_punctuation)
  149. {
  150. UErrorCode status = U_ZERO_ERROR;
  151. auto locale_data = LocaleData::for_locale(locale);
  152. VERIFY(locale_data.has_value());
  153. auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);
  154. auto collator = adopt_own(*icu::Collator::createInstance(*locale_with_usage, status));
  155. VERIFY(icu_success(status));
  156. auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
  157. collator->setAttribute(attribute, value, status);
  158. VERIFY(icu_success(status));
  159. };
  160. set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
  161. set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
  162. set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
  163. set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
  164. set_attribute(UCOL_ALTERNATE_HANDLING, ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
  165. set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);
  166. return adopt_own(*new CollatorImpl(move(collator)));
  167. }
  168. }