Collator.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /*
  2. * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibUnicode/Collator.h>
  7. #include <LibUnicode/ICU.h>
  8. #include <unicode/coll.h>
  9. namespace Unicode {
  10. Usage usage_from_string(StringView usage)
  11. {
  12. if (usage == "sort"sv)
  13. return Usage::Sort;
  14. if (usage == "search"sv)
  15. return Usage::Search;
  16. VERIFY_NOT_REACHED();
  17. }
  18. StringView usage_to_string(Usage usage)
  19. {
  20. switch (usage) {
  21. case Usage::Sort:
  22. return "sort"sv;
  23. case Usage::Search:
  24. return "search"sv;
  25. }
  26. VERIFY_NOT_REACHED();
  27. }
  28. static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
  29. {
  30. auto result = adopt_own(*locale.clone());
  31. UErrorCode status = U_ZERO_ERROR;
  32. switch (usage) {
  33. case Usage::Sort:
  34. result->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
  35. break;
  36. case Usage::Search:
  37. result->setUnicodeKeywordValue("co", "search", status);
  38. break;
  39. }
  40. VERIFY(icu_success(status));
  41. return result;
  42. }
  43. Sensitivity sensitivity_from_string(StringView sensitivity)
  44. {
  45. if (sensitivity == "base"sv)
  46. return Sensitivity::Base;
  47. if (sensitivity == "accent"sv)
  48. return Sensitivity::Accent;
  49. if (sensitivity == "case"sv)
  50. return Sensitivity::Case;
  51. if (sensitivity == "variant"sv)
  52. return Sensitivity::Variant;
  53. VERIFY_NOT_REACHED();
  54. }
  55. StringView sensitivity_to_string(Sensitivity sensitivity)
  56. {
  57. switch (sensitivity) {
  58. case Sensitivity::Base:
  59. return "base"sv;
  60. case Sensitivity::Accent:
  61. return "accent"sv;
  62. case Sensitivity::Case:
  63. return "case"sv;
  64. case Sensitivity::Variant:
  65. return "variant"sv;
  66. }
  67. VERIFY_NOT_REACHED();
  68. }
  69. static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
  70. {
  71. switch (sensitivity) {
  72. case Sensitivity::Base:
  73. return UCOL_PRIMARY;
  74. case Sensitivity::Accent:
  75. return UCOL_SECONDARY;
  76. case Sensitivity::Case:
  77. return UCOL_PRIMARY;
  78. case Sensitivity::Variant:
  79. return UCOL_TERTIARY;
  80. }
  81. VERIFY_NOT_REACHED();
  82. }
  83. static Sensitivity sensitivity_for_collator(icu::Collator const& collator)
  84. {
  85. UErrorCode status = U_ZERO_ERROR;
  86. auto attribute = collator.getAttribute(UCOL_STRENGTH, status);
  87. VERIFY(icu_success(status));
  88. switch (attribute) {
  89. case UCOL_PRIMARY:
  90. attribute = collator.getAttribute(UCOL_CASE_LEVEL, status);
  91. VERIFY(icu_success(status));
  92. return attribute == UCOL_ON ? Sensitivity::Case : Sensitivity::Base;
  93. case UCOL_SECONDARY:
  94. return Sensitivity::Accent;
  95. default:
  96. return Sensitivity::Variant;
  97. }
  98. }
  99. CaseFirst case_first_from_string(StringView case_first)
  100. {
  101. if (case_first == "upper"sv)
  102. return CaseFirst::Upper;
  103. if (case_first == "lower"sv)
  104. return CaseFirst::Lower;
  105. if (case_first == "false"sv)
  106. return CaseFirst::False;
  107. VERIFY_NOT_REACHED();
  108. }
  109. StringView case_first_to_string(CaseFirst case_first)
  110. {
  111. switch (case_first) {
  112. case CaseFirst::Upper:
  113. return "upper"sv;
  114. case CaseFirst::Lower:
  115. return "lower"sv;
  116. case CaseFirst::False:
  117. return "false"sv;
  118. }
  119. VERIFY_NOT_REACHED();
  120. }
  121. static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
  122. {
  123. switch (case_first) {
  124. case CaseFirst::Upper:
  125. return UCOL_UPPER_FIRST;
  126. case CaseFirst::Lower:
  127. return UCOL_LOWER_FIRST;
  128. case CaseFirst::False:
  129. return UCOL_OFF;
  130. }
  131. VERIFY_NOT_REACHED();
  132. }
  133. static bool ignore_punctuation_for_collator(icu::Collator const& collator)
  134. {
  135. UErrorCode status = U_ZERO_ERROR;
  136. auto attribute = collator.getAttribute(UCOL_ALTERNATE_HANDLING, status);
  137. VERIFY(icu_success(status));
  138. return attribute == UCOL_SHIFTED;
  139. }
  140. class CollatorImpl : public Collator {
  141. public:
  142. explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
  143. : m_collator(move(collator))
  144. {
  145. }
  146. virtual Collator::Order compare(StringView lhs, StringView rhs) const override
  147. {
  148. UErrorCode status = U_ZERO_ERROR;
  149. auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
  150. VERIFY(icu_success(status));
  151. switch (result) {
  152. case UCOL_LESS:
  153. return Order::Before;
  154. case UCOL_EQUAL:
  155. return Order::Equal;
  156. case UCOL_GREATER:
  157. return Order::After;
  158. }
  159. VERIFY_NOT_REACHED();
  160. }
  161. virtual Sensitivity sensitivity() const override
  162. {
  163. return sensitivity_for_collator(*m_collator);
  164. }
  165. virtual bool ignore_punctuation() const override
  166. {
  167. return ignore_punctuation_for_collator(*m_collator);
  168. }
  169. private:
  170. NonnullOwnPtr<icu::Collator> m_collator;
  171. };
  172. NonnullOwnPtr<Collator> Collator::create(
  173. StringView locale,
  174. Usage usage,
  175. StringView collation,
  176. Optional<Sensitivity> sensitivity,
  177. CaseFirst case_first,
  178. bool numeric,
  179. Optional<bool> ignore_punctuation)
  180. {
  181. UErrorCode status = U_ZERO_ERROR;
  182. auto locale_data = LocaleData::for_locale(locale);
  183. VERIFY(locale_data.has_value());
  184. auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);
  185. auto collator = adopt_own(*icu::Collator::createInstance(*locale_with_usage, status));
  186. VERIFY(icu_success(status));
  187. auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
  188. collator->setAttribute(attribute, value, status);
  189. VERIFY(icu_success(status));
  190. };
  191. if (!sensitivity.has_value())
  192. sensitivity = sensitivity_for_collator(*collator);
  193. if (!ignore_punctuation.has_value())
  194. ignore_punctuation = ignore_punctuation_for_collator(*collator);
  195. set_attribute(UCOL_STRENGTH, icu_sensitivity(*sensitivity));
  196. set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
  197. set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
  198. set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
  199. set_attribute(UCOL_ALTERNATE_HANDLING, *ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
  200. set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);
  201. return adopt_own(*new CollatorImpl(move(collator)));
  202. }
  203. }