Normalize.cpp 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. /*
  2. * Copyright (c) 2022, mat
  3. * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #define AK_DONT_REPLACE_STD
  8. #include <AK/StringBuilder.h>
  9. #include <LibLocale/ICU.h>
  10. #include <LibUnicode/Normalize.h>
  11. #include <unicode/normalizer2.h>
  12. namespace Unicode {
  13. NormalizationForm normalization_form_from_string(StringView form)
  14. {
  15. if (form == "NFD"sv)
  16. return NormalizationForm::NFD;
  17. if (form == "NFC"sv)
  18. return NormalizationForm::NFC;
  19. if (form == "NFKD"sv)
  20. return NormalizationForm::NFKD;
  21. if (form == "NFKC"sv)
  22. return NormalizationForm::NFKC;
  23. VERIFY_NOT_REACHED();
  24. }
  25. StringView normalization_form_to_string(NormalizationForm form)
  26. {
  27. switch (form) {
  28. case NormalizationForm::NFD:
  29. return "NFD"sv;
  30. case NormalizationForm::NFC:
  31. return "NFC"sv;
  32. case NormalizationForm::NFKD:
  33. return "NFKD"sv;
  34. case NormalizationForm::NFKC:
  35. return "NFKC"sv;
  36. }
  37. VERIFY_NOT_REACHED();
  38. }
  39. String normalize(StringView string, NormalizationForm form)
  40. {
  41. UErrorCode status = U_ZERO_ERROR;
  42. icu::Normalizer2 const* normalizer = nullptr;
  43. switch (form) {
  44. case NormalizationForm::NFD:
  45. normalizer = icu::Normalizer2::getNFDInstance(status);
  46. break;
  47. case NormalizationForm::NFC:
  48. normalizer = icu::Normalizer2::getNFCInstance(status);
  49. break;
  50. case NormalizationForm::NFKD:
  51. normalizer = icu::Normalizer2::getNFKDInstance(status);
  52. break;
  53. case NormalizationForm::NFKC:
  54. normalizer = icu::Normalizer2::getNFKCInstance(status);
  55. break;
  56. }
  57. if (Locale::icu_failure(status))
  58. return MUST(String::from_utf8(string));
  59. VERIFY(normalizer);
  60. StringBuilder builder { string.length() };
  61. icu::StringByteSink sink { &builder };
  62. normalizer->normalizeUTF8(0, Locale::icu_string_piece(string), sink, nullptr, status);
  63. if (Locale::icu_failure(status))
  64. return MUST(String::from_utf8(string));
  65. return MUST(builder.to_string());
  66. }
  67. }