Normalize.cpp 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. /*
  2. * Copyright (c) 2022, mat
  3. * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/StringBuilder.h>
  8. #include <LibUnicode/ICU.h>
  9. #include <LibUnicode/Normalize.h>
  10. #include <unicode/normalizer2.h>
  11. namespace Unicode {
  12. NormalizationForm normalization_form_from_string(StringView form)
  13. {
  14. if (form == "NFD"sv)
  15. return NormalizationForm::NFD;
  16. if (form == "NFC"sv)
  17. return NormalizationForm::NFC;
  18. if (form == "NFKD"sv)
  19. return NormalizationForm::NFKD;
  20. if (form == "NFKC"sv)
  21. return NormalizationForm::NFKC;
  22. VERIFY_NOT_REACHED();
  23. }
  24. StringView normalization_form_to_string(NormalizationForm form)
  25. {
  26. switch (form) {
  27. case NormalizationForm::NFD:
  28. return "NFD"sv;
  29. case NormalizationForm::NFC:
  30. return "NFC"sv;
  31. case NormalizationForm::NFKD:
  32. return "NFKD"sv;
  33. case NormalizationForm::NFKC:
  34. return "NFKC"sv;
  35. }
  36. VERIFY_NOT_REACHED();
  37. }
  38. String normalize(StringView string, NormalizationForm form)
  39. {
  40. UErrorCode status = U_ZERO_ERROR;
  41. icu::Normalizer2 const* normalizer = nullptr;
  42. switch (form) {
  43. case NormalizationForm::NFD:
  44. normalizer = icu::Normalizer2::getNFDInstance(status);
  45. break;
  46. case NormalizationForm::NFC:
  47. normalizer = icu::Normalizer2::getNFCInstance(status);
  48. break;
  49. case NormalizationForm::NFKD:
  50. normalizer = icu::Normalizer2::getNFKDInstance(status);
  51. break;
  52. case NormalizationForm::NFKC:
  53. normalizer = icu::Normalizer2::getNFKCInstance(status);
  54. break;
  55. }
  56. if (icu_failure(status))
  57. return MUST(String::from_utf8(string));
  58. VERIFY(normalizer);
  59. StringBuilder builder { string.length() };
  60. icu::StringByteSink sink { &builder };
  61. normalizer->normalizeUTF8(0, icu_string_piece(string), sink, nullptr, status);
  62. if (icu_failure(status))
  63. return MUST(String::from_utf8(string));
  64. return MUST(builder.to_string());
  65. }
  66. }