String.cpp 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /*
  2. * Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/String.h>
  7. #include <AK/StringBuilder.h>
  8. #include <LibUnicode/ICU.h>
  9. #include <unicode/bytestream.h>
  10. #include <unicode/casemap.h>
  11. #include <unicode/stringoptions.h>
  12. // This file contains definitions of AK::String methods which require UCD data.
  13. namespace AK {
  14. struct ResolvedLocale {
  15. ByteString buffer;
  16. char const* locale { nullptr };
  17. };
  18. static ResolvedLocale resolve_locale(Optional<StringView> const& locale)
  19. {
  20. if (!locale.has_value())
  21. return {};
  22. ResolvedLocale resolved_locale;
  23. resolved_locale.buffer = *locale;
  24. resolved_locale.locale = resolved_locale.buffer.characters();
  25. return resolved_locale;
  26. }
  27. ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
  28. {
  29. UErrorCode status = U_ZERO_ERROR;
  30. StringBuilder builder { bytes_as_string_view().length() };
  31. icu::StringByteSink sink { &builder };
  32. auto resolved_locale = resolve_locale(locale);
  33. icu::CaseMap::utf8ToLower(resolved_locale.locale, 0, Unicode::icu_string_piece(*this), sink, nullptr, status);
  34. if (Unicode::icu_failure(status))
  35. return Error::from_string_literal("Unable to convert string to lowercase");
  36. return builder.to_string_without_validation();
  37. }
  38. ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
  39. {
  40. UErrorCode status = U_ZERO_ERROR;
  41. StringBuilder builder { bytes_as_string_view().length() };
  42. icu::StringByteSink sink { &builder };
  43. auto resolved_locale = resolve_locale(locale);
  44. icu::CaseMap::utf8ToUpper(resolved_locale.locale, 0, Unicode::icu_string_piece(*this), sink, nullptr, status);
  45. if (Unicode::icu_failure(status))
  46. return Error::from_string_literal("Unable to convert string to uppercase");
  47. return builder.to_string_without_validation();
  48. }
  49. ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation) const
  50. {
  51. UErrorCode status = U_ZERO_ERROR;
  52. StringBuilder builder { bytes_as_string_view().length() };
  53. icu::StringByteSink sink { &builder };
  54. auto resolved_locale = resolve_locale(locale);
  55. u32 options = 0;
  56. if (trailing_code_point_transformation == TrailingCodePointTransformation::PreserveExisting)
  57. options |= U_TITLECASE_NO_LOWERCASE;
  58. icu::CaseMap::utf8ToTitle(resolved_locale.locale, options, nullptr, Unicode::icu_string_piece(*this), sink, nullptr, status);
  59. if (Unicode::icu_failure(status))
  60. return Error::from_string_literal("Unable to convert string to titlecase");
  61. return builder.to_string_without_validation();
  62. }
  63. static ErrorOr<void> build_casefold_string(StringView string, StringBuilder& builder)
  64. {
  65. UErrorCode status = U_ZERO_ERROR;
  66. icu::StringByteSink sink { &builder };
  67. icu::CaseMap::utf8Fold(0, Unicode::icu_string_piece(string), sink, nullptr, status);
  68. if (Unicode::icu_failure(status))
  69. return Error::from_string_literal("Unable to casefold string");
  70. return {};
  71. }
  72. ErrorOr<String> String::to_casefold() const
  73. {
  74. StringBuilder builder { bytes_as_string_view().length() };
  75. TRY(build_casefold_string(*this, builder));
  76. return builder.to_string_without_validation();
  77. }
  78. bool String::equals_ignoring_case(String const& other) const
  79. {
  80. StringBuilder lhs_builder { bytes_as_string_view().length() };
  81. if (build_casefold_string(*this, lhs_builder).is_error())
  82. return false;
  83. StringBuilder rhs_builder { other.bytes_as_string_view().length() };
  84. if (build_casefold_string(other, rhs_builder).is_error())
  85. return false;
  86. return lhs_builder.string_view() == rhs_builder.string_view();
  87. }
  88. Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_t from_byte_offset) const
  89. {
  90. auto haystack = bytes_as_string_view().substring_view(from_byte_offset);
  91. if (haystack.is_empty())
  92. return {};
  93. StringBuilder lhs_builder { haystack.length() };
  94. if (build_casefold_string(haystack, lhs_builder).is_error())
  95. return {};
  96. StringBuilder rhs_builder { needle.length() };
  97. if (build_casefold_string(needle, rhs_builder).is_error())
  98. return false;
  99. if (auto index = lhs_builder.string_view().find(rhs_builder.string_view()); index.has_value())
  100. return *index + from_byte_offset;
  101. return {};
  102. }
  103. }