String.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. /*
  2. * Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #define AK_DONT_REPLACE_STD
  7. #include <AK/String.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibLocale/ICU.h>
  10. #include <unicode/bytestream.h>
  11. #include <unicode/casemap.h>
  12. #include <unicode/stringoptions.h>
  13. // This file contains definitions of AK::String methods which require UCD data.
  14. namespace AK {
  15. struct ResolvedLocale {
  16. ByteString buffer;
  17. char const* locale { nullptr };
  18. };
  19. static ResolvedLocale resolve_locale(Optional<StringView> const& locale)
  20. {
  21. if (!locale.has_value())
  22. return {};
  23. ResolvedLocale resolved_locale;
  24. resolved_locale.buffer = *locale;
  25. resolved_locale.locale = resolved_locale.buffer.characters();
  26. return resolved_locale;
  27. }
  28. ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
  29. {
  30. UErrorCode status = U_ZERO_ERROR;
  31. StringBuilder builder { bytes_as_string_view().length() };
  32. icu::StringByteSink sink { &builder };
  33. auto resolved_locale = resolve_locale(locale);
  34. icu::CaseMap::utf8ToLower(resolved_locale.locale, 0, Locale::icu_string_piece(*this), sink, nullptr, status);
  35. if (Locale::icu_failure(status))
  36. return Error::from_string_literal("Unable to convert string to lowercase");
  37. return builder.to_string_without_validation();
  38. }
  39. ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
  40. {
  41. UErrorCode status = U_ZERO_ERROR;
  42. StringBuilder builder { bytes_as_string_view().length() };
  43. icu::StringByteSink sink { &builder };
  44. auto resolved_locale = resolve_locale(locale);
  45. icu::CaseMap::utf8ToUpper(resolved_locale.locale, 0, Locale::icu_string_piece(*this), sink, nullptr, status);
  46. if (Locale::icu_failure(status))
  47. return Error::from_string_literal("Unable to convert string to uppercase");
  48. return builder.to_string_without_validation();
  49. }
  50. ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation) const
  51. {
  52. UErrorCode status = U_ZERO_ERROR;
  53. StringBuilder builder { bytes_as_string_view().length() };
  54. icu::StringByteSink sink { &builder };
  55. auto resolved_locale = resolve_locale(locale);
  56. u32 options = 0;
  57. if (trailing_code_point_transformation == TrailingCodePointTransformation::PreserveExisting)
  58. options |= U_TITLECASE_NO_LOWERCASE;
  59. icu::CaseMap::utf8ToTitle(resolved_locale.locale, options, nullptr, Locale::icu_string_piece(*this), sink, nullptr, status);
  60. if (Locale::icu_failure(status))
  61. return Error::from_string_literal("Unable to convert string to titlecase");
  62. return builder.to_string_without_validation();
  63. }
  64. static ErrorOr<void> build_casefold_string(StringView string, StringBuilder& builder)
  65. {
  66. UErrorCode status = U_ZERO_ERROR;
  67. icu::StringByteSink sink { &builder };
  68. icu::CaseMap::utf8Fold(0, Locale::icu_string_piece(string), sink, nullptr, status);
  69. if (Locale::icu_failure(status))
  70. return Error::from_string_literal("Unable to casefold string");
  71. return {};
  72. }
  73. ErrorOr<String> String::to_casefold() const
  74. {
  75. StringBuilder builder { bytes_as_string_view().length() };
  76. TRY(build_casefold_string(*this, builder));
  77. return builder.to_string_without_validation();
  78. }
  79. bool String::equals_ignoring_case(String const& other) const
  80. {
  81. StringBuilder lhs_builder { bytes_as_string_view().length() };
  82. if (build_casefold_string(*this, lhs_builder).is_error())
  83. return false;
  84. StringBuilder rhs_builder { other.bytes_as_string_view().length() };
  85. if (build_casefold_string(other, rhs_builder).is_error())
  86. return false;
  87. return lhs_builder.string_view() == rhs_builder.string_view();
  88. }
  89. Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_t from_byte_offset) const
  90. {
  91. auto haystack = bytes_as_string_view().substring_view(from_byte_offset);
  92. if (haystack.is_empty())
  93. return {};
  94. StringBuilder lhs_builder { haystack.length() };
  95. if (build_casefold_string(haystack, lhs_builder).is_error())
  96. return {};
  97. StringBuilder rhs_builder { needle.length() };
  98. if (build_casefold_string(needle, rhs_builder).is_error())
  99. return false;
  100. if (auto index = lhs_builder.string_view().find(rhs_builder.string_view()); index.has_value())
  101. return *index + from_byte_offset;
  102. return {};
  103. }
  104. }