String.cpp 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /*
  2. * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/String.h>
  7. #include <AK/StringBuilder.h>
  8. #include <AK/Utf32View.h>
  9. #include <AK/Utf8View.h>
  10. #include <LibUnicode/CharacterTypes.h>
  11. #include <LibUnicode/UnicodeUtils.h>
  12. // This file contains definitions of AK::String methods which require UCD data.
  13. namespace AK {
  14. ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
  15. {
  16. StringBuilder builder;
  17. TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale));
  18. return builder.to_string();
  19. }
  20. ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
  21. {
  22. StringBuilder builder;
  23. TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale));
  24. return builder.to_string();
  25. }
  26. ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
  27. {
  28. StringBuilder builder;
  29. TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale, Unicode::TrailingCodePointTransformation::Lowercase));
  30. return builder.to_string();
  31. }
  32. ErrorOr<String> String::to_casefold() const
  33. {
  34. StringBuilder builder;
  35. TRY(Unicode::Detail::build_casefold_string(code_points(), builder));
  36. return builder.to_string();
  37. }
  38. class CasefoldStringComparator {
  39. public:
  40. explicit CasefoldStringComparator(Utf8View string)
  41. : m_string(string)
  42. , m_it(m_string.begin())
  43. {
  44. }
  45. bool has_more_data() const
  46. {
  47. return !m_casefolded_code_points.is_empty() || (m_it != m_string.end());
  48. }
  49. u32 next_code_point()
  50. {
  51. VERIFY(has_more_data());
  52. if (m_casefolded_code_points.is_empty()) {
  53. m_current_code_point = *m_it;
  54. ++m_it;
  55. m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point);
  56. VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point.
  57. }
  58. auto code_point = m_casefolded_code_points[0];
  59. m_casefolded_code_points = m_casefolded_code_points.substring_view(1);
  60. return code_point;
  61. }
  62. private:
  63. Utf8View m_string;
  64. Utf8CodePointIterator m_it;
  65. u32 m_current_code_point { 0 };
  66. Utf32View m_casefolded_code_points;
  67. };
  68. // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
  69. bool String::equals_ignoring_case(String const& other) const
  70. {
  71. // A string X is a caseless match for a string Y if and only if:
  72. // toCasefold(X) = toCasefold(Y)
  73. CasefoldStringComparator lhs { code_points() };
  74. CasefoldStringComparator rhs { other.code_points() };
  75. while (lhs.has_more_data() && rhs.has_more_data()) {
  76. if (lhs.next_code_point() != rhs.next_code_point())
  77. return false;
  78. }
  79. return !lhs.has_more_data() && !rhs.has_more_data();
  80. }
  81. }