Strings.cpp 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. /*
  2. * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  3. * Copyright (c) 2022, networkException <networkexception@serenityos.org>
  4. * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
  5. * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
  6. *
  7. * SPDX-License-Identifier: BSD-2-Clause
  8. */
  9. #include <AK/CharacterTypes.h>
  10. #include <AK/DeprecatedString.h>
  11. #include <AK/Utf16View.h>
  12. #include <AK/Utf8View.h>
  13. #include <LibWeb/Infra/CharacterTypes.h>
  14. #include <LibWeb/Infra/Strings.h>
  15. namespace Web::Infra {
  16. // https://infra.spec.whatwg.org/#ascii-case-insensitive
  17. bool is_ascii_case_insensitive_match(StringView a, StringView b)
  18. {
  19. // A string A is an ASCII case-insensitive match for a string B,
  20. // if the ASCII lowercase of A is the ASCII lowercase of B.
  21. Utf8View a_view { a };
  22. Utf8View b_view { b };
  23. if (a_view.length() != b_view.length())
  24. return false;
  25. auto b_iterator = b_view.begin();
  26. for (auto a_char : a_view) {
  27. auto b_char = *b_iterator;
  28. ++b_iterator;
  29. if (to_ascii_lowercase(a_char) != to_ascii_lowercase(b_char))
  30. return false;
  31. }
  32. return true;
  33. }
  34. // https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
  35. DeprecatedString strip_and_collapse_whitespace(StringView string)
  36. {
  37. // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a single U+0020 SPACE code point.
  38. StringBuilder builder;
  39. for (auto code_point : Utf8View { string }) {
  40. if (Infra::is_ascii_whitespace(code_point)) {
  41. if (!builder.string_view().ends_with(' '))
  42. builder.append(' ');
  43. continue;
  44. }
  45. builder.append_code_point(code_point);
  46. }
  47. // ...and then remove any leading and trailing ASCII whitespace from that string.
  48. return builder.string_view().trim(Infra::ASCII_WHITESPACE);
  49. }
  50. // https://infra.spec.whatwg.org/#code-unit-prefix
  51. bool is_code_unit_prefix(StringView potential_prefix, StringView input)
  52. {
  53. auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
  54. auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
  55. // 1. Let i be 0.
  56. size_t i = 0;
  57. // 2. While true:
  58. while (true) {
  59. // 1. If i is greater than or equal to potentialPrefix’s length, then return true.
  60. if (i >= potential_prefix.length())
  61. return true;
  62. // 2. If i is greater than or equal to input’s length, then return false.
  63. if (i >= input.length())
  64. return false;
  65. // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
  66. auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i);
  67. // 4. Let inputCodeUnit be the ith code unit of input.
  68. auto input_code_unit = Utf16View(input_utf16).code_unit_at(i);
  69. // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
  70. if (potential_prefix_code_unit != input_code_unit)
  71. return false;
  72. // 6. Set i to i + 1.
  73. ++i;
  74. }
  75. }
  76. // https://infra.spec.whatwg.org/#scalar-value-string
  77. ErrorOr<String> convert_to_scalar_value_string(StringView string)
  78. {
  79. // To convert a string into a scalar value string, replace any surrogates with U+FFFD.
  80. StringBuilder scalar_value_builder;
  81. auto utf8_view = Utf8View { string };
  82. for (u32 code_point : utf8_view) {
  83. if (is_unicode_surrogate(code_point))
  84. code_point = 0xFFFD;
  85. TRY(scalar_value_builder.try_append(code_point));
  86. }
  87. return scalar_value_builder.to_string();
  88. }
  89. }