Strings.cpp 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /*
  2. * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  3. * Copyright (c) 2022, networkException <networkexception@serenityos.org>
  4. * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/DeprecatedString.h>
  9. #include <AK/Utf16View.h>
  10. #include <AK/Utf8View.h>
  11. #include <LibWeb/Infra/CharacterTypes.h>
  12. #include <LibWeb/Infra/Strings.h>
  13. namespace Web::Infra {
  14. // https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
  15. DeprecatedString strip_and_collapse_whitespace(StringView string)
  16. {
  17. // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a single U+0020 SPACE code point.
  18. StringBuilder builder;
  19. for (auto code_point : Utf8View { string }) {
  20. if (Infra::is_ascii_whitespace(code_point)) {
  21. if (!builder.string_view().ends_with(' '))
  22. builder.append(' ');
  23. continue;
  24. }
  25. builder.append_code_point(code_point);
  26. }
  27. // ...and then remove any leading and trailing ASCII whitespace from that string.
  28. return builder.string_view().trim(Infra::ASCII_WHITESPACE);
  29. }
  30. // https://infra.spec.whatwg.org/#code-unit-prefix
  31. bool is_code_unit_prefix(StringView potential_prefix, StringView input)
  32. {
  33. auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
  34. auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
  35. // 1. Let i be 0.
  36. size_t i = 0;
  37. // 2. While true:
  38. while (true) {
  39. // 1. If i is greater than or equal to potentialPrefix’s length, then return true.
  40. if (i >= potential_prefix.length())
  41. return true;
  42. // 2. If i is greater than or equal to input’s length, then return false.
  43. if (i >= input.length())
  44. return false;
  45. // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
  46. auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i);
  47. // 4. Let inputCodeUnit be the ith code unit of input.
  48. auto input_code_unit = Utf16View(input_utf16).code_unit_at(i);
  49. // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
  50. if (potential_prefix_code_unit != input_code_unit)
  51. return false;
  52. // 6. Set i to i + 1.
  53. ++i;
  54. }
  55. }
  56. // https://infra.spec.whatwg.org/#scalar-value-string
  57. ErrorOr<String> convert_to_scalar_value_string(StringView string)
  58. {
  59. // To convert a string into a scalar value string, replace any surrogates with U+FFFD.
  60. StringBuilder scalar_value_builder;
  61. auto utf8_view = Utf8View { string };
  62. for (u32 code_point : utf8_view) {
  63. if (is_unicode_surrogate(code_point))
  64. code_point = 0xFFFD;
  65. TRY(scalar_value_builder.try_append(code_point));
  66. }
  67. return scalar_value_builder.to_string();
  68. }
  69. }