Serialize.cpp 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. /*
  2. * Copyright (c) 2021, Sam Atkins <atkinssj@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/StringBuilder.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibWeb/CSS/Serialize.h>
  9. namespace Web::CSS {
  10. // https://www.w3.org/TR/cssom-1/#escape-a-character
  11. String escape_a_character(u32 character)
  12. {
  13. StringBuilder builder;
  14. builder.append('\\');
  15. builder.append_code_point(character);
  16. return builder.to_string();
  17. }
  18. // https://www.w3.org/TR/cssom-1/#escape-a-character-as-code-point
  19. String escape_a_character_as_code_point(u32 character)
  20. {
  21. return String::formatted("\\{:x} ", character);
  22. }
  23. // https://www.w3.org/TR/cssom-1/#serialize-an-identifier
  24. String serialize_an_identifier(StringView const& ident)
  25. {
  26. StringBuilder builder;
  27. Utf8View characters { ident };
  28. auto first_character = characters.is_empty() ? 0 : *characters.begin();
  29. // To serialize an identifier means to create a string represented by the concatenation of,
  30. // for each character of the identifier:
  31. for (auto character : characters) {
  32. // If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD).
  33. if (character == 0) {
  34. builder.append_code_point(0xFFFD);
  35. continue;
  36. }
  37. // If the character is in the range [\1-\1f] (U+0001 to U+001F) or is U+007F,
  38. // then the character escaped as code point.
  39. if ((character >= 0x0001 && character <= 0x001F) || (character == 0x007F)) {
  40. builder.append(escape_a_character_as_code_point(character));
  41. continue;
  42. }
  43. // If the character is the first character and is in the range [0-9] (U+0030 to U+0039),
  44. // then the character escaped as code point.
  45. if (builder.is_empty() && character >= '0' && character <= '9') {
  46. builder.append(escape_a_character_as_code_point(character));
  47. continue;
  48. }
  49. // If the character is the second character and is in the range [0-9] (U+0030 to U+0039)
  50. // and the first character is a "-" (U+002D), then the character escaped as code point.
  51. if (builder.length() == 1 && first_character == '-' && character >= '0' && character <= '9') {
  52. builder.append(escape_a_character_as_code_point(character));
  53. continue;
  54. }
  55. // If the character is the first character and is a "-" (U+002D), and there is no second
  56. // character, then the escaped character.
  57. if (builder.is_empty() && character == '-' && characters.length() == 1) {
  58. builder.append(escape_a_character(character));
  59. continue;
  60. }
  61. // If the character is not handled by one of the above rules and is greater than or equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] (U+0061 to U+007A), then the character itself.
  62. if ((character >= 0x0080)
  63. || (character == '-') || (character == '_')
  64. || (character >= '0' && character <= '9')
  65. || (character >= 'A' && character <= 'Z')
  66. || (character >= 'a' && character <= 'z')) {
  67. builder.append_code_point(character);
  68. continue;
  69. }
  70. // Otherwise, the escaped character.
  71. builder.append(escape_a_character(character));
  72. }
  73. return builder.to_string();
  74. }
  75. }