CharacterTypes.h 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. /*
  2. * Copyright (c) 2024, the Ladybird developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/Types.h>
  9. namespace Web::CSS {
  10. // https://www.w3.org/TR/css-syntax-3/#digit
  11. constexpr bool is_digit(u32 code_point)
  12. {
  13. // A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive.
  14. return code_point >= 0x30 && code_point <= 0x39;
  15. }
  16. // https://www.w3.org/TR/css-syntax-3/#hex-digit
  17. constexpr bool is_hex_digit(u32 code_point)
  18. {
  19. // A digit,
  20. // or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F) inclusive,
  21. // or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f) inclusive.
  22. return is_digit(code_point) || (code_point >= 0x41 && code_point <= 0x46) || (code_point >= 0x61 && code_point <= 0x66);
  23. }
  24. // https://www.w3.org/TR/css-syntax-3/#ident-start-code-point
  25. constexpr bool is_ident_start_code_point(u32 code_point)
  26. {
  27. // A letter, a non-ASCII code point, or U+005F LOW LINE (_).
  28. // Note: the is_unicode condition is used to reject the Tokenizer's EOF codepoint.
  29. return is_ascii_alpha(code_point) || (!is_ascii(code_point) && is_unicode(code_point)) || code_point == '_';
  30. }
  31. // https://www.w3.org/TR/css-syntax-3/#ident-code-point
  32. constexpr bool is_ident_code_point(u32 code_point)
  33. {
  34. // An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-).
  35. return is_ident_start_code_point(code_point) || is_ascii_digit(code_point) || code_point == '-';
  36. }
  37. // https://www.w3.org/TR/css-syntax-3/#non-printable-code-point
  38. constexpr bool is_non_printable_code_point(u32 code_point)
  39. {
  40. return code_point <= 0x8 || code_point == 0xB || (code_point >= 0xE && code_point <= 0x1F) || code_point == 0x7F;
  41. }
  42. // https://www.w3.org/TR/css-syntax-3/#newline
  43. constexpr inline bool is_newline(u32 code_point)
  44. {
  45. // U+000A LINE FEED.
  46. // Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition,
  47. // as they are converted to U+000A LINE FEED during preprocessing.
  48. return code_point == 0xA;
  49. }
  50. // https://www.w3.org/TR/css-syntax-3/#whitespace
  51. constexpr bool is_whitespace(u32 code_point)
  52. {
  53. // A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
  54. return is_newline(code_point) || code_point == '\t' || code_point == ' ';
  55. }
  56. // https://www.w3.org/TR/css-syntax-3/#maximum-allowed-code-point
  57. constexpr bool is_greater_than_maximum_allowed_code_point(u32 code_point)
  58. {
  59. // The greatest code point defined by Unicode: U+10FFFF.
  60. return code_point > 0x10FFFF;
  61. }
  62. }