CharacterTypes.h 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. /*
  2. * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/DeprecatedString.h>
  8. #include <AK/Forward.h>
  9. #include <AK/Optional.h>
  10. #include <AK/Span.h>
  11. #include <AK/String.h>
  12. #include <AK/Types.h>
  13. #include <AK/Vector.h>
  14. #include <LibUnicode/Forward.h>
  15. namespace Unicode {
  16. struct CodePointRange {
  17. u32 first { 0 };
  18. u32 last { 0 };
  19. };
  20. struct CodePointRangeComparator {
  21. constexpr int operator()(u32 code_point, CodePointRange const& range)
  22. {
  23. return (code_point > range.last) - (code_point < range.first);
  24. }
  25. };
  26. struct BlockName {
  27. CodePointRange code_point_range {};
  28. StringView display_name;
  29. };
  30. enum class TrailingCodePointTransformation : u8 {
  31. // Default behaviour; Puts the first typographic letter unit of each word, if lowercase, in titlecase; the other characters in lowercase.
  32. Lowercase,
  33. // Puts the first typographic letter unit of each word, if lowercase, in titlecase; other characters are unaffected. (https://drafts.csswg.org/css-text/#valdef-text-transform-capitalize)
  34. PreserveExisting,
  35. };
  36. Optional<DeprecatedString> code_point_display_name(u32 code_point);
  37. Optional<StringView> code_point_block_display_name(u32 code_point);
  38. Optional<StringView> code_point_abbreviation(u32 code_point);
  39. ReadonlySpan<BlockName> block_display_names();
  40. u32 canonical_combining_class(u32 code_point);
  41. // Note: The single code point case conversions only perform simple case folding.
  42. // Use the full-string transformations for full case folding.
  43. u32 to_unicode_lowercase(u32 code_point);
  44. u32 to_unicode_uppercase(u32 code_point);
  45. u32 to_unicode_titlecase(u32 code_point);
  46. ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
  47. ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
  48. ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase);
  49. ErrorOr<String> to_unicode_casefold_full(StringView);
  50. Optional<GeneralCategory> general_category_from_string(StringView);
  51. bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);
  52. Optional<Property> property_from_string(StringView);
  53. bool code_point_has_property(u32 code_point, Property property);
  54. bool is_ecma262_property(Property);
  55. Optional<Script> script_from_string(StringView);
  56. bool code_point_has_script(u32 code_point, Script script);
  57. bool code_point_has_script_extension(u32 code_point, Script script);
  58. bool code_point_has_grapheme_break_property(u32 code_point, GraphemeBreakProperty property);
  59. bool code_point_has_word_break_property(u32 code_point, WordBreakProperty property);
  60. bool code_point_has_sentence_break_property(u32 code_point, SentenceBreakProperty property);
  61. Optional<BidirectionalClass> bidirectional_class_from_string(StringView);
  62. Optional<BidirectionalClass> bidirectional_class(u32 code_point);
  63. }