CharacterTypes.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /*
  2. * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/Platform.h>
  8. #include <AK/StringBuilder.h>
  9. #include <AK/Types.h>
  10. #include <AK/Utf8View.h>
  11. #include <LibUnicode/CharacterTypes.h>
  12. #include <LibUnicode/UnicodeUtils.h>
  13. #if ENABLE_UNICODE_DATA
  14. # include <LibUnicode/UnicodeData.h>
  15. #endif
  16. namespace Unicode {
  17. Optional<DeprecatedString> __attribute__((weak)) code_point_display_name(u32) { return {}; }
  18. Optional<StringView> __attribute__((weak)) code_point_block_display_name(u32) { return {}; }
  19. Optional<StringView> __attribute__((weak)) code_point_abbreviation(u32) { return {}; }
  20. u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
  21. ReadonlySpan<BlockName> __attribute__((weak)) block_display_names() { return {}; }
  22. u32 __attribute__((weak)) to_unicode_lowercase(u32 code_point)
  23. {
  24. return to_ascii_lowercase(code_point);
  25. }
  26. u32 __attribute__((weak)) to_unicode_uppercase(u32 code_point)
  27. {
  28. return to_ascii_uppercase(code_point);
  29. }
  30. u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point)
  31. {
  32. return to_ascii_uppercase(code_point);
  33. }
  34. ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView string, Optional<StringView> const& locale)
  35. {
  36. StringBuilder builder;
  37. TRY(Detail::build_lowercase_string(Utf8View { string }, builder, locale));
  38. return builder.to_deprecated_string();
  39. }
  40. ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView string, Optional<StringView> const& locale)
  41. {
  42. StringBuilder builder;
  43. TRY(Detail::build_uppercase_string(Utf8View { string }, builder, locale));
  44. return builder.to_deprecated_string();
  45. }
  46. ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
  47. {
  48. StringBuilder builder;
  49. TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale, trailing_code_point_transformation));
  50. return builder.to_string();
  51. }
  52. ErrorOr<String> to_unicode_casefold_full(StringView string)
  53. {
  54. StringBuilder builder;
  55. TRY(Detail::build_casefold_string(Utf8View { string }, builder));
  56. return builder.to_string();
  57. }
  58. Optional<GeneralCategory> __attribute__((weak)) general_category_from_string(StringView) { return {}; }
  59. bool __attribute__((weak)) code_point_has_general_category(u32, GeneralCategory) { return {}; }
  60. Optional<Property> __attribute__((weak)) property_from_string(StringView) { return {}; }
  61. bool __attribute__((weak)) code_point_has_property(u32, Property) { return {}; }
  62. bool is_ecma262_property([[maybe_unused]] Property property)
  63. {
  64. #if ENABLE_UNICODE_DATA
  65. // EMCA-262 only allows a subset of Unicode properties: https://tc39.es/ecma262/#table-binary-unicode-properties
  66. switch (property) {
  67. case Unicode::Property::ASCII:
  68. case Unicode::Property::ASCII_Hex_Digit:
  69. case Unicode::Property::Alphabetic:
  70. case Unicode::Property::Any:
  71. case Unicode::Property::Assigned:
  72. case Unicode::Property::Bidi_Control:
  73. case Unicode::Property::Bidi_Mirrored:
  74. case Unicode::Property::Case_Ignorable:
  75. case Unicode::Property::Cased:
  76. case Unicode::Property::Changes_When_Casefolded:
  77. case Unicode::Property::Changes_When_Casemapped:
  78. case Unicode::Property::Changes_When_Lowercased:
  79. case Unicode::Property::Changes_When_NFKC_Casefolded:
  80. case Unicode::Property::Changes_When_Titlecased:
  81. case Unicode::Property::Changes_When_Uppercased:
  82. case Unicode::Property::Dash:
  83. case Unicode::Property::Default_Ignorable_Code_Point:
  84. case Unicode::Property::Deprecated:
  85. case Unicode::Property::Diacritic:
  86. case Unicode::Property::Emoji:
  87. case Unicode::Property::Emoji_Component:
  88. case Unicode::Property::Emoji_Modifier:
  89. case Unicode::Property::Emoji_Modifier_Base:
  90. case Unicode::Property::Emoji_Presentation:
  91. case Unicode::Property::Extended_Pictographic:
  92. case Unicode::Property::Extender:
  93. case Unicode::Property::Grapheme_Base:
  94. case Unicode::Property::Grapheme_Extend:
  95. case Unicode::Property::Hex_Digit:
  96. case Unicode::Property::IDS_Binary_Operator:
  97. case Unicode::Property::IDS_Trinary_Operator:
  98. case Unicode::Property::ID_Continue:
  99. case Unicode::Property::ID_Start:
  100. case Unicode::Property::Ideographic:
  101. case Unicode::Property::Join_Control:
  102. case Unicode::Property::Logical_Order_Exception:
  103. case Unicode::Property::Lowercase:
  104. case Unicode::Property::Math:
  105. case Unicode::Property::Noncharacter_Code_Point:
  106. case Unicode::Property::Pattern_Syntax:
  107. case Unicode::Property::Pattern_White_Space:
  108. case Unicode::Property::Quotation_Mark:
  109. case Unicode::Property::Radical:
  110. case Unicode::Property::Regional_Indicator:
  111. case Unicode::Property::Sentence_Terminal:
  112. case Unicode::Property::Soft_Dotted:
  113. case Unicode::Property::Terminal_Punctuation:
  114. case Unicode::Property::Unified_Ideograph:
  115. case Unicode::Property::Uppercase:
  116. case Unicode::Property::Variation_Selector:
  117. case Unicode::Property::White_Space:
  118. case Unicode::Property::XID_Continue:
  119. case Unicode::Property::XID_Start:
  120. return true;
  121. default:
  122. return false;
  123. }
  124. #else
  125. return false;
  126. #endif
  127. }
  128. Optional<Script> __attribute__((weak)) script_from_string(StringView) { return {}; }
  129. bool __attribute__((weak)) code_point_has_script(u32, Script) { return {}; }
  130. bool __attribute__((weak)) code_point_has_script_extension(u32, Script) { return {}; }
  131. bool __attribute__((weak)) code_point_has_grapheme_break_property(u32, GraphemeBreakProperty) { return {}; }
  132. bool __attribute__((weak)) code_point_has_word_break_property(u32, WordBreakProperty) { return {}; }
  133. bool __attribute__((weak)) code_point_has_sentence_break_property(u32, SentenceBreakProperty) { return {}; }
  134. Optional<BidirectionalClass> __attribute__((weak)) bidirectional_class_from_string(StringView) { return {}; }
  135. Optional<BidirectionalClass> __attribute__((weak)) bidirectional_class(u32) { return {}; }
  136. }