Emoji.cpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /*
  2. * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/Utf32View.h>
  8. #include <AK/Utf8View.h>
  9. #include <LibUnicode/CharacterTypes.h>
  10. #include <LibUnicode/Emoji.h>
  11. #if ENABLE_UNICODE_DATA
  12. # include <LibUnicode/UnicodeData.h>
  13. #endif
  14. namespace Unicode {
  15. Optional<Emoji> __attribute__((weak)) find_emoji_for_code_points(ReadonlySpan<u32>) { return {}; }
  16. #if ENABLE_UNICODE_DATA
  17. // https://unicode.org/reports/tr51/#def_emoji_core_sequence
  18. static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32> const& next_code_point)
  19. {
  20. // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence
  21. static constexpr auto emoji_presentation_selector = 0xFE0Fu;
  22. static constexpr auto combining_enclosing_keycap = 0x20E3u;
  23. // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
  24. // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
  25. if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*')
  26. return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap;
  27. // A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence.
  28. if (is_ascii(code_point))
  29. return false;
  30. // https://unicode.org/reports/tr51/#def_emoji_character
  31. if (code_point_has_property(code_point, Property::Emoji))
  32. return true;
  33. // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
  34. // emoji_presentation_sequence := emoji_character emoji_presentation_selector
  35. if (next_code_point == emoji_presentation_selector)
  36. return true;
  37. // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
  38. // emoji_modifier_sequence := emoji_modifier_base emoji_modifier
  39. if (code_point_has_property(code_point, Property::Emoji_Modifier_Base))
  40. return true;
  41. // https://unicode.org/reports/tr51/#def_emoji_flag_sequence
  42. // emoji_flag_sequence := regional_indicator regional_indicator
  43. if (code_point_has_property(code_point, Property::Regional_Indicator))
  44. return true;
  45. return false;
  46. }
  47. static bool could_be_start_of_serenity_emoji(u32 code_point)
  48. {
  49. // We use Supplementary Private Use Area-B for custom Serenity emoji, starting at U+10CD00.
  50. static constexpr auto first_custom_serenity_emoji_code_point = 0x10CD00u;
  51. return code_point >= first_custom_serenity_emoji_code_point;
  52. }
  53. #endif
  // https://unicode.org/reports/tr51/#def_emoji_sequence
  //
  //     emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence
  //
  // Shared implementation behind the public could_be_start_of_emoji_sequence() overloads. The
  // iterator type must provide done(), operator* and peek(n). Returns false for an exhausted
  // iterator.
  template<typename CodePointIterator>
  static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it)
  {
      if (it.done())
          return false;

  #if ENABLE_UNICODE_DATA
      // This is a quick filter for code points that cannot possibly start an emoji. Testing the
      // emoji_core_sequence definition alone is enough to also cover the start of every
      // emoji_zwj_sequence and emoji_tag_sequence, because:
      //
      // * emoji_zwj_sequence must begin with an emoji_zwj_element:
      //       emoji_zwj_element := emoji_character | emoji_presentation_sequence | emoji_modifier_sequence
      //   which is a subset of emoji_core_sequence.
      //
      // * emoji_tag_sequence must begin with a tag_base:
      //       tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence
      //   which is likewise a subset of emoji_core_sequence.
      auto const first = *it;
      auto const second = it.peek(1);

      return could_be_start_of_emoji_core_sequence(first, second)
          || could_be_start_of_serenity_emoji(first);
  #else
      // Without Unicode data no code point can be ruled out, so every position is a candidate.
      return true;
  #endif
  }
  83. bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it)
  84. {
  85. return could_be_start_of_emoji_sequence_impl(it);
  86. }
  87. bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it)
  88. {
  89. return could_be_start_of_emoji_sequence_impl(it);
  90. }
  91. }