Emoji.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /*
  2. * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
  3. * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/LexicalPath.h>
  10. #include <AK/Span.h>
  11. #include <AK/Utf32View.h>
  12. #include <AK/Utf8View.h>
  13. #include <AK/Variant.h>
  14. #include <LibGfx/Bitmap.h>
  15. #include <LibGfx/Font/Emoji.h>
  16. #include <LibUnicode/CharacterTypes.h>
  17. #include <LibUnicode/Emoji.h>
  18. namespace Gfx {
  19. // https://unicode.org/reports/tr51/
  20. // https://unicode.org/emoji/charts/emoji-list.html
  21. // https://unicode.org/emoji/charts/emoji-zwj-sequences.html
  22. static HashMap<StringView, RefPtr<Gfx::Bitmap>> s_emojis;
  23. static Variant<String, StringView> s_emoji_lookup_path = "/res/emoji"sv;
  24. static StringView emoji_lookup_path()
  25. {
  26. return s_emoji_lookup_path.visit([](auto const& path) -> StringView { return path; });
  27. }
  28. void Emoji::set_emoji_lookup_path(String emoji_lookup_path)
  29. {
  30. s_emoji_lookup_path = move(emoji_lookup_path);
  31. }
  32. Bitmap const* Emoji::emoji_for_code_point(u32 code_point)
  33. {
  34. return emoji_for_code_points(Array { code_point });
  35. }
  36. Bitmap const* Emoji::emoji_for_code_points(ReadonlySpan<u32> const& code_points)
  37. {
  38. auto emoji = Unicode::find_emoji_for_code_points(code_points);
  39. if (!emoji.has_value() || !emoji->image_path.has_value())
  40. return nullptr;
  41. auto emoji_file = emoji->image_path.value();
  42. if (auto it = s_emojis.find(emoji_file); it != s_emojis.end())
  43. return it->value.ptr();
  44. auto emoji_path = LexicalPath::join(emoji_lookup_path(), emoji_file);
  45. auto bitmap_or_error = Bitmap::load_from_file(emoji_path.string());
  46. if (bitmap_or_error.is_error()) {
  47. dbgln_if(EMOJI_DEBUG, "Generated emoji data has file {}, but could not load image: {}", emoji_file, bitmap_or_error.error());
  48. s_emojis.set(emoji_file, nullptr);
  49. return nullptr;
  50. }
  51. auto bitmap = bitmap_or_error.release_value();
  52. s_emojis.set(emoji_file, bitmap);
  53. return bitmap.ptr();
  54. }
  55. template<typename CodePointIterator>
  56. static Bitmap const* emoji_for_code_point_iterator_impl(CodePointIterator& it)
  57. {
  58. if (!Unicode::could_be_start_of_emoji_sequence(it))
  59. return nullptr;
  60. constexpr size_t max_emoji_code_point_sequence_length = 10;
  61. Vector<u32, max_emoji_code_point_sequence_length> code_points;
  62. struct EmojiAndCodePoints {
  63. Bitmap const* emoji;
  64. Span<u32> code_points;
  65. u8 real_codepoint_length;
  66. };
  67. Vector<EmojiAndCodePoints, max_emoji_code_point_sequence_length> possible_emojis;
  68. // Determine all existing emojis for the longest possible ZWJ emoji sequence,
  69. // or until we run out of code points in the iterator.
  70. bool last_codepoint_sequence_found = false;
  71. for (u8 i = 0; i < max_emoji_code_point_sequence_length; ++i) {
  72. auto code_point = it.peek(i);
  73. if (!code_point.has_value())
  74. break;
  75. // NOTE: The following only applies to emoji presentation, not to other
  76. // emoji modifiers.
  77. //
  78. // For a single emoji core sequence, we assume that emoji presentation
  79. // is implied, since this function will only be called for characters
  80. // with default text presentation when either (1) the character is not
  81. // found in the font, or (2) the character is followed by an explicit
  82. // emoji presentation selector.
  83. //
  84. // For emoji zwj sequences, Serenity chooses to treat minimally-qualified
  85. // and unqualified emojis the same as fully-qualified emojis (with regards
  86. // to emoji presentation).
  87. //
  88. // From https://unicode.org/reports/tr51/#Emoji_Implementation_Notes:
  89. // > minimally-qualified or unqualified emoji zwj sequences may be handled
  90. // > in the same way as their fully-qualified forms; the choice is up to
  91. // > the implementation.
  92. //
  93. // In both cases, whenever an emoji presentation selector (U+FE0F) is found, we
  94. // just skip it in order to drop fully-qualified emojis down to their
  95. // minimally-qualified or unqualified forms (with respect to emoji presentation)
  96. // for doing emoji lookups. This ensures that all forms are treated the same
  97. // assuming the emoji filenames are named accordingly (with all emoji presentation
  98. // selector codepoints removed).
  99. if (code_point.value() == 0xFE0F) {
  100. // If the last sequence was found, then we can just update
  101. // its real length.
  102. if (last_codepoint_sequence_found) {
  103. possible_emojis.last().real_codepoint_length++;
  104. }
  105. // And we can always skip the lookup since the code point sequence
  106. // will be unchanged since last time.
  107. continue;
  108. } else {
  109. code_points.append(*code_point);
  110. }
  111. if (auto const* emoji = Emoji::emoji_for_code_points(code_points)) {
  112. u8 real_codepoint_length = i + 1;
  113. possible_emojis.empend(emoji, code_points, real_codepoint_length);
  114. last_codepoint_sequence_found = true;
  115. } else {
  116. last_codepoint_sequence_found = false;
  117. }
  118. }
  119. if (possible_emojis.is_empty())
  120. return nullptr;
  121. // If we found one or more matches, return the longest, i.e. last. For example:
  122. // U+1F3F3 - white flag
  123. // U+1F3F3 U+200D U+1F308 - rainbow flag (unqualified form)
  124. auto& [emoji, emoji_code_points, codepoint_length] = possible_emojis.last();
  125. // Advance the iterator, so it's on the last code point of our found emoji and
  126. // whoever is iterating will advance to the next new code point.
  127. for (u8 i = 0; i < codepoint_length - 1; ++i)
  128. ++it;
  129. return emoji;
  130. }
  131. Bitmap const* Emoji::emoji_for_code_point_iterator(Utf8CodePointIterator& it)
  132. {
  133. return emoji_for_code_point_iterator_impl(it);
  134. }
  135. Bitmap const* Emoji::emoji_for_code_point_iterator(Utf32CodePointIterator& it)
  136. {
  137. return emoji_for_code_point_iterator_impl(it);
  138. }
  139. }