/* * Copyright (c) 2019-2020, Sergey Bugaev * Copyright (c) 2022, Linus Groh * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include namespace Gfx { // https://unicode.org/reports/tr51/ // https://unicode.org/emoji/charts/emoji-list.html // https://unicode.org/emoji/charts/emoji-zwj-sequences.html static HashMap> s_emojis; Bitmap const* Emoji::emoji_for_code_point(u32 code_point) { return emoji_for_code_points(Array { code_point }); } Bitmap const* Emoji::emoji_for_code_points(ReadonlySpan const& code_points) { // FIXME: This function is definitely not fast. auto basename = DeprecatedString::join('_', code_points, "U+{:X}"sv); auto it = s_emojis.find(basename); if (it != s_emojis.end()) return (*it).value.ptr(); auto bitmap_or_error = Bitmap::load_from_file(DeprecatedString::formatted("/res/emoji/{}.png", basename)); if (bitmap_or_error.is_error()) { s_emojis.set(basename, nullptr); return nullptr; } auto bitmap = bitmap_or_error.release_value(); s_emojis.set(basename, bitmap); return bitmap.ptr(); } Bitmap const* Emoji::emoji_for_code_point_iterator(Utf8CodePointIterator& it) { // NOTE: I'm sure this could be more efficient, e.g. by checking if each code point falls // into a certain range in the loop below (emojis, modifiers, variation selectors, ZWJ), // and bailing out early if not. Current worst case is 10 file lookups for any sequence of // code points (if the first glyph isn't part of the font in regular text rendering). constexpr size_t max_emoji_code_point_sequence_length = 10; Vector code_points; struct EmojiAndCodePoints { Bitmap const* emoji; Span code_points; u8 real_codepoint_length; }; Vector possible_emojis; // Determine all existing emojis for the longest possible ZWJ emoji sequence, // or until we run out of code points in the iterator. bool last_codepoint_sequence_found = false; for (u8 i = 0; i < max_emoji_code_point_sequence_length; ++i) { auto code_point = it.peek(i); if (!code_point.has_value()) break; // NOTE: The following only applies to emoji presentation, not to other // emoji modifiers. // // For a single emoji core sequence, we assume that emoji presentation // is implied, since this function will only be called for characters // with default text presentation when either (1) the character is not // found in the font, or (2) the character is followed by an explicit // emoji presentation selector. // // For emoji zwj sequences, Serenity chooses to treat minimally-qualified // and unqualified emojis the same as fully-qualified emojis (with regards // to emoji presentation). // // From https://unicode.org/reports/tr51/#Emoji_Implementation_Notes: // > minimally-qualified or unqualified emoji zwj sequences may be handled // > in the same way as their fully-qualified forms; the choice is up to // > the implementation. // // In both cases, whenever an emoji presentation selector (U+FE0F) is found, we // just skip it in order to drop fully-qualified emojis down to their // minimally-qualified or unqualified forms (with respect to emoji presentation) // for doing emoji lookups. This ensures that all forms are treated the same // assuming the emoji filenames are named accordingly (with all emoji presentation // selector codepoints removed). if (code_point.value() == 0xFE0F) { // If the last sequence was found, then we can just update // its real length. if (last_codepoint_sequence_found) { possible_emojis.last().real_codepoint_length++; } // And we can always skip the lookup since the code point sequence // will be unchanged since last time. continue; } else { code_points.append(*code_point); } if (auto const* emoji = emoji_for_code_points(code_points)) { u8 real_codepoint_length = i + 1; possible_emojis.empend(emoji, code_points, real_codepoint_length); last_codepoint_sequence_found = true; } else { last_codepoint_sequence_found = false; } } if (possible_emojis.is_empty()) return nullptr; // If we found one or more matches, return the longest, i.e. last. For example: // U+1F3F3 - white flag // U+1F3F3 U+200D U+1F308 - rainbow flag (unqualified form) auto& [emoji, emoji_code_points, codepoint_length] = possible_emojis.last(); // Advance the iterator, so it's on the last code point of our found emoji and // whoever is iterating will advance to the next new code point. for (u8 i = 0; i < codepoint_length - 1; ++i) ++it; return emoji; } }