From f8a03650020215883d4287cd400ac44fb96624ca Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 4 Mar 2023 12:19:45 -0500 Subject: [PATCH] LibUnicode: Detect ZWJ sequences when filtering by emoji presentation This was preventing some unqualified emoji sequences from rendering properly, such as the custom SerenityOS flag. We rendered the flag correctly when given the fully qualified sequence: U+1F3F3 U+FE0F U+200D U+1F41E But were not detecting the unqualified sequence as an emoji when also filtering for emoji-presentation sequences: U+1F3F3 U+200D U+1F41E --- Tests/LibUnicode/TestEmoji.cpp | 18 ++++++++++++++++++ Userland/Libraries/LibUnicode/Emoji.cpp | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Tests/LibUnicode/TestEmoji.cpp b/Tests/LibUnicode/TestEmoji.cpp index 591d02445bd..8c1048e8387 100644 --- a/Tests/LibUnicode/TestEmoji.cpp +++ b/Tests/LibUnicode/TestEmoji.cpp @@ -43,6 +43,24 @@ TEST_CASE(emoji) test_emojis(s_flags); } +TEST_CASE(emoji_presentation_only) +{ + auto test_emoji = [](auto emoji, auto expected_result) { + Utf8View view { emoji }; + auto is_start_of_emoji_sequence = Unicode::could_be_start_of_emoji_sequence(view.begin(), Unicode::SequenceType::EmojiPresentation); + EXPECT_EQ(is_start_of_emoji_sequence, expected_result); + }; + + test_emoji("©️"sv, true); + test_emoji("©"sv, false); + + test_emoji("®️"sv, true); + test_emoji("®"sv, false); + + test_emoji("\U0001F3F3\u200D\U0001F41E"sv, true); // SerenityOS flag + test_emoji("\U0001F3F3\uFE0F\u200D\U0001F41E"sv, true); // SerenityOS flag +} + TEST_CASE(ascii_is_not_emoji) { for (u32 code_point = 0u; is_ascii(code_point); ++code_point) { diff --git a/Userland/Libraries/LibUnicode/Emoji.cpp b/Userland/Libraries/LibUnicode/Emoji.cpp index c6bbad1f36f..55efd1757c6 100644 --- a/Userland/Libraries/LibUnicode/Emoji.cpp +++ b/Userland/Libraries/LibUnicode/Emoji.cpp @@ -27,6 +27,7 @@ static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional static constexpr
auto emoji_presentation_selector = 0xFE0Fu; static constexpr auto combining_enclosing_keycap = 0x20E3u; + static constexpr auto zero_width_joiner = 0x200Du; // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3} @@ -46,6 +47,8 @@ static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional case SequenceType::EmojiPresentation: if (code_point_has_property(code_point, Property::Emoji_Presentation)) return true; + if (next_code_point == zero_width_joiner && code_point_has_property(code_point, Property::Emoji)) + return true; break; } @@ -92,7 +95,7 @@ static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [ // emoji_zwj_sequence and emoji_tag_sequence emojis, because: // // * emoji_zwj_sequence must begin with emoji_zwj_element, which is: - // emoji_zwj_sequence := emoji_core_sequence | emoji_tag_sequence + // emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence // // * emoji_tag_sequence must begin with tag_base, which is: // tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence