LibUnicode: Detect ZWJ sequences when filtering by emoji presentation

This was preventing some unqualified emoji sequences from rendering
properly, such as the custom SerenityOS flag. We rendered the flag
correctly when given the fully qualified sequence:

    U+1F3F3 U+FE0F U+200D U+1F41E

But were not detecting the unqualified sequence as an emoji when also
filtering for emoji-presentation sequences:

    U+1F3F3 U+200D U+1F41E
This commit is contained in:
Timothy Flynn 2023-03-04 12:19:45 -05:00 committed by Andreas Kling
parent ab6bd946d8
commit f8a0365002
Notes: sideshowbarker 2024-07-17 00:27:16 +09:00
2 changed files with 22 additions and 1 deletions

View file

@ -43,6 +43,24 @@ TEST_CASE(emoji)
test_emojis(s_flags);
}
// Exercises Unicode::could_be_start_of_emoji_sequence() restricted to
// SequenceType::EmojiPresentation: for every sample string below, the answer
// for the iterator at the start of the string must match the expected flag.
TEST_CASE(emoji_presentation_only)
{
    auto expect_sequence_start = [](auto text, auto expected_result) {
        Utf8View code_points { text };

        // Only the position at the very beginning of the string is queried.
        auto is_start_of_emoji_sequence = Unicode::could_be_start_of_emoji_sequence(
            code_points.begin(),
            Unicode::SequenceType::EmojiPresentation);

        EXPECT_EQ(is_start_of_emoji_sequence, expected_result);
    };

    // Copyright sign: accepted in its emoji-styled form, rejected as plain text.
    expect_sequence_start("©️"sv, true);
    expect_sequence_start("©"sv, false);

    // Registered sign: same pairing as above.
    expect_sequence_start("®️"sv, true);
    expect_sequence_start("®"sv, false);

    // Both spellings of the ZWJ sequence must be accepted: without U+FE0F
    // after the first code point, and with it.
    expect_sequence_start("\U0001F3F3\u200D\U0001F41E"sv, true); // SerenityOS flag
    expect_sequence_start("\U0001F3F3\uFE0F\u200D\U0001F41E"sv, true); // SerenityOS flag
}
TEST_CASE(ascii_is_not_emoji)
{
for (u32 code_point = 0u; is_ascii(code_point); ++code_point) {

View file

@ -27,6 +27,7 @@ static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32>
static constexpr auto emoji_presentation_selector = 0xFE0Fu;
static constexpr auto combining_enclosing_keycap = 0x20E3u;
static constexpr auto zero_width_joiner = 0x200Du;
// https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
// emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
@ -46,6 +47,8 @@ static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32>
case SequenceType::EmojiPresentation:
if (code_point_has_property(code_point, Property::Emoji_Presentation))
return true;
if (next_code_point == zero_width_joiner && code_point_has_property(code_point, Property::Emoji))
return true;
break;
}
@ -92,7 +95,7 @@ static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [
// emoji_zwj_sequence and emoji_tag_sequence emojis, because:
//
// * emoji_zwj_sequence must begin with emoji_zwj_element, which is:
// emoji_zwj_sequence := emoji_core_sequence | emoji_tag_sequence
// emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence
//
// * emoji_tag_sequence must begin with tag_base, which is:
// tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence