mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-04 05:20:30 +00:00
LibUnicode: Remove unused emoji support methods
This commit is contained in:
parent
0244859ab7
commit
5eda629326
Notes:
github-actions[bot]
2024-09-06 19:43:25 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/5eda6293263 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1303
6 changed files with 0 additions and 215 deletions
|
@ -1,6 +1,5 @@
|
|||
set(TEST_SOURCES
|
||||
TestDisplayNames.cpp
|
||||
TestEmoji.cpp
|
||||
TestIDNA.cpp
|
||||
TestLocale.cpp
|
||||
TestSegmenter.cpp
|
||||
|
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Array.h>
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibTest/TestCase.h>
|
||||
#include <LibUnicode/Emoji.h>
|
||||
|
||||
// These emojis are the first subgroup in each Unicode-defined group of emojis, plus some interesting
|
||||
// hand-picked test cases (such as keycap emoji, which begin with ASCII symbols, and country flags).
|
||||
static constexpr auto s_smileys_emotion = Array { "😀"sv, "😃"sv, "😄"sv, "😁"sv, "😆"sv, "😅"sv, "🤣"sv, "😂"sv, "🙂"sv, "🙃"sv, "🫠"sv, "😉"sv, "😊"sv, "😇"sv };
|
||||
static constexpr auto s_people_body = Array { "👋"sv, "🤚"sv, "🖐️"sv, "🖐"sv, "✋"sv, "🫱"sv, "🫲"sv, "🫳"sv, "🫴"sv, "🫷"sv, "🫸"sv };
|
||||
static constexpr auto s_animals_nature = Array { "🐶"sv, "🐕"sv, "🐕🦺"sv, "🐩"sv, "🦊"sv, "🦝"sv, "🐱"sv, "🐈"sv, "🐈⬛"sv, "🦁"sv, "🐯"sv, "🐴"sv, "🫎"sv, "🫏"sv, "🐎"sv, "🦄"sv, "🦓"sv, "🦌"sv, "🦬"sv, "🐮"sv, "🐷"sv, "🐖"sv, "🐗"sv, "🐽"sv, "🐑"sv, "🦙"sv, "🦒"sv, "🐘"sv, "🐭"sv, "🐁"sv, "🐀"sv, "🐰"sv, "🐇"sv, "🐿️"sv, "🐿"sv, "🦔"sv, "🦇"sv, "🐻"sv, "🐻❄️"sv, "🐻❄"sv, "🐨"sv, "🐼"sv, "🦥"sv, "🦘"sv, "🦡"sv, "🐾"sv };
|
||||
static constexpr auto s_food_drink = Array { "🍇"sv, "🍈"sv, "🍉"sv, "🍊"sv, "🍋"sv, "🍌"sv, "🍍"sv, "🥭"sv, "🍎"sv, "🍏"sv, "🍐"sv, "🍑"sv, "🍒"sv, "🍓"sv, "🫐"sv, "🥝"sv, "🍅"sv, "🫒"sv, "🥥"sv };
|
||||
static constexpr auto s_travel_places = Array { "🌍"sv, "🌎"sv, "🌏"sv, "🌐"sv, "🗺️"sv, "🗺"sv, "🗾"sv, "🧭"sv };
|
||||
static constexpr auto s_activities = Array { "🎃"sv, "🎄"sv, "🎆"sv, "🎇"sv, "🧨"sv, "✨"sv, "🎈"sv, "🎉"sv, "🎊"sv, "🎋"sv, "🎍"sv, "🎏"sv, "🎑"sv, "🎀"sv, "🎁"sv, "🎗️"sv, "🎗"sv, "🎟️"sv, "🎟"sv, "🎫"sv };
|
||||
static constexpr auto s_objects = Array { "👓"sv, "🕶️"sv, "🕶"sv, "🦺"sv, "👔"sv, "👖"sv, "🧦"sv, "👗"sv, "🥻"sv, "🩱"sv, "🩲"sv, "🩳"sv, "👙"sv, "🪭"sv, "👛"sv, "👜"sv, "🛍️"sv, "🛍"sv, "🩴"sv, "👡"sv, "👢"sv, "🪮"sv, "👑"sv, "🎩"sv, "🎓"sv, "🪖"sv, "⛑️"sv, "⛑"sv, "💄"sv, "💍"sv, "💎"sv };
|
||||
static constexpr auto s_symbols = Array { "🚮"sv, "🚰"sv, "♿"sv, "🚹"sv, "🚺"sv, "🚾"sv, "🛂"sv, "🛃"sv, "🛄"sv, "🛅"sv, "#️⃣"sv, "#⃣"sv, "*️⃣"sv, "*⃣"sv, "0️⃣"sv, "0⃣"sv, "1️⃣"sv, "1⃣"sv, "2️⃣"sv, "2⃣"sv, "3️⃣"sv, "3⃣"sv, "4️⃣"sv, "4⃣"sv, "5️⃣"sv, "5⃣"sv, "6️⃣"sv, "6⃣"sv, "7️⃣"sv, "7⃣"sv, "8️⃣"sv, "8⃣"sv, "9️⃣"sv, "9⃣"sv, "🔟"sv };
|
||||
static constexpr auto s_flags = Array { "🏁"sv, "🚩"sv, "🎌"sv, "🏴"sv, "🏳️"sv, "🏳"sv, "🏳️🌈"sv, "🏳🌈"sv, "🏳️⚧️"sv, "🏳⚧️"sv, "🏳️⚧"sv, "🏳⚧"sv, "🏴☠️"sv, "🏴☠"sv, "🇦🇨"sv, "🇦🇩"sv, "🇦🇪"sv, "🇦🇫"sv, "🇦🇬"sv, "🇦🇮"sv, "🇦🇱"sv, "🇦🇲"sv, "🇦🇴"sv, "🇦🇶"sv, "🇦🇷"sv, "🇦🇸"sv, "🇦🇹"sv, "🇦🇺"sv, "🇦🇼"sv, "🇦🇽"sv, "🇦🇿"sv, "🇧🇦"sv, "🇧🇧"sv, "🇧🇩"sv, "🇧🇪"sv, "🇧🇫"sv, "🇧🇬"sv, "🇧🇭"sv, "🇧🇮"sv, "🇧🇯"sv, "🇧🇱"sv, "🇧🇲"sv, "🇧🇳"sv, "🇧🇴"sv, "🇧🇶"sv, "🇧🇷"sv, "🇧🇸"sv };
|
||||
|
||||
// Every sample in the s_* tables must be accepted by could_be_start_of_emoji_sequence() when it is
// examined from its first code point with the default sequence type.
TEST_CASE(emoji)
{
    // Checks one table of samples. The local is deliberately still called `view` so the expression
    // handed to EXPECT() reads the same as before.
    auto expect_group_starts_emoji_sequence = [](auto const& group) {
        for (auto const& sample : group) {
            Utf8View view { sample };
            EXPECT(Unicode::could_be_start_of_emoji_sequence(view.begin()));
        }
    };

    // Runs the check over each table in turn, left to right.
    auto expect_all_groups = [&](auto const&... groups) {
        (expect_group_starts_emoji_sequence(groups), ...);
    };

    expect_all_groups(
        s_smileys_emotion,
        s_people_body,
        s_animals_nature,
        s_food_drink,
        s_travel_places,
        s_activities,
        s_objects,
        s_symbols,
        s_flags);
}
|
||||
|
||||
// With SequenceType::EmojiPresentation, a text-default symbol is only accepted when something after
// it (a presentation selector or a zero-width joiner) requests emoji rendering.
TEST_CASE(emoji_presentation_only)
{
    struct Sample {
        StringView emoji;
        bool expected_result;
    };

    Sample const samples[] = {
        // Copyright sign: with, then without, a trailing U+FE0F.
        { "©️"sv, true },
        { "©"sv, false },

        // Registered sign: with, then without, a trailing U+FE0F.
        { "®️"sv, true },
        { "®"sv, false },

        { "\U0001F3F3\u200D\U0001F41E"sv, true },       // SerenityOS flag
        { "\U0001F3F3\uFE0F\u200D\U0001F41E"sv, true }, // SerenityOS flag
    };

    for (auto const& [emoji, expected_result] : samples) {
        Utf8View view { emoji };
        auto is_start_of_emoji_sequence = Unicode::could_be_start_of_emoji_sequence(view.begin(), Unicode::SequenceType::EmojiPresentation);
        EXPECT_EQ(is_start_of_emoji_sequence, expected_result);
    }
}
|
||||
|
||||
// No ASCII code point standing on its own may be reported as the start of an emoji sequence.
// Digits, '#' and '*' are keycap bases, but each string here holds a single code point, so there is
// no following selector or keycap mark (assuming peek(1) yields an empty Optional at end of input).
TEST_CASE(ascii_is_not_emoji)
{
    u32 code_point = 0u;

    while (is_ascii(code_point)) {
        auto const single_code_point = String::from_code_point(code_point);
        Utf8View view { single_code_point };

        EXPECT(!Unicode::could_be_start_of_emoji_sequence(view.begin()));

        ++code_point;
    }
}
|
|
@ -8,7 +8,6 @@
|
|||
#include "TextLayout.h"
|
||||
#include <AK/TypeCasts.h>
|
||||
#include <LibGfx/Font/ScaledFont.h>
|
||||
#include <LibUnicode/Emoji.h>
|
||||
#include <harfbuzz/hb.h>
|
||||
|
||||
namespace Gfx {
|
||||
|
|
|
@ -5,7 +5,6 @@ set(SOURCES
|
|||
DateTimeFormat.cpp
|
||||
DisplayNames.cpp
|
||||
DurationFormat.cpp
|
||||
Emoji.cpp
|
||||
ICU.cpp
|
||||
IDNA.cpp
|
||||
ListFormat.cpp
|
||||
|
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Utf32View.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <LibUnicode/Emoji.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Default definition of emoji_image_for_code_points(): ignores its argument and always returns an
// empty Optional. It is declared weak, so a non-weak definition linked in from elsewhere takes
// precedence over this one.
Optional<StringView> __attribute__((weak)) emoji_image_for_code_points(ReadonlySpan<u32>)
{
    return Optional<StringView> {};
}
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_core_sequence
|
||||
static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32> const& next_code_point, SequenceType type)
|
||||
{
|
||||
// emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence
|
||||
|
||||
static constexpr auto emoji_presentation_selector = 0xFE0Fu;
|
||||
static constexpr auto combining_enclosing_keycap = 0x20E3u;
|
||||
static constexpr auto zero_width_joiner = 0x200Du;
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
|
||||
// emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
|
||||
if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*')
|
||||
return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap;
|
||||
|
||||
// A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence.
|
||||
if (is_ascii(code_point))
|
||||
return false;
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_character
|
||||
switch (type) {
|
||||
case SequenceType::Any:
|
||||
if (code_point_has_emoji_property(code_point))
|
||||
return true;
|
||||
break;
|
||||
case SequenceType::EmojiPresentation:
|
||||
if (code_point_has_emoji_presentation_property(code_point))
|
||||
return true;
|
||||
if (next_code_point == zero_width_joiner && code_point_has_emoji_property(code_point))
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
|
||||
// emoji_presentation_sequence := emoji_character emoji_presentation_selector
|
||||
if (next_code_point == emoji_presentation_selector)
|
||||
return true;
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
|
||||
// emoji_modifier_sequence := emoji_modifier_base emoji_modifier
|
||||
if (code_point_has_emoji_modifier_base_property(code_point))
|
||||
return true;
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_flag_sequence
|
||||
// emoji_flag_sequence := regional_indicator regional_indicator
|
||||
if (code_point_has_regional_indicator_property(code_point))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Custom Serenity emoji live in Supplementary Private Use Area-B, beginning at U+10CD00. Any value
// at or above that starting point is accepted; no upper bound is checked here.
static bool could_be_start_of_serenity_emoji(u32 code_point)
{
    static constexpr auto custom_emoji_range_start = 0x10CD00u;

    return custom_emoji_range_start <= code_point;
}
|
||||
|
||||
// https://unicode.org/reports/tr51/#def_emoji_sequence
|
||||
template<typename CodePointIterator>
|
||||
static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, SequenceType type)
|
||||
{
|
||||
// emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence
|
||||
|
||||
if (it.done())
|
||||
return false;
|
||||
|
||||
// The purpose of this method is to quickly filter out code points that cannot be the start of
|
||||
// an emoji. The emoji_core_sequence definition alone captures the start of all possible
|
||||
// emoji_zwj_sequence and emoji_tag_sequence emojis, because:
|
||||
//
|
||||
// * emoji_zwj_sequence must begin with emoji_zwj_element, which is:
|
||||
// emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence
|
||||
//
|
||||
// * emoji_tag_sequence must begin with tag_base, which is:
|
||||
// tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence
|
||||
// Note that this is a subset of emoji_core_sequence.
|
||||
auto code_point = *it;
|
||||
auto next_code_point = it.peek(1);
|
||||
|
||||
if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type))
|
||||
return true;
|
||||
if (could_be_start_of_serenity_emoji(code_point))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// UTF-8 entry point: forwards to the iterator-generic implementation.
bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type)
{
    return could_be_start_of_emoji_sequence_impl<Utf8CodePointIterator>(it, type);
}
|
||||
|
||||
// UTF-32 entry point: forwards to the iterator-generic implementation.
bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type)
{
    return could_be_start_of_emoji_sequence_impl<Utf32CodePointIterator>(it, type);
}
|
||||
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
Optional<StringView> emoji_image_for_code_points(ReadonlySpan<u32> code_points);
|
||||
|
||||
enum class SequenceType {
|
||||
Any,
|
||||
EmojiPresentation,
|
||||
};
|
||||
|
||||
bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const&, SequenceType = SequenceType::Any);
|
||||
bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const&, SequenceType = SequenceType::Any);
|
||||
|
||||
}
|
Loading…
Reference in a new issue