LibUnicode: Parse and generate custom emoji added for SerenityOS

Parse emoji from emoji-serenity.txt to allow displaying their names and
grouping them together in the EmojiInputDialog.

This also adds an "Unknown" value to the EmojiGroup enum. This will be
useful for emoji that aren't found in the UCD, or for when UCD downloads
are disabled.
This commit is contained in:
Timothy Flynn 2022-09-09 09:51:03 -04:00 committed by Linus Groh
parent 0aadd4869d
commit b7ef36aa36
Notes: sideshowbarker 2024-07-17 07:17:11 +09:00
3 changed files with 72 additions and 2 deletions

View file

@ -63,6 +63,7 @@ set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test
set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt")
set(EMOJI_GENERATOR_PATH "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh")
set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji")
set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt")
set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt")
if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
@ -123,7 +124,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
"${UNICODE_META_TARGET_PREFIX}"
"${EMOJI_DATA_HEADER}"
"${EMOJI_DATA_IMPLEMENTATION}"
arguments -e "${EMOJI_TEST_PATH}"
arguments -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}"
)
if (CMAKE_CURRENT_BINARY_DIR MATCHES ".*/LibUnicode") # Serenity build.

View file

@ -5,6 +5,7 @@
*/
#include "GeneratorUtil.h"
#include <AK/AnyOf.h>
#include <AK/SourceGenerator.h>
#include <AK/String.h>
#include <AK/StringUtils.h>
@ -87,6 +88,57 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo
return {};
}
// Reads the SerenityOS-specific emoji list from `file` and appends one Emoji
// entry per usable line to `emoji_data.emojis`.
//
// A line is usable when it is non-empty and contains at least one "U+" marker;
// everything before the first marker is discarded. The remainder is split on
// spaces: tokens that start with "U+" are parsed as hexadecimal code points,
// and all other tokens are joined with single spaces to form the emoji's name.
//
// Every emoji produced here is placed in the SerenityOS group, and display
// orders continue counting from the number of emoji already in `emoji_data`.
// Read/allocation errors are propagated to the caller; a "U+" token that is
// not valid hexadecimal trips a VERIFY.
static ErrorOr<void> parse_emoji_serenity_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
{
    static constexpr auto code_point_prefix = "U+"sv;

    Array<u8, 1024> line_buffer;

    // Number the custom emoji after whatever has already been collected.
    auto next_display_order = static_cast<u32>(emoji_data.emojis.size()) + 1u;

    while (TRY(file.can_read_line())) {
        auto line = TRY(file.read_line(line_buffer));
        if (line.is_empty())
            continue;

        // Lines without any code point marker carry no emoji definition.
        auto header_offset = line.find(code_point_prefix);
        if (!header_offset.has_value())
            continue;

        // Drop whatever precedes the first code point.
        line = line.substring_view(*header_offset);

        StringBuilder name_builder;

        Emoji emoji {};
        emoji.group = Unicode::EmojiGroup::SerenityOS;
        emoji.display_order = next_display_order;
        ++next_display_order;

        line.for_each_split_view(' ', false, [&](auto token) {
            if (!token.starts_with(code_point_prefix)) {
                // Part of the human-readable name; re-join words with one space.
                if (!name_builder.is_empty())
                    name_builder.append(' ');

                name_builder.append(token);
                return;
            }

            // "U+XXXX": strip the marker and decode the hexadecimal value.
            token = token.substring_view(code_point_prefix.length());

            auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(token);
            VERIFY(code_point.has_value());

            emoji.code_points.append(*code_point);
        });

        auto name = name_builder.build();

        // Names containing no ASCII lowercase letter are converted to title
        // case; names that already have one are kept verbatim.
        auto const has_lowercase_letter = any_of(name, is_ascii_lower_alpha);
        if (!has_lowercase_letter)
            name = name.to_titlecase();

        emoji.name = emoji_data.unique_strings.ensure(move(name));
        emoji.code_points_name = String::join('_', emoji.code_points);

        TRY(emoji_data.emojis.try_append(move(emoji)));
    }

    return {};
}
static ErrorOr<void> generate_emoji_data_header(Core::Stream::BufferedFile& file, EmojiData const&)
{
StringBuilder builder;
@ -194,19 +246,23 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
StringView generated_header_path;
StringView generated_implementation_path;
StringView emoji_test_path;
StringView emoji_serenity_path;
Core::ArgsParser args_parser;
args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path");
args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path");
args_parser.parse(arguments);
auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read));
auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read));
EmojiData emoji_data {};
TRY(parse_emoji_test_data(*emoji_test_file, emoji_data));
TRY(parse_emoji_serenity_data(*emoji_serenity_file, emoji_data));
TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));

View file

@ -13,6 +13,8 @@
namespace Unicode {
enum class EmojiGroup : u8 {
Unknown,
SmileysAndEmotion,
PeopleAndBody,
Component,
@ -23,11 +25,14 @@ enum class EmojiGroup : u8 {
Objects,
Symbols,
Flags,
// Non-standard emoji added for SerenityOS:
SerenityOS,
};
struct Emoji {
StringView name;
EmojiGroup group;
EmojiGroup group { EmojiGroup::Unknown };
u32 display_order { 0 };
Span<u32 const> code_points;
};
@ -43,6 +48,8 @@ Optional<Emoji> find_emoji_for_code_points(u32 const (&code_points)[Size])
constexpr StringView emoji_group_to_string(EmojiGroup group)
{
switch (group) {
case EmojiGroup::Unknown:
return "Unknown"sv;
case EmojiGroup::SmileysAndEmotion:
return "Smileys & Emotion"sv;
case EmojiGroup::PeopleAndBody:
@ -63,6 +70,8 @@ constexpr StringView emoji_group_to_string(EmojiGroup group)
return "Symbols"sv;
case EmojiGroup::Flags:
return "Flags"sv;
case EmojiGroup::SerenityOS:
return "SerenityOS"sv;
}
VERIFY_NOT_REACHED();
@ -70,6 +79,8 @@ constexpr StringView emoji_group_to_string(EmojiGroup group)
constexpr EmojiGroup emoji_group_from_string(StringView group)
{
if (group == "Unknown"sv)
return EmojiGroup::Unknown;
if (group == "Smileys & Emotion"sv)
return EmojiGroup::SmileysAndEmotion;
if (group == "People & Body"sv)
@ -90,6 +101,8 @@ constexpr EmojiGroup emoji_group_from_string(StringView group)
return EmojiGroup::Symbols;
if (group == "Flags"sv)
return EmojiGroup::Flags;
if (group == "SerenityOS"sv)
return EmojiGroup::SerenityOS;
VERIFY_NOT_REACHED();
}