ソースを参照

Meta: Replace Bash script for generating emoji.txt with C++ generator

We currently have two build-time parsers for the UCD's emoji-test.txt
file. To prepare for future changes, this removes the Bash parser and
moves its functionality to the newer C++ parser.
Timothy Flynn 2 年 前
コミット
bd592480e4

+ 11 - 16
Meta/CMake/unicode_data.cmake

@@ -61,7 +61,6 @@ set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}")
 string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}")
 string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}")
 set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt")
 set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt")
 set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt")
 set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt")
-set(EMOJI_GENERATOR_PATH "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh")
 set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji")
 set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji")
 set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt")
 set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt")
 set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt")
 set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt")
@@ -96,6 +95,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
     set(EMOJI_DATA_HEADER EmojiData.h)
     set(EMOJI_DATA_HEADER EmojiData.h)
     set(EMOJI_DATA_IMPLEMENTATION EmojiData.cpp)
     set(EMOJI_DATA_IMPLEMENTATION EmojiData.cpp)
 
 
+    if (SERENITYOS)
+        set(EMOJI_INSTALL_ARG -i "${EMOJI_INSTALL_PATH}")
+    endif()
+
     invoke_generator(
     invoke_generator(
         "UnicodeData"
         "UnicodeData"
         Lagom::GenerateUnicodeData
         Lagom::GenerateUnicodeData
@@ -110,22 +113,14 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
         "${UCD_VERSION_FILE}"
         "${UCD_VERSION_FILE}"
         "${EMOJI_DATA_HEADER}"
         "${EMOJI_DATA_HEADER}"
         "${EMOJI_DATA_IMPLEMENTATION}"
         "${EMOJI_DATA_IMPLEMENTATION}"
-        arguments -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}"
-    )
+        arguments "${EMOJI_INSTALL_ARG}" -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" -r "${EMOJI_RES_PATH}"
 
 
-    if (SERENITYOS)
-        add_custom_command(
-            OUTPUT "${EMOJI_INSTALL_PATH}"
-            COMMAND "${EMOJI_GENERATOR_PATH}" "${EMOJI_TEST_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_INSTALL_PATH}"
-            # This will make this command only run when the modified time of the directory changes,
-            # which only happens if files within it are added or deleted, but not when a file is modified.
-            # This is fine for this use-case, because the contents of a file changing should not affect
-            # the generated emoji.txt file.
-            DEPENDS "${EMOJI_GENERATOR_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_TEST_PATH}"
-            USES_TERMINAL
-        )
-        add_custom_target(generate_emoji_txt ALL DEPENDS "${EMOJI_INSTALL_PATH}")
-    endif()
+        # This will make this command only run when the modified time of the directory changes,
+        # which only happens if files within it are added or deleted, but not when a file is modified.
+        # This is fine for this use-case, because the contents of a file changing should not affect
+        # the generated emoji.txt file.
+        dependencies "${EMOJI_RES_PATH}" "${EMOJI_SERENITY_PATH}"
+    )
 
 
     set(UNICODE_DATA_SOURCES
     set(UNICODE_DATA_SOURCES
         ${UNICODE_DATA_HEADER}
         ${UNICODE_DATA_HEADER}

+ 90 - 1
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp

@@ -11,6 +11,7 @@
 #include <AK/StringUtils.h>
 #include <AK/StringUtils.h>
 #include <AK/Types.h>
 #include <AK/Types.h>
 #include <LibCore/ArgsParser.h>
 #include <LibCore/ArgsParser.h>
+#include <LibCore/Directory.h>
 #include <LibCore/Stream.h>
 #include <LibCore/Stream.h>
 #include <LibUnicode/Emoji.h>
 #include <LibUnicode/Emoji.h>
 
 
@@ -19,10 +20,14 @@ constexpr auto s_string_index_type = "u16"sv;
 
 
 struct Emoji {
 struct Emoji {
     StringIndexType name { 0 };
     StringIndexType name { 0 };
+    Optional<String> image_path;
     Unicode::EmojiGroup group;
     Unicode::EmojiGroup group;
+    String subgroup;
     u32 display_order { 0 };
     u32 display_order { 0 };
-    String code_points_name;
     Vector<u32> code_points;
     Vector<u32> code_points;
+    String code_points_name;
+    String encoded_code_points;
+    String status;
 };
 };
 
 
 struct EmojiData {
 struct EmojiData {
@@ -30,13 +35,32 @@ struct EmojiData {
     Vector<Emoji> emojis;
     Vector<Emoji> emojis;
 };
 };
 
 
+static void set_image_path_for_emoji(StringView emoji_resource_path, Emoji& emoji)
+{
+    StringBuilder builder;
+
+    for (auto code_point : emoji.code_points) {
+        if (code_point == 0xfe0f)
+            continue;
+        if (!builder.is_empty())
+            builder.append('_');
+        builder.appendff("U+{:X}", code_point);
+    }
+
+    auto path = String::formatted("{}/{}.png", emoji_resource_path, builder.build());
+    if (Core::Stream::File::exists(path))
+        emoji.image_path = move(path);
+}
+
 static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
 static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
 {
 {
     static constexpr auto group_header = "# group: "sv;
     static constexpr auto group_header = "# group: "sv;
+    static constexpr auto subgroup_header = "# subgroup: "sv;
 
 
     Array<u8, 1024> buffer;
     Array<u8, 1024> buffer;
 
 
     Unicode::EmojiGroup group;
     Unicode::EmojiGroup group;
+    String subgroup;
     u32 display_order { 0 };
     u32 display_order { 0 };
 
 
     while (TRY(file.can_read_line())) {
     while (TRY(file.can_read_line())) {
@@ -48,6 +72,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo
             if (line.starts_with(group_header)) {
             if (line.starts_with(group_header)) {
                 auto name = line.substring_view(group_header.length());
                 auto name = line.substring_view(group_header.length());
                 group = Unicode::emoji_group_from_string(name);
                 group = Unicode::emoji_group_from_string(name);
+            } else if (line.starts_with(subgroup_header)) {
+                subgroup = line.substring_view(subgroup_header.length());
             }
             }
 
 
             continue;
             continue;
@@ -61,6 +87,7 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo
 
 
         Emoji emoji {};
         Emoji emoji {};
         emoji.group = group;
         emoji.group = group;
+        emoji.subgroup = subgroup;
         emoji.display_order = display_order++;
         emoji.display_order = display_order++;
 
 
         auto code_points = line.substring_view(0, *status_index).split_view(' ');
         auto code_points = line.substring_view(0, *status_index).split_view(' ');
@@ -81,6 +108,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo
         auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace();
         auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace();
         emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string());
         emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string());
         emoji.code_points_name = String::join('_', code_points);
         emoji.code_points_name = String::join('_', code_points);
+        emoji.encoded_code_points = emoji_and_name.substring_view(0, emoji_and_name_spaces[1]).trim_whitespace();
+        emoji.status = line.substring_view(*status_index + 1, *emoji_and_name_index - *status_index - 1).trim_whitespace();
 
 
         TRY(emoji_data.emojis.try_append(move(emoji)));
         TRY(emoji_data.emojis.try_append(move(emoji)));
     }
     }
@@ -241,24 +270,74 @@ Optional<Emoji> find_emoji_for_code_points(Span<u32 const> code_points)
     return {};
     return {};
 }
 }
 
 
+static ErrorOr<void> generate_emoji_installation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data)
+{
+    StringBuilder builder;
+    SourceGenerator generator { builder };
+
+    auto current_group = Unicode::EmojiGroup::Unknown;
+    StringView current_subgroup;
+
+    for (auto const& emoji : emoji_data.emojis) {
+        if (!emoji.image_path.has_value())
+            continue;
+        if (emoji.group == Unicode::EmojiGroup::SerenityOS)
+            continue; // SerenityOS emojis are in emoji-serenity.txt
+
+        if (current_group != emoji.group) {
+            if (!builder.is_empty())
+                generator.append("\n"sv);
+
+            generator.set("group"sv, Unicode::emoji_group_to_string(emoji.group));
+            generator.append("# group: @group@\n");
+
+            current_group = emoji.group;
+        }
+
+        if (current_subgroup != emoji.subgroup) {
+            generator.set("subgroup"sv, emoji.subgroup);
+            generator.append("\n# subgroup: @subgroup@\n");
+
+            current_subgroup = emoji.subgroup;
+        }
+
+        generator.set("emoji"sv, emoji.encoded_code_points);
+        generator.set("name"sv, emoji_data.unique_strings.get(emoji.name));
+        generator.set("status"sv, emoji.status);
+
+        generator.append("@emoji@"sv);
+        generator.append(" - "sv);
+        generator.append(String::join(" "sv, emoji.code_points, "U+{:X}"sv));
+        generator.append(" @name@ (@status@)\n"sv);
+    }
+
+    TRY(file.write(generator.as_string_view().bytes()));
+    return {};
+}
+
 ErrorOr<int> serenity_main(Main::Arguments arguments)
 ErrorOr<int> serenity_main(Main::Arguments arguments)
 {
 {
     StringView generated_header_path;
     StringView generated_header_path;
     StringView generated_implementation_path;
     StringView generated_implementation_path;
+    StringView generated_installation_path;
     StringView emoji_test_path;
     StringView emoji_test_path;
     StringView emoji_serenity_path;
     StringView emoji_serenity_path;
+    StringView emoji_resource_path;
 
 
     Core::ArgsParser args_parser;
     Core::ArgsParser args_parser;
     args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
     args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
     args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
     args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
+    args_parser.add_option(generated_installation_path, "Path to the emoji.txt file to generate", "generated-installation-path", 'i', "generated-installation-path");
     args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path");
     args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path");
     args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path");
     args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path");
+    args_parser.add_option(emoji_resource_path, "Path to the /res/emoji directory", "emoji-resource-path", 'r', "emoji-resource-path");
     args_parser.parse(arguments);
     args_parser.parse(arguments);
 
 
     auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
     auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
     auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
     auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
     auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read));
     auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read));
     auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read));
     auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read));
+    VERIFY(Core::Stream::File::exists(emoji_resource_path));
 
 
     EmojiData emoji_data {};
     EmojiData emoji_data {};
     TRY(parse_emoji_test_data(*emoji_test_file, emoji_data));
     TRY(parse_emoji_test_data(*emoji_test_file, emoji_data));
@@ -267,5 +346,15 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
     TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
     TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));
     TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));
 
 
+    if (!generated_installation_path.is_empty()) {
+        TRY(Core::Directory::create(LexicalPath { generated_installation_path }.parent(), Core::Directory::CreateDirectories::Yes));
+
+        for (auto& emoji : emoji_data.emojis)
+            set_image_path_for_emoji(emoji_resource_path, emoji);
+
+        auto generated_installation_file = TRY(open_file(generated_installation_path, Core::Stream::OpenMode::Write));
+        TRY(generate_emoji_installation(*generated_installation_file, emoji_data));
+    }
+
     return 0;
     return 0;
 }
 }

+ 0 - 77
Meta/generate-emoji-txt.sh

@@ -1,77 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-if [ $# -ne 3 ]; then
-  echo "Usage: $0 <input emoji-test.txt file> <emoji image directory> <output path>"
-  exit 1
-fi
-
-INPUT_FILE="$1"
-EMOJI_DIR="$2"
-OUTPUT_PATH="$3"
-
-# empty the generated file first
-:>| "$OUTPUT_PATH"
-
-first_heading=true
-printed_group_header=false
-printed_subgroup_header=false
-while IFS= read -r line
-do
-    if [[ $line == \#\ group:\ * ]]; then
-        current_group="$line"
-        printed_group_header=false
-    elif [[ $line == \#\ subgroup:\ * ]]; then
-        current_subgroup="$line"
-        printed_subgroup_header=false
-    elif [[ ${#line} -ne 0 && $line != \#* ]]; then
-        codepoints_string=${line%%;*}
-        IFS=" " read -r -a codepoints <<< "$codepoints_string"
-        for i in "${!codepoints[@]}"; do
-            # strip leading zeros
-            codepoints[$i]="${codepoints[$i]#"${codepoints[$i]%%[!0]*}"}"
-            # add U+ prefix
-            codepoints[$i]="U+${codepoints[$i]}"
-        done
-
-        # when doing a lookup we want to remove all U+FE0F (emoji presentation specifier) codepoints
-        lookup_filename_parts=()
-        for codepoint in "${codepoints[@]}"; do
-            if [[ $codepoint != "U+FE0F" ]]; then
-                lookup_filename_parts+=("$codepoint")
-            fi
-        done
-
-        IFS=_
-        lookup_filename="${lookup_filename_parts[*]}.png"
-
-        if [ -f "$EMOJI_DIR/$lookup_filename" ]; then
-            if [ $printed_group_header = false ]; then
-                if [ $first_heading = false ]; then
-                    echo "" >> "$OUTPUT_PATH"
-                fi
-                echo "$current_group" >> "$OUTPUT_PATH"
-                first_heading=false
-                printed_group_header=true
-            fi
-            if [ $printed_subgroup_header = false ]; then
-                echo "" >> "$OUTPUT_PATH"
-                echo "$current_subgroup" >> "$OUTPUT_PATH"
-                printed_subgroup_header=true
-            fi
-
-            emoji_and_name=${line#*# }
-            emoji=${emoji_and_name%% E*}
-            name_with_version=${emoji_and_name#* }
-            name=${name_with_version#* }
-            qualification=${line#*; }
-            qualification=${qualification%%#*}
-            # remove trailing whitespace characters
-            qualification="${qualification%"${qualification##*[![:space:]]}"}"
-
-            IFS=" "
-            echo "$emoji - ${codepoints[*]} ${name^^} ($qualification)" >> "$OUTPUT_PATH"
-        fi
-    fi
-done < "$INPUT_FILE"