GenerateEmojiData.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AnyOf.h>
  8. #include <AK/SourceGenerator.h>
  9. #include <AK/String.h>
  10. #include <AK/StringUtils.h>
  11. #include <AK/Types.h>
  12. #include <LibCore/ArgsParser.h>
  13. #include <LibCore/Stream.h>
  14. #include <LibUnicode/Emoji.h>
  15. using StringIndexType = u16;
  16. constexpr auto s_string_index_type = "u16"sv;
  17. struct Emoji {
  18. StringIndexType name { 0 };
  19. Unicode::EmojiGroup group;
  20. u32 display_order { 0 };
  21. String code_points_name;
  22. Vector<u32> code_points;
  23. };
  24. struct EmojiData {
  25. UniqueStringStorage<StringIndexType> unique_strings;
  26. Vector<Emoji> emojis;
  27. };
  28. static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
  29. {
  30. static constexpr auto group_header = "# group: "sv;
  31. Array<u8, 1024> buffer;
  32. Unicode::EmojiGroup group;
  33. u32 display_order { 0 };
  34. while (TRY(file.can_read_line())) {
  35. auto line = TRY(file.read_line(buffer));
  36. if (line.is_empty())
  37. continue;
  38. if (line.starts_with('#')) {
  39. if (line.starts_with(group_header)) {
  40. auto name = line.substring_view(group_header.length());
  41. group = Unicode::emoji_group_from_string(name);
  42. }
  43. continue;
  44. }
  45. auto status_index = line.find(';');
  46. VERIFY(status_index.has_value());
  47. auto emoji_and_name_index = line.find('#', *status_index);
  48. VERIFY(emoji_and_name_index.has_value());
  49. Emoji emoji {};
  50. emoji.group = group;
  51. emoji.display_order = display_order++;
  52. auto code_points = line.substring_view(0, *status_index).split_view(' ');
  53. TRY(emoji.code_points.try_ensure_capacity(code_points.size()));
  54. for (auto code_point : code_points) {
  55. auto value = AK::StringUtils::convert_to_uint_from_hex<u32>(code_point);
  56. VERIFY(value.has_value());
  57. emoji.code_points.unchecked_append(*value);
  58. }
  59. auto emoji_and_name = line.substring_view(*emoji_and_name_index + 1);
  60. auto emoji_and_name_spaces = emoji_and_name.find_all(" "sv);
  61. VERIFY(emoji_and_name_spaces.size() > 2);
  62. auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace();
  63. emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string());
  64. emoji.code_points_name = String::join('_', code_points);
  65. TRY(emoji_data.emojis.try_append(move(emoji)));
  66. }
  67. return {};
  68. }
  69. static ErrorOr<void> parse_emoji_serenity_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
  70. {
  71. static constexpr auto code_point_header = "U+"sv;
  72. Array<u8, 1024> buffer;
  73. auto display_order = static_cast<u32>(emoji_data.emojis.size()) + 1u;
  74. while (TRY(file.can_read_line())) {
  75. auto line = TRY(file.read_line(buffer));
  76. if (line.is_empty())
  77. continue;
  78. auto index = line.find(code_point_header);
  79. if (!index.has_value())
  80. continue;
  81. line = line.substring_view(*index);
  82. StringBuilder builder;
  83. Emoji emoji {};
  84. emoji.group = Unicode::EmojiGroup::SerenityOS;
  85. emoji.display_order = display_order++;
  86. line.for_each_split_view(' ', SplitBehavior::Nothing, [&](auto segment) {
  87. if (segment.starts_with(code_point_header)) {
  88. segment = segment.substring_view(code_point_header.length());
  89. auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segment);
  90. VERIFY(code_point.has_value());
  91. emoji.code_points.append(*code_point);
  92. } else {
  93. if (!builder.is_empty())
  94. builder.append(' ');
  95. builder.append(segment);
  96. }
  97. });
  98. auto name = builder.build();
  99. if (!any_of(name, is_ascii_lower_alpha))
  100. name = name.to_titlecase();
  101. emoji.name = emoji_data.unique_strings.ensure(move(name));
  102. emoji.code_points_name = String::join('_', emoji.code_points);
  103. TRY(emoji_data.emojis.try_append(move(emoji)));
  104. }
  105. return {};
  106. }
  107. static ErrorOr<void> generate_emoji_data_header(Core::Stream::BufferedFile& file, EmojiData const&)
  108. {
  109. StringBuilder builder;
  110. SourceGenerator generator { builder };
  111. TRY(file.write(generator.as_string_view().bytes()));
  112. return {};
  113. }
  114. static ErrorOr<void> generate_emoji_data_implementation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data)
  115. {
  116. StringBuilder builder;
  117. SourceGenerator generator { builder };
  118. generator.set("string_index_type"sv, s_string_index_type);
  119. generator.set("emojis_size"sv, String::number(emoji_data.emojis.size()));
  120. generator.append(R"~~~(
  121. #include <AK/Array.h>
  122. #include <AK/BinarySearch.h>
  123. #include <AK/Span.h>
  124. #include <AK/StringView.h>
  125. #include <AK/Types.h>
  126. #include <LibUnicode/Emoji.h>
  127. #include <LibUnicode/EmojiData.h>
  128. namespace Unicode {
  129. )~~~");
  130. emoji_data.unique_strings.generate(generator);
  131. generator.append(R"~~~(
  132. struct EmojiData {
  133. constexpr Emoji to_unicode_emoji() const
  134. {
  135. Emoji emoji {};
  136. emoji.name = decode_string(name);
  137. emoji.group = static_cast<EmojiGroup>(group);
  138. emoji.display_order = display_order;
  139. emoji.code_points = code_points;
  140. return emoji;
  141. }
  142. @string_index_type@ name { 0 };
  143. u8 group { 0 };
  144. u32 display_order { 0 };
  145. Span<u32 const> code_points;
  146. };
  147. )~~~");
  148. for (auto const& emoji : emoji_data.emojis) {
  149. generator.set("name"sv, emoji.code_points_name);
  150. generator.set("size"sv, String::number(emoji.code_points.size()));
  151. generator.append(R"~~~(
  152. static constexpr Array<u32, @size@> s_@name@ { {)~~~");
  153. bool first = true;
  154. for (auto code_point : emoji.code_points) {
  155. generator.append(first ? " "sv : ", "sv);
  156. generator.append(String::formatted("{:#x}", code_point));
  157. first = false;
  158. }
  159. generator.append(" } };"sv);
  160. }
  161. generator.append(R"~~~(
  162. static constexpr Array<EmojiData, @emojis_size@> s_emojis { {)~~~");
  163. for (auto const& emoji : emoji_data.emojis) {
  164. generator.set("name"sv, String::number(emoji.name));
  165. generator.set("group"sv, String::number(to_underlying(emoji.group)));
  166. generator.set("display_order"sv, String::number(emoji.display_order));
  167. generator.set("code_points_name"sv, emoji.code_points_name);
  168. generator.append(R"~~~(
  169. { @name@, @group@, @display_order@, s_@code_points_name@ },)~~~");
  170. }
  171. generator.append(R"~~~(
  172. } };
  173. Optional<Emoji> find_emoji_for_code_points(Span<u32 const> code_points)
  174. {
  175. for (auto& emoji : s_emojis) {
  176. if (emoji.code_points == code_points)
  177. return emoji.to_unicode_emoji();
  178. }
  179. return {};
  180. }
  181. }
  182. )~~~");
  183. TRY(file.write(generator.as_string_view().bytes()));
  184. return {};
  185. }
  186. ErrorOr<int> serenity_main(Main::Arguments arguments)
  187. {
  188. StringView generated_header_path;
  189. StringView generated_implementation_path;
  190. StringView emoji_test_path;
  191. StringView emoji_serenity_path;
  192. Core::ArgsParser args_parser;
  193. args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
  194. args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  195. args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path");
  196. args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path");
  197. args_parser.parse(arguments);
  198. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
  199. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
  200. auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read));
  201. auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read));
  202. EmojiData emoji_data {};
  203. TRY(parse_emoji_test_data(*emoji_test_file, emoji_data));
  204. TRY(parse_emoji_serenity_data(*emoji_serenity_file, emoji_data));
  205. TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
  206. TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));
  207. return 0;
  208. }