GenerateEmojiData.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AnyOf.h>
  8. #include <AK/SourceGenerator.h>
  9. #include <AK/String.h>
  10. #include <AK/StringUtils.h>
  11. #include <AK/Types.h>
  12. #include <LibCore/ArgsParser.h>
  13. #include <LibCore/Directory.h>
  14. #include <LibCore/Stream.h>
  15. #include <LibUnicode/Emoji.h>
  16. using StringIndexType = u16;
  17. constexpr auto s_string_index_type = "u16"sv;
  18. struct Emoji {
  19. StringIndexType name { 0 };
  20. Optional<String> image_path;
  21. Unicode::EmojiGroup group;
  22. String subgroup;
  23. u32 display_order { 0 };
  24. Vector<u32> code_points;
  25. String encoded_code_points;
  26. String status;
  27. size_t code_point_array_index { 0 };
  28. };
// Everything parsed from the input data files; passed to each of the generators in this file.
struct EmojiData {
    // Deduplicated string table. Emoji::name stores an index into it.
    UniqueStringStorage<StringIndexType> unique_strings;
    // All parsed emoji, in the order in which the parsers appended them.
    Vector<Emoji> emojis;
};
  33. static void set_image_path_for_emoji(StringView emoji_resource_path, Emoji& emoji)
  34. {
  35. StringBuilder builder;
  36. for (auto code_point : emoji.code_points) {
  37. if (code_point == 0xfe0f)
  38. continue;
  39. if (!builder.is_empty())
  40. builder.append('_');
  41. builder.appendff("U+{:X}", code_point);
  42. }
  43. auto path = String::formatted("{}/{}.png", emoji_resource_path, builder.build());
  44. if (Core::Stream::File::exists(path))
  45. emoji.image_path = move(path);
  46. }
  47. static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
  48. {
  49. static constexpr auto group_header = "# group: "sv;
  50. static constexpr auto subgroup_header = "# subgroup: "sv;
  51. Array<u8, 1024> buffer;
  52. Unicode::EmojiGroup group;
  53. String subgroup;
  54. u32 display_order { 0 };
  55. while (TRY(file.can_read_line())) {
  56. auto line = TRY(file.read_line(buffer));
  57. if (line.is_empty())
  58. continue;
  59. if (line.starts_with('#')) {
  60. if (line.starts_with(group_header)) {
  61. auto name = line.substring_view(group_header.length());
  62. group = Unicode::emoji_group_from_string(name);
  63. } else if (line.starts_with(subgroup_header)) {
  64. subgroup = line.substring_view(subgroup_header.length());
  65. }
  66. continue;
  67. }
  68. auto status_index = line.find(';');
  69. VERIFY(status_index.has_value());
  70. auto emoji_and_name_index = line.find('#', *status_index);
  71. VERIFY(emoji_and_name_index.has_value());
  72. Emoji emoji {};
  73. emoji.group = group;
  74. emoji.subgroup = subgroup;
  75. emoji.display_order = display_order++;
  76. auto code_points = line.substring_view(0, *status_index).split_view(' ');
  77. TRY(emoji.code_points.try_ensure_capacity(code_points.size()));
  78. for (auto code_point : code_points) {
  79. auto value = AK::StringUtils::convert_to_uint_from_hex<u32>(code_point);
  80. VERIFY(value.has_value());
  81. emoji.code_points.unchecked_append(*value);
  82. }
  83. auto emoji_and_name = line.substring_view(*emoji_and_name_index + 1);
  84. auto emoji_and_name_spaces = emoji_and_name.find_all(" "sv);
  85. VERIFY(emoji_and_name_spaces.size() > 2);
  86. auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace();
  87. emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string());
  88. emoji.encoded_code_points = emoji_and_name.substring_view(0, emoji_and_name_spaces[1]).trim_whitespace();
  89. emoji.status = line.substring_view(*status_index + 1, *emoji_and_name_index - *status_index - 1).trim_whitespace();
  90. TRY(emoji_data.emojis.try_append(move(emoji)));
  91. }
  92. return {};
  93. }
  94. static ErrorOr<void> parse_emoji_serenity_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data)
  95. {
  96. static constexpr auto code_point_header = "U+"sv;
  97. Array<u8, 1024> buffer;
  98. auto display_order = static_cast<u32>(emoji_data.emojis.size()) + 1u;
  99. while (TRY(file.can_read_line())) {
  100. auto line = TRY(file.read_line(buffer));
  101. if (line.is_empty())
  102. continue;
  103. auto index = line.find(code_point_header);
  104. if (!index.has_value())
  105. continue;
  106. line = line.substring_view(*index);
  107. StringBuilder builder;
  108. Emoji emoji {};
  109. emoji.group = Unicode::EmojiGroup::SerenityOS;
  110. emoji.display_order = display_order++;
  111. line.for_each_split_view(' ', SplitBehavior::Nothing, [&](auto segment) {
  112. if (segment.starts_with(code_point_header)) {
  113. segment = segment.substring_view(code_point_header.length());
  114. auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segment);
  115. VERIFY(code_point.has_value());
  116. emoji.code_points.append(*code_point);
  117. } else {
  118. if (!builder.is_empty())
  119. builder.append(' ');
  120. builder.append(segment);
  121. }
  122. });
  123. auto name = builder.build();
  124. if (!any_of(name, is_ascii_lower_alpha))
  125. name = name.to_titlecase();
  126. emoji.name = emoji_data.unique_strings.ensure(move(name));
  127. TRY(emoji_data.emojis.try_append(move(emoji)));
  128. }
  129. return {};
  130. }
  131. static ErrorOr<void> generate_emoji_data_header(Core::Stream::BufferedFile& file, EmojiData const&)
  132. {
  133. StringBuilder builder;
  134. SourceGenerator generator { builder };
  135. TRY(file.write(generator.as_string_view().bytes()));
  136. return {};
  137. }
  138. static ErrorOr<void> generate_emoji_data_implementation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data)
  139. {
  140. StringBuilder builder;
  141. SourceGenerator generator { builder };
  142. generator.set("string_index_type"sv, s_string_index_type);
  143. generator.set("emojis_size"sv, String::number(emoji_data.emojis.size()));
  144. generator.append(R"~~~(
  145. #include <AK/Array.h>
  146. #include <AK/BinarySearch.h>
  147. #include <AK/Span.h>
  148. #include <AK/StringView.h>
  149. #include <AK/Types.h>
  150. #include <LibUnicode/Emoji.h>
  151. #include <LibUnicode/EmojiData.h>
  152. namespace Unicode {
  153. )~~~");
  154. emoji_data.unique_strings.generate(generator);
  155. size_t total_code_point_count { 0 };
  156. for (auto const& emoji : emoji_data.emojis) {
  157. total_code_point_count += emoji.code_points.size();
  158. }
  159. generator.set("total_code_point_count", String::number(total_code_point_count));
  160. generator.append(R"~~~(
  161. static constexpr Array<u32, @total_code_point_count@> s_emoji_code_points { {)~~~");
  162. bool first = true;
  163. for (auto const& emoji : emoji_data.emojis) {
  164. for (auto code_point : emoji.code_points) {
  165. generator.append(first ? " "sv : ", "sv);
  166. generator.append(String::formatted("{:#x}", code_point));
  167. first = false;
  168. }
  169. }
  170. generator.append(" } };"sv);
  171. generator.append(R"~~~(
  172. struct EmojiData {
  173. constexpr Emoji to_unicode_emoji() const
  174. {
  175. Emoji emoji {};
  176. emoji.name = decode_string(name);
  177. emoji.group = static_cast<EmojiGroup>(group);
  178. emoji.display_order = display_order;
  179. emoji.code_points = code_points();
  180. return emoji;
  181. }
  182. constexpr Span<u32 const> code_points() const
  183. {
  184. return Span<u32 const>(s_emoji_code_points.data() + code_point_start, code_point_count);
  185. }
  186. @string_index_type@ name { 0 };
  187. u8 group { 0 };
  188. u32 display_order { 0 };
  189. size_t code_point_start { 0 };
  190. size_t code_point_count { 0 };
  191. };
  192. )~~~");
  193. generator.append(R"~~~(
  194. static constexpr Array<EmojiData, @emojis_size@> s_emojis { {)~~~");
  195. for (auto const& emoji : emoji_data.emojis) {
  196. generator.set("name"sv, String::number(emoji.name));
  197. generator.set("group"sv, String::number(to_underlying(emoji.group)));
  198. generator.set("display_order"sv, String::number(emoji.display_order));
  199. generator.set("code_point_start"sv, String::number(emoji.code_point_array_index));
  200. generator.set("code_point_count"sv, String::number(emoji.code_points.size()));
  201. generator.append(R"~~~(
  202. { @name@, @group@, @display_order@, @code_point_start@, @code_point_count@ },)~~~");
  203. }
  204. generator.append(R"~~~(
  205. } };
  206. Optional<Emoji> find_emoji_for_code_points(Span<u32 const> code_points)
  207. {
  208. for (auto& emoji : s_emojis) {
  209. if (emoji.code_points() == code_points)
  210. return emoji.to_unicode_emoji();
  211. }
  212. return {};
  213. }
  214. }
  215. )~~~");
  216. TRY(file.write(generator.as_string_view().bytes()));
  217. return {};
  218. }
  219. static ErrorOr<void> generate_emoji_installation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data)
  220. {
  221. StringBuilder builder;
  222. SourceGenerator generator { builder };
  223. auto current_group = Unicode::EmojiGroup::Unknown;
  224. StringView current_subgroup;
  225. for (auto const& emoji : emoji_data.emojis) {
  226. if (!emoji.image_path.has_value())
  227. continue;
  228. if (emoji.group == Unicode::EmojiGroup::SerenityOS)
  229. continue; // SerenityOS emojis are in emoji-serenity.txt
  230. if (current_group != emoji.group) {
  231. if (!builder.is_empty())
  232. generator.append("\n"sv);
  233. generator.set("group"sv, Unicode::emoji_group_to_string(emoji.group));
  234. generator.append("# group: @group@\n");
  235. current_group = emoji.group;
  236. }
  237. if (current_subgroup != emoji.subgroup) {
  238. generator.set("subgroup"sv, emoji.subgroup);
  239. generator.append("\n# subgroup: @subgroup@\n");
  240. current_subgroup = emoji.subgroup;
  241. }
  242. generator.set("emoji"sv, emoji.encoded_code_points);
  243. generator.set("name"sv, emoji_data.unique_strings.get(emoji.name));
  244. generator.set("status"sv, emoji.status);
  245. generator.append("@emoji@"sv);
  246. generator.append(" - "sv);
  247. generator.append(String::join(" "sv, emoji.code_points, "U+{:X}"sv));
  248. generator.append(" @name@ (@status@)\n"sv);
  249. }
  250. TRY(file.write(generator.as_string_view().bytes()));
  251. return {};
  252. }
  253. ErrorOr<int> serenity_main(Main::Arguments arguments)
  254. {
  255. StringView generated_header_path;
  256. StringView generated_implementation_path;
  257. StringView generated_installation_path;
  258. StringView emoji_test_path;
  259. StringView emoji_serenity_path;
  260. StringView emoji_resource_path;
  261. Core::ArgsParser args_parser;
  262. args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
  263. args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  264. args_parser.add_option(generated_installation_path, "Path to the emoji.txt file to generate", "generated-installation-path", 'i', "generated-installation-path");
  265. args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path");
  266. args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path");
  267. args_parser.add_option(emoji_resource_path, "Path to the /res/emoji directory", "emoji-resource-path", 'r', "emoji-resource-path");
  268. args_parser.parse(arguments);
  269. auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read));
  270. VERIFY(!emoji_resource_path.is_empty() && Core::Stream::File::exists(emoji_resource_path));
  271. EmojiData emoji_data {};
  272. TRY(parse_emoji_test_data(*emoji_test_file, emoji_data));
  273. if (!emoji_serenity_path.is_empty()) {
  274. auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read));
  275. TRY(parse_emoji_serenity_data(*emoji_serenity_file, emoji_data));
  276. }
  277. size_t code_point_array_index { 0 };
  278. for (auto& emoji : emoji_data.emojis) {
  279. emoji.code_point_array_index = code_point_array_index;
  280. code_point_array_index += emoji.code_points.size();
  281. }
  282. if (!generated_header_path.is_empty()) {
  283. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
  284. TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
  285. }
  286. if (!generated_implementation_path.is_empty()) {
  287. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
  288. TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));
  289. }
  290. if (!generated_installation_path.is_empty()) {
  291. TRY(Core::Directory::create(LexicalPath { generated_installation_path }.parent(), Core::Directory::CreateDirectories::Yes));
  292. for (auto& emoji : emoji_data.emojis)
  293. set_image_path_for_emoji(emoji_resource_path, emoji);
  294. auto generated_installation_file = TRY(open_file(generated_installation_path, Core::Stream::OpenMode::Write));
  295. TRY(generate_emoji_installation(*generated_installation_file, emoji_data));
  296. }
  297. return 0;
  298. }