GenerateEmojiData.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. /*
  2. * Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AnyOf.h>
  8. #include <AK/ByteString.h>
  9. #include <AK/QuickSort.h>
  10. #include <AK/SourceGenerator.h>
  11. #include <AK/StringUtils.h>
  12. #include <AK/Types.h>
  13. #include <LibCore/ArgsParser.h>
  14. #include <LibCore/Directory.h>
  15. #include <LibFileSystem/FileSystem.h>
  16. #include <LibUnicode/Emoji.h>
  17. struct Emoji {
  18. size_t image_path { 0 };
  19. Vector<u32> code_points;
  20. size_t code_point_array_index { 0 };
  21. };
  22. struct EmojiData {
  23. UniqueStringStorage unique_strings;
  24. Vector<Emoji> emojis;
  25. Vector<ByteString> emoji_file_list;
  26. };
  27. static ErrorOr<void> parse_emoji_file_list(Core::InputBufferedFile& file, EmojiData& emoji_data)
  28. {
  29. HashTable<ByteString> seen_emojis;
  30. Array<u8, 1024> buffer;
  31. while (TRY(file.can_read_line())) {
  32. auto line = TRY(file.read_line(buffer));
  33. if (line.is_empty())
  34. continue;
  35. if (seen_emojis.contains(line)) {
  36. warnln("\x1b[1;31mError!\x1b[0m Duplicate emoji \x1b[35m{}\x1b[0m listed in emoji-file-list.txt.", line);
  37. return Error::from_errno(EEXIST);
  38. }
  39. ByteString emoji_file { line.trim_whitespace() };
  40. emoji_data.emoji_file_list.append(emoji_file);
  41. seen_emojis.set(emoji_file);
  42. Emoji emoji;
  43. emoji.image_path = emoji_data.unique_strings.ensure(emoji_file);
  44. auto emoji_basename = LexicalPath::basename(emoji_file, LexicalPath::StripExtension::Yes);
  45. emoji_basename.view().for_each_split_view('_', SplitBehavior::Nothing, [&](StringView code_point) {
  46. static constexpr auto code_point_header = "U+"sv;
  47. VERIFY(code_point.starts_with(code_point_header));
  48. code_point = code_point.substring_view(code_point_header.length());
  49. auto code_point_value = AK::StringUtils::convert_to_uint_from_hex<u32>(code_point);
  50. VERIFY(code_point_value.has_value());
  51. emoji.code_points.append(*code_point_value);
  52. });
  53. emoji_data.emojis.append(move(emoji));
  54. }
  55. return {};
  56. }
  57. static ErrorOr<void> validate_emoji(StringView emoji_resource_path, EmojiData& emoji_data)
  58. {
  59. TRY(Core::Directory::for_each_entry(emoji_resource_path, Core::DirIterator::SkipDots, [&](auto& entry, auto&) -> ErrorOr<IterationDecision> {
  60. auto lexical_path = LexicalPath(entry.name);
  61. if (lexical_path.extension() != "png")
  62. return IterationDecision::Continue;
  63. auto title = lexical_path.title();
  64. if (!title.starts_with("U+"sv))
  65. return IterationDecision::Continue;
  66. Vector<u32> code_points;
  67. TRY(title.for_each_split_view('_', SplitBehavior::Nothing, [&](auto segment) -> ErrorOr<void> {
  68. auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segment.substring_view(2));
  69. VERIFY(code_point.has_value());
  70. TRY(code_points.try_append(*code_point));
  71. return {};
  72. }));
  73. auto it = emoji_data.emojis.find_if([&](auto const& emoji) {
  74. return emoji.code_points == code_points;
  75. });
  76. if (it == emoji_data.emojis.end()) {
  77. warnln("\x1b[1;31mError!\x1b[0m Emoji data for \x1b[35m{}\x1b[0m not found. Please check emoji-test.txt and emoji-serenity.txt.", entry.name);
  78. return Error::from_errno(ENOENT);
  79. }
  80. if (!emoji_data.emoji_file_list.contains_slow(lexical_path.string())) {
  81. warnln("\x1b[1;31mError!\x1b[0m Emoji entry for \x1b[35m{}\x1b[0m not found. Please check emoji-file-list.txt.", lexical_path);
  82. return Error::from_errno(ENOENT);
  83. }
  84. return IterationDecision::Continue;
  85. }));
  86. return {};
  87. }
  88. static ErrorOr<void> generate_emoji_data_header(Core::InputBufferedFile& file, EmojiData const&)
  89. {
  90. StringBuilder builder;
  91. SourceGenerator generator { builder };
  92. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  93. return {};
  94. }
  95. static ErrorOr<void> generate_emoji_data_implementation(Core::InputBufferedFile& file, EmojiData const& emoji_data)
  96. {
  97. StringBuilder builder;
  98. SourceGenerator generator { builder };
  99. generator.set("string_index_type"sv, emoji_data.unique_strings.type_that_fits());
  100. generator.set("emojis_size"sv, ByteString::number(emoji_data.emojis.size()));
  101. generator.append(R"~~~(
  102. #include <AK/Array.h>
  103. #include <AK/BinarySearch.h>
  104. #include <AK/Span.h>
  105. #include <AK/StringView.h>
  106. #include <AK/Types.h>
  107. #include <LibUnicode/Emoji.h>
  108. #include <LibUnicode/EmojiData.h>
  109. namespace Unicode {
  110. )~~~");
  111. emoji_data.unique_strings.generate(generator);
  112. size_t total_code_point_count { 0 };
  113. for (auto const& emoji : emoji_data.emojis)
  114. total_code_point_count += emoji.code_points.size();
  115. generator.set("total_code_point_count", ByteString::number(total_code_point_count));
  116. generator.append(R"~~~(
  117. static constexpr Array<u32, @total_code_point_count@> s_emoji_code_points { {)~~~");
  118. bool first = true;
  119. for (auto const& emoji : emoji_data.emojis) {
  120. for (auto code_point : emoji.code_points) {
  121. generator.append(first ? " "sv : ", "sv);
  122. generator.append(ByteString::formatted("{:#x}", code_point));
  123. first = false;
  124. }
  125. }
  126. generator.append(" } };\n"sv);
  127. generator.append(R"~~~(
  128. struct EmojiData {
  129. constexpr ReadonlySpan<u32> code_points() const
  130. {
  131. return ReadonlySpan<u32>(s_emoji_code_points.data() + code_point_start, code_point_count);
  132. }
  133. @string_index_type@ image_path { 0 };
  134. size_t code_point_start { 0 };
  135. size_t code_point_count { 0 };
  136. };
  137. )~~~");
  138. generator.append(R"~~~(
  139. static constexpr Array<EmojiData, @emojis_size@> s_emojis { {)~~~");
  140. for (auto const& emoji : emoji_data.emojis) {
  141. generator.set("image_path"sv, ByteString::number(emoji.image_path));
  142. generator.set("code_point_start"sv, ByteString::number(emoji.code_point_array_index));
  143. generator.set("code_point_count"sv, ByteString::number(emoji.code_points.size()));
  144. generator.append(R"~~~(
  145. { @image_path@, @code_point_start@, @code_point_count@ },)~~~");
  146. }
  147. generator.append(R"~~~(
  148. } };
  149. struct EmojiCodePointComparator {
  150. constexpr int operator()(ReadonlySpan<u32> code_points, EmojiData const& emoji)
  151. {
  152. auto emoji_code_points = emoji.code_points();
  153. if (code_points.size() != emoji_code_points.size())
  154. return static_cast<int>(code_points.size()) - static_cast<int>(emoji_code_points.size());
  155. for (size_t i = 0; i < code_points.size(); ++i) {
  156. if (code_points[i] != emoji_code_points[i])
  157. return static_cast<int>(code_points[i]) - static_cast<int>(emoji_code_points[i]);
  158. }
  159. return 0;
  160. }
  161. };
  162. Optional<StringView> emoji_image_for_code_points(ReadonlySpan<u32> code_points)
  163. {
  164. if (auto const* emoji = binary_search(s_emojis, code_points, nullptr, EmojiCodePointComparator {}))
  165. return decode_string(emoji->image_path);
  166. return {};
  167. }
  168. }
  169. )~~~");
  170. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  171. return {};
  172. }
  173. ErrorOr<int> serenity_main(Main::Arguments arguments)
  174. {
  175. StringView generated_header_path;
  176. StringView generated_implementation_path;
  177. StringView emoji_file_list_path;
  178. StringView emoji_resource_path;
  179. Core::ArgsParser args_parser;
  180. args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
  181. args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  182. args_parser.add_option(emoji_file_list_path, "Path to the emoji-file-list.txt file", "emoji-file-list-path", 'f', "emoji-file-list-path");
  183. args_parser.add_option(emoji_resource_path, "Path to the /res/emoji directory", "emoji-resource-path", 'r', "emoji-resource-path");
  184. args_parser.parse(arguments);
  185. VERIFY(!emoji_resource_path.is_empty() && FileSystem::exists(emoji_resource_path));
  186. VERIFY(!emoji_file_list_path.is_empty() && FileSystem::exists(emoji_file_list_path));
  187. EmojiData emoji_data {};
  188. auto emoji_file_list_file = TRY(open_file(emoji_file_list_path, Core::File::OpenMode::Read));
  189. TRY(parse_emoji_file_list(*emoji_file_list_file, emoji_data));
  190. TRY(validate_emoji(emoji_resource_path, emoji_data));
  191. auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
  192. TRY(generate_emoji_data_header(*generated_header_file, emoji_data));
  193. quick_sort(emoji_data.emojis, [](auto const& lhs, auto const& rhs) {
  194. if (lhs.code_points.size() != rhs.code_points.size())
  195. return lhs.code_points.size() < rhs.code_points.size();
  196. for (size_t i = 0; i < lhs.code_points.size(); ++i) {
  197. if (lhs.code_points[i] < rhs.code_points[i])
  198. return true;
  199. if (lhs.code_points[i] > rhs.code_points[i])
  200. return false;
  201. }
  202. return false;
  203. });
  204. size_t code_point_array_index { 0 };
  205. for (auto& emoji : emoji_data.emojis) {
  206. emoji.code_point_array_index = code_point_array_index;
  207. code_point_array_index += emoji.code_points.size();
  208. }
  209. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
  210. TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data));
  211. return 0;
  212. }