GenerateEncodingIndexes.cpp 7.5 KB


  1. /*
  2. * Copyright (c) 2024, Simon Wanner <simon@skyrising.xyz>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Array.h>
  7. #include <AK/JsonObject.h>
  8. #include <AK/NumericLimits.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <AK/StringBuilder.h>
  11. #include <AK/StringView.h>
  12. #include <AK/Vector.h>
  13. #include <LibCore/ArgsParser.h>
  14. #include <LibCore/File.h>
  15. #include <LibMain/Main.h>
  16. struct LookupTable {
  17. u32 first_pointer;
  18. u32 max_code_point;
  19. Vector<u32> code_points;
  20. };
  // Bundle of all data handed to the header and implementation generators.
  struct LookupTables {
      // The "gb18030-ranges" array of the input JSON. Held by reference, so the
      // parsed JSON must stay alive for as long as this struct is used.
      JsonArray const& gb18030_ranges;
      // One table per encoding index. The key is substituted for @name@ in the
      // generated identifiers (s_@name@_index, index_@name@_code_point).
      OrderedHashMap<StringView, LookupTable> indexes;
  };
  25. LookupTable prepare_table(JsonArray const& data)
  26. {
  27. Vector<u32> code_points;
  28. code_points.ensure_capacity(data.size());
  29. u32 max = 0;
  30. u32 first_pointer = 0;
  31. for (auto const& entry : data.values()) {
  32. if (entry.is_null()) {
  33. if (code_points.is_empty()) {
  34. first_pointer++;
  35. } else {
  36. code_points.append(0xfffd);
  37. max = AK::max(max, code_points.last());
  38. }
  39. } else {
  40. code_points.append(entry.as_integer<u32>());
  41. max = AK::max(max, code_points.last());
  42. }
  43. }
  44. while (code_points.last() == 0xfffd)
  45. code_points.take_last();
  46. return { first_pointer, max, move(code_points) };
  47. }
  48. void generate_table(SourceGenerator generator, StringView name, LookupTable& table)
  49. {
  50. generator.set("name", name);
  51. generator.set("value_type", table.max_code_point > NumericLimits<u16>::max() ? "u32" : "u16");
  52. generator.set("first_pointer", MUST(String::number(table.first_pointer)));
  53. generator.set("size", MUST(String::number(table.code_points.size())));
  54. if (table.first_pointer > 0) {
  55. generator.appendln("static constexpr u32 s_@name@_index_first_pointer = @first_pointer@;");
  56. }
  57. generator.append("static constexpr Array<@value_type@, @size@> s_@name@_index {\n ");
  58. for (size_t i = 0; i < table.code_points.size(); i++) {
  59. generator.append(MUST(String::formatted("{:#04x}", table.code_points[i])));
  60. if (i != table.code_points.size() - 1)
  61. generator.append(i % 16 == 15 ? ",\n "sv : ", "sv);
  62. }
  63. generator.appendln("\n};");
  64. generator.appendln("Optional<u32> index_@name@_code_point(u32 pointer);");
  65. }
  66. ErrorOr<void> generate_header_file(LookupTables& tables, Core::File& file)
  67. {
  68. StringBuilder builder;
  69. SourceGenerator generator { builder };
  70. generator.set("gb18030_ranges_size", MUST(String::number(tables.gb18030_ranges.size())));
  71. generator.append(R"~~~(
  72. #pragma once
  73. #include <AK/Array.h>
  74. #include <AK/Types.h>
  75. namespace TextCodec {
  76. struct Gb18030RangeEntry {
  77. u32 pointer;
  78. u32 code_point;
  79. };
  80. static constexpr Array<Gb18030RangeEntry, @gb18030_ranges_size@> s_gb18030_ranges { {
  81. )~~~");
  82. for (auto const& range : tables.gb18030_ranges.values()) {
  83. generator.appendln(MUST(String::formatted(" {{ {}, {:#04x} }},", range.as_array()[0].as_integer<u32>(), range.as_array()[1].as_integer<u32>())));
  84. }
  85. generator.appendln("} };\n");
  86. for (auto e : tables.indexes) {
  87. generate_table(generator.fork(), e.key, e.value);
  88. }
  89. generator.append("\n");
  90. generator.appendln("}");
  91. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  92. return {};
  93. }
  94. void generate_table_implementation(SourceGenerator generator, StringView name, LookupTable& table)
  95. {
  96. generator.set("name", name);
  97. generator.set("first_pointer", MUST(String::number(table.first_pointer)));
  98. generator.set("size", MUST(String::number(table.code_points.size())));
  99. if (table.first_pointer > 0) {
  100. generator.append(R"~~~(
  101. Optional<u32> index_@name@_code_point(u32 pointer)
  102. {
  103. if (pointer < s_@name@_index_first_pointer || pointer - s_@name@_index_first_pointer >= s_@name@_index.size())
  104. return {};
  105. auto value = s_@name@_index[pointer - s_@name@_index_first_pointer];
  106. if (value == 0xfffd)
  107. return {};
  108. return value;
  109. }
  110. )~~~");
  111. } else {
  112. generator.append(R"~~~(
  113. Optional<u32> index_@name@_code_point(u32 pointer)
  114. {
  115. if (pointer >= s_@name@_index.size())
  116. return {};
  117. auto value = s_@name@_index[pointer];
  118. if (value == 0xfffd)
  119. return {};
  120. return value;
  121. }
  122. )~~~");
  123. }
  124. }
  125. ErrorOr<void> generate_implementation_file(LookupTables& tables, Core::File& file)
  126. {
  127. StringBuilder builder;
  128. SourceGenerator generator { builder };
  129. generator.append(R"~~~(
  130. #include <LibTextCodec/LookupTables.h>
  131. namespace TextCodec {
  132. )~~~");
  133. for (auto e : tables.indexes) {
  134. generate_table_implementation(generator.fork(), e.key, e.value);
  135. }
  136. generator.appendln("\n}");
  137. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  138. return {};
  139. }
  140. ErrorOr<int> serenity_main(Main::Arguments arguments)
  141. {
  142. StringView generated_header_path;
  143. StringView generated_implementation_path;
  144. StringView json_path;
  145. Core::ArgsParser args_parser;
  146. args_parser.add_option(generated_header_path, "Path to the lookup table header file to generate", "generated-header-path", 'h', "generated-header-path");
  147. args_parser.add_option(generated_implementation_path, "Path to the lookup table implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  148. args_parser.add_option(json_path, "Path to the JSON file to read from", "json-path", 'j', "json-path");
  149. args_parser.parse(arguments);
  150. auto json_file = TRY(Core::File::open(json_path, Core::File::OpenMode::Read));
  151. auto json_data = TRY(json_file->read_until_eof());
  152. auto data = TRY(JsonValue::from_string(json_data)).as_object();
  153. auto gb18030_table = prepare_table(data.get("gb18030"sv)->as_array());
  154. // FIXME: Encoding specification is not updated to GB-18030-2022 yet (https://github.com/whatwg/encoding/issues/312)
  155. // NOTE: See https://commits.webkit.org/264918@main
  156. gb18030_table.code_points[7182] = 0xfe10;
  157. gb18030_table.code_points[7183] = 0xfe12;
  158. gb18030_table.code_points[7184] = 0xfe11;
  159. gb18030_table.code_points[7185] = 0xfe13;
  160. gb18030_table.code_points[7186] = 0xfe14;
  161. gb18030_table.code_points[7187] = 0xfe15;
  162. gb18030_table.code_points[7188] = 0xfe16;
  163. gb18030_table.code_points[7201] = 0xfe17;
  164. gb18030_table.code_points[7202] = 0xfe18;
  165. gb18030_table.code_points[7208] = 0xfe19;
  166. gb18030_table.code_points[23775] = 0x9fb4;
  167. gb18030_table.code_points[23783] = 0x9fb5;
  168. gb18030_table.code_points[23788] = 0x9fb6;
  169. gb18030_table.code_points[23789] = 0x9fb7;
  170. gb18030_table.code_points[23795] = 0x9fb8;
  171. gb18030_table.code_points[23812] = 0x9fb9;
  172. gb18030_table.code_points[23829] = 0x9fba;
  173. gb18030_table.code_points[23845] = 0x9fbb;
  174. LookupTables tables {
  175. .gb18030_ranges = data.get("gb18030-ranges"sv)->as_array(),
  176. .indexes = {
  177. { "gb18030"sv, move(gb18030_table) },
  178. { "big5"sv, prepare_table(data.get("big5"sv)->as_array()) },
  179. { "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array()) },
  180. { "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array()) },
  181. { "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array()) },
  182. },
  183. };
  184. auto generated_header_file = TRY(Core::File::open(generated_header_path, Core::File::OpenMode::Write));
  185. auto generated_implementation_file = TRY(Core::File::open(generated_implementation_path, Core::File::OpenMode::Write));
  186. TRY(generate_header_file(tables, *generated_header_file));
  187. TRY(generate_implementation_file(tables, *generated_implementation_file));
  188. return 0;
  189. }