GenerateIDNAData.cpp 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. /*
  2. * Copyright (c) 2023, Simon Wanner <simon@skyrising.xyz>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/Error.h>
  8. #include <AK/SourceGenerator.h>
  9. #include <AK/Types.h>
  10. #include <LibCore/ArgsParser.h>
  11. enum class MappingStatus : u8 {
  12. Valid,
  13. Ignored,
  14. Mapped,
  15. Deviation,
  16. Disallowed,
  17. DisallowedStd3Valid,
  18. DisallowedStd3Mapped,
  19. };
  20. static constexpr Array<StringView, 7> mapping_status_names { "Valid"sv, "Ignored"sv, "Mapped"sv, "Deviation"sv, "Disallowed"sv, "DisallowedStd3Valid"sv, "DisallowedStd3Mapped"sv };
  21. enum class IDNA2008Status : u8 {
  22. NV8,
  23. XV8,
  24. };
  25. static constexpr Array<StringView, 2> idna_2008_status_names { "NV8"sv, "XV8"sv };
  26. struct IDNAMapping {
  27. Unicode::CodePointRange code_points;
  28. MappingStatus status;
  29. IDNA2008Status idna_2008_status;
  30. Vector<u32> mapped_to {};
  31. };
  32. struct IDNAData {
  33. Vector<IDNAMapping> mapping_table;
  34. };
  35. static MappingStatus parse_mapping_status(StringView status)
  36. {
  37. if (status == "valid"sv)
  38. return MappingStatus::Valid;
  39. if (status == "ignored"sv)
  40. return MappingStatus::Ignored;
  41. if (status == "mapped"sv)
  42. return MappingStatus::Mapped;
  43. if (status == "deviation"sv)
  44. return MappingStatus::Deviation;
  45. if (status == "disallowed"sv)
  46. return MappingStatus::Disallowed;
  47. if (status == "disallowed_STD3_valid"sv)
  48. return MappingStatus::DisallowedStd3Valid;
  49. if (status == "disallowed_STD3_mapped"sv)
  50. return MappingStatus::DisallowedStd3Mapped;
  51. VERIFY_NOT_REACHED();
  52. }
  53. static ErrorOr<void> parse_idna_mapping_table(Core::InputBufferedFile& file, Vector<IDNAMapping>& mapping_table)
  54. {
  55. Array<u8, 1024> buffer;
  56. while (TRY(file.can_read_line())) {
  57. auto line = TRY(file.read_line(buffer));
  58. if (line.is_empty() || line.starts_with('#'))
  59. continue;
  60. if (auto index = line.find('#'); index.has_value())
  61. line = line.substring_view(0, *index);
  62. auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
  63. VERIFY(segments.size() >= 2);
  64. IDNAMapping idna_mapping {};
  65. idna_mapping.code_points = parse_code_point_range(segments[0].trim_whitespace());
  66. idna_mapping.status = parse_mapping_status(segments[1].trim_whitespace());
  67. if (segments.size() >= 3)
  68. idna_mapping.mapped_to = parse_code_point_list(segments[2].trim_whitespace());
  69. if (segments.size() >= 4) {
  70. auto trimmed = segments[3].trim_whitespace();
  71. if (trimmed == "NV8"sv) {
  72. idna_mapping.idna_2008_status = IDNA2008Status::NV8;
  73. } else {
  74. VERIFY(trimmed == "XV8"sv);
  75. idna_mapping.idna_2008_status = IDNA2008Status::XV8;
  76. }
  77. }
  78. TRY(mapping_table.try_append(move(idna_mapping)));
  79. }
  80. return {};
  81. }
  82. static ErrorOr<void> generate_idna_data_header(Core::InputBufferedFile& file, IDNAData&)
  83. {
  84. StringBuilder builder;
  85. SourceGenerator generator { builder };
  86. generator.append(R"~~~(
  87. #pragma once
  88. namespace Unicode::IDNA {
  89. }
  90. )~~~");
  91. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  92. return {};
  93. }
  94. static ErrorOr<void> generate_idna_data_implementation(Core::InputBufferedFile& file, IDNAData& idna_data)
  95. {
  96. StringBuilder builder;
  97. SourceGenerator generator { builder };
  98. generator.set("idna_table_size", TRY(String::number(idna_data.mapping_table.size())));
  99. generator.append(R"~~~(
  100. #include <AK/BinarySearch.h>
  101. #include <AK/Optional.h>
  102. #include <AK/Utf32View.h>
  103. #include <LibUnicode/CharacterTypes.h>
  104. #include <LibUnicode/IDNA.h>
  105. #include <LibUnicode/IDNAData.h>
  106. namespace Unicode::IDNA {
  107. struct MappingEntry {
  108. CodePointRange code_points {};
  109. MappingStatus status : 3 { MappingStatus::Valid };
  110. IDNA2008Status idna_2008_status : 1 { IDNA2008Status::NV8 };
  111. size_t mapping_offset : 20 { 0 };
  112. size_t mapping_length : 8 { 0 };
  113. };
  114. static constexpr Array<MappingEntry, @idna_table_size@> s_idna_mapping_table { {)~~~");
  115. {
  116. size_t mapping_offset = 0;
  117. for (auto const& mapping : idna_data.mapping_table) {
  118. generator.set("code_points", TRY(String::formatted("{:#x}, {:#x}", mapping.code_points.first, mapping.code_points.last)));
  119. generator.set("status", mapping_status_names[to_underlying(mapping.status)]);
  120. generator.set("idna_2008_status", idna_2008_status_names[to_underlying(mapping.idna_2008_status)]);
  121. if (mapping.mapped_to.is_empty()) {
  122. generator.set("mapping_offset", "0"sv);
  123. generator.set("mapping_length", "0"sv);
  124. } else {
  125. generator.set("mapping_offset", TRY(String::number(mapping_offset)));
  126. generator.set("mapping_length", TRY(String::number(mapping.mapped_to.size())));
  127. mapping_offset += mapping.mapped_to.size();
  128. }
  129. generator.append(R"~~~(
  130. { { @code_points@ }, MappingStatus::@status@, IDNA2008Status::@idna_2008_status@, @mapping_offset@, @mapping_length@ },)~~~");
  131. }
  132. generator.set("mapping_length_total", TRY(String::number(mapping_offset)));
  133. }
  134. generator.append(R"~~~(
  135. } };
  136. static constexpr Array<u32, @mapping_length_total@> s_mapping_code_points { )~~~");
  137. {
  138. for (auto const& mapping : idna_data.mapping_table) {
  139. if (mapping.mapped_to.is_empty())
  140. continue;
  141. for (u32 code_point : mapping.mapped_to)
  142. generator.append(TRY(String::formatted("{:#x}, ", code_point)));
  143. generator.append(R"~~~(
  144. )~~~");
  145. }
  146. }
  147. generator.append(R"~~~(
  148. };
  149. Optional<Mapping> get_idna_mapping(u32 code_point)
  150. {
  151. auto* entry = binary_search(s_idna_mapping_table, code_point, nullptr, [](auto code_point, auto entry) {
  152. if (code_point < entry.code_points.first)
  153. return -1;
  154. if (code_point > entry.code_points.last)
  155. return 1;
  156. return 0;
  157. });
  158. if (!entry)
  159. return {};
  160. auto mapped_to = Utf32View { entry->mapping_length ? s_mapping_code_points.data() + entry->mapping_offset : nullptr, entry->mapping_length };
  161. return Mapping { entry->status, entry->idna_2008_status, move(mapped_to) };
  162. }
  163. }
  164. )~~~");
  165. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  166. return {};
  167. }
  168. ErrorOr<int> serenity_main(Main::Arguments arguments)
  169. {
  170. StringView generated_header_path;
  171. StringView generated_implementation_path;
  172. StringView idna_mapping_table_path;
  173. Core::ArgsParser args_parser;
  174. args_parser.add_option(generated_header_path, "Path to the IDNA Data header file to generate", "generated-header-path", 'h', "generated-header-path");
  175. args_parser.add_option(generated_implementation_path, "Path to the IDNA Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  176. args_parser.add_option(idna_mapping_table_path, "Path to IdnaMappingTable.txt file", "idna-mapping-table-path", 'm', "idna-mapping-table-path");
  177. args_parser.parse(arguments);
  178. auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
  179. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
  180. auto idna_mapping_table_file = TRY(open_file(idna_mapping_table_path, Core::File::OpenMode::Read));
  181. IDNAData idna_data {};
  182. TRY(parse_idna_mapping_table(*idna_mapping_table_file, idna_data.mapping_table));
  183. TRY(generate_idna_data_header(*generated_header_file, idna_data));
  184. TRY(generate_idna_data_implementation(*generated_implementation_file, idna_data));
  185. return 0;
  186. }