// GenerateUnicodeLocale.cpp
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/AllOf.h>
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/Format.h>
  9. #include <AK/HashMap.h>
  10. #include <AK/JsonObject.h>
  11. #include <AK/JsonParser.h>
  12. #include <AK/JsonValue.h>
  13. #include <AK/LexicalPath.h>
  14. #include <AK/QuickSort.h>
  15. #include <AK/SourceGenerator.h>
  16. #include <AK/String.h>
  17. #include <AK/StringBuilder.h>
  18. #include <LibCore/ArgsParser.h>
  19. #include <LibCore/DirIterator.h>
  20. #include <LibCore/File.h>
  21. struct Locale {
  22. String language;
  23. Optional<String> territory;
  24. Optional<String> variant;
  25. HashMap<String, String> languages;
  26. HashMap<String, String> territories;
  27. HashMap<String, String> scripts;
  28. HashMap<String, String> currencies;
  29. };
  30. struct UnicodeLocaleData {
  31. HashMap<String, Locale> locales;
  32. Vector<String> languages;
  33. Vector<String> territories;
  34. Vector<String> scripts;
  35. Vector<String> variants;
  36. Vector<String> currencies;
  37. HashMap<String, String> language_aliases;
  38. HashMap<String, String> territory_aliases;
  39. HashMap<String, String> script_aliases;
  40. HashMap<String, String> variant_aliases;
  41. HashMap<String, String> subdivision_aliases;
  42. HashMap<String, String> complex_mappings;
  43. HashMap<String, String> likely_subtags;
  44. Vector<String> likely_territory_subtags;
  45. };
  46. static void write_to_file_if_different(Core::File& file, StringView contents)
  47. {
  48. auto const current_contents = file.read_all();
  49. if (StringView { current_contents.bytes() } == contents)
  50. return;
  51. VERIFY(file.seek(0));
  52. VERIFY(file.truncate(0));
  53. VERIFY(file.write(contents));
  54. }
  55. static void parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
  56. {
  57. LexicalPath core_aliases_path(move(core_supplemental_path));
  58. core_aliases_path = core_aliases_path.append("aliases.json"sv);
  59. VERIFY(Core::File::exists(core_aliases_path.string()));
  60. auto core_aliases_file_or_error = Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly);
  61. VERIFY(!core_aliases_file_or_error.is_error());
  62. auto core_aliases = JsonParser(core_aliases_file_or_error.value()->read_all()).parse();
  63. VERIFY(core_aliases.has_value());
  64. auto const& supplemental_object = core_aliases->as_object().get("supplemental"sv);
  65. auto const& metadata_object = supplemental_object.as_object().get("metadata"sv);
  66. auto const& alias_object = metadata_object.as_object().get("alias"sv);
  67. auto append_aliases = [&](auto& alias_object, auto& alias_map, Vector<String>* likely_subtags_list = nullptr) {
  68. alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  69. auto alias = value.as_object().get("_replacement"sv).as_string();
  70. if (auto aliases = alias.split(' '); likely_subtags_list && (aliases.size() > 1))
  71. likely_subtags_list->extend(move(aliases));
  72. if (key.contains('-'))
  73. locale_data.complex_mappings.set(key, move(alias));
  74. else
  75. alias_map.set(key, move(alias));
  76. });
  77. };
  78. append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases);
  79. append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases, &locale_data.likely_territory_subtags);
  80. append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases);
  81. append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases);
  82. append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases);
  83. }
  84. static void parse_likely_subtags(String core_supplemental_path, UnicodeLocaleData& locale_data)
  85. {
  86. LexicalPath likely_subtags_path(move(core_supplemental_path));
  87. likely_subtags_path = likely_subtags_path.append("likelySubtags.json"sv);
  88. VERIFY(Core::File::exists(likely_subtags_path.string()));
  89. auto likely_subtags_file_or_error = Core::File::open(likely_subtags_path.string(), Core::OpenMode::ReadOnly);
  90. VERIFY(!likely_subtags_file_or_error.is_error());
  91. auto likely_subtags = JsonParser(likely_subtags_file_or_error.value()->read_all()).parse();
  92. VERIFY(likely_subtags.has_value());
  93. auto const& supplemental_object = likely_subtags->as_object().get("supplemental"sv);
  94. auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
  95. likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  96. auto likely_subtag = value.as_string();
  97. auto regions = likely_subtag.split('-');
  98. VERIFY(regions.size() == 3);
  99. // Unicode TR35 has the following footnote in section 3.2.1 Canonical Unicode Locale Identifiers
  100. //
  101. // Formally, replacement of multiple territories uses Section 4.3 Likely Subtags. However, there are a small
  102. // number of cases of multiple territories, so the mappings can be precomputed. This results in a faster
  103. // lookup with a very small subset of the likely subtags data.
  104. //
  105. // Since the likely subtags data is quite large, and resolving likely territory subtags is our only use case for
  106. // this data, we only generate likely subtags that contain one of the above multiple territories.
  107. if (locale_data.likely_territory_subtags.contains_slow(regions[2]))
  108. locale_data.likely_subtags.set(key, move(likely_subtag));
  109. });
  110. }
  111. static void parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  112. {
  113. LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them.
  114. languages_path = languages_path.append("languages.json"sv);
  115. VERIFY(Core::File::exists(languages_path.string()));
  116. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  117. VERIFY(!languages_file_or_error.is_error());
  118. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  119. VERIFY(languages.has_value());
  120. auto const& main_object = languages->as_object().get("main"sv);
  121. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  122. auto const& identity_object = locale_object.as_object().get("identity"sv);
  123. auto const& language_string = identity_object.as_object().get("language"sv);
  124. auto const& territory_string = identity_object.as_object().get("territory"sv);
  125. auto const& variant_string = identity_object.as_object().get("variant"sv);
  126. locale.language = language_string.as_string();
  127. if (!locale_data.languages.contains_slow(locale.language))
  128. locale_data.languages.append(locale.language);
  129. if (territory_string.is_string()) {
  130. locale.territory = territory_string.as_string();
  131. if (!locale_data.territories.contains_slow(*locale.territory))
  132. locale_data.territories.append(*locale.territory);
  133. }
  134. if (variant_string.is_string()) {
  135. locale.variant = variant_string.as_string();
  136. if (!locale_data.variants.contains_slow(*locale.variant))
  137. locale_data.variants.append(*locale.variant);
  138. }
  139. }
  140. static void parse_locale_languages(String locale_path, Locale& locale)
  141. {
  142. LexicalPath languages_path(move(locale_path));
  143. languages_path = languages_path.append("languages.json"sv);
  144. VERIFY(Core::File::exists(languages_path.string()));
  145. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  146. VERIFY(!languages_file_or_error.is_error());
  147. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  148. VERIFY(languages.has_value());
  149. auto const& main_object = languages->as_object().get("main"sv);
  150. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  151. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  152. auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
  153. languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  154. locale.languages.set(key, value.as_string());
  155. });
  156. }
  157. static void parse_locale_territories(String locale_path, Locale& locale)
  158. {
  159. LexicalPath territories_path(move(locale_path));
  160. territories_path = territories_path.append("territories.json"sv);
  161. VERIFY(Core::File::exists(territories_path.string()));
  162. auto territories_file_or_error = Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly);
  163. VERIFY(!territories_file_or_error.is_error());
  164. auto territories = JsonParser(territories_file_or_error.value()->read_all()).parse();
  165. VERIFY(territories.has_value());
  166. auto const& main_object = territories->as_object().get("main"sv);
  167. auto const& locale_object = main_object.as_object().get(territories_path.parent().basename());
  168. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  169. auto const& territories_object = locale_display_names_object.as_object().get("territories"sv);
  170. territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  171. locale.territories.set(key, value.as_string());
  172. });
  173. }
  174. static void parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  175. {
  176. LexicalPath scripts_path(move(locale_path));
  177. scripts_path = scripts_path.append("scripts.json"sv);
  178. VERIFY(Core::File::exists(scripts_path.string()));
  179. auto scripts_file_or_error = Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly);
  180. VERIFY(!scripts_file_or_error.is_error());
  181. auto scripts = JsonParser(scripts_file_or_error.value()->read_all()).parse();
  182. VERIFY(scripts.has_value());
  183. auto const& main_object = scripts->as_object().get("main"sv);
  184. auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename());
  185. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  186. auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv);
  187. scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  188. locale.scripts.set(key, value.as_string());
  189. if (!locale_data.scripts.contains_slow(key))
  190. locale_data.scripts.append(key);
  191. });
  192. }
  193. static void parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  194. {
  195. LexicalPath currencies_path(move(numbers_path));
  196. currencies_path = currencies_path.append("currencies.json"sv);
  197. VERIFY(Core::File::exists(currencies_path.string()));
  198. auto currencies_file_or_error = Core::File::open(currencies_path.string(), Core::OpenMode::ReadOnly);
  199. VERIFY(!currencies_file_or_error.is_error());
  200. auto currencies = JsonParser(currencies_file_or_error.value()->read_all()).parse();
  201. VERIFY(currencies.has_value());
  202. auto const& main_object = currencies->as_object().get("main"sv);
  203. auto const& locale_object = main_object.as_object().get(currencies_path.parent().basename());
  204. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  205. auto const& currencies_object = locale_numbers_object.as_object().get("currencies"sv);
  206. currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  207. auto const& display_name = value.as_object().get("displayName"sv);
  208. locale.currencies.set(key, display_name.as_string());
  209. if (!locale_data.currencies.contains_slow(key))
  210. locale_data.currencies.append(key);
  211. });
  212. }
  213. static Core::DirIterator path_to_dir_iterator(String path)
  214. {
  215. LexicalPath lexical_path(move(path));
  216. lexical_path = lexical_path.append("main"sv);
  217. VERIFY(Core::File::is_directory(lexical_path.string()));
  218. Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir);
  219. if (iterator.has_error()) {
  220. warnln("{}: {}", lexical_path.string(), iterator.error_string());
  221. VERIFY_NOT_REACHED();
  222. }
  223. return iterator;
  224. }
  225. static void parse_all_locales(String core_path, String locale_names_path, String numbers_path, UnicodeLocaleData& locale_data)
  226. {
  227. auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path));
  228. auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
  229. LexicalPath core_supplemental_path(move(core_path));
  230. core_supplemental_path = core_supplemental_path.append("supplemental"sv);
  231. VERIFY(Core::File::is_directory(core_supplemental_path.string()));
  232. parse_core_aliases(core_supplemental_path.string(), locale_data);
  233. parse_likely_subtags(core_supplemental_path.string(), locale_data);
  234. while (locale_names_iterator.has_next()) {
  235. auto locale_path = locale_names_iterator.next_full_path();
  236. VERIFY(Core::File::is_directory(locale_path));
  237. auto& locale = locale_data.locales.ensure(LexicalPath::basename(locale_path));
  238. parse_identity(locale_path, locale_data, locale);
  239. parse_locale_languages(locale_path, locale);
  240. parse_locale_territories(locale_path, locale);
  241. parse_locale_scripts(locale_path, locale_data, locale);
  242. }
  243. while (numbers_iterator.has_next()) {
  244. auto numbers_path = numbers_iterator.next_full_path();
  245. VERIFY(Core::File::is_directory(numbers_path));
  246. auto& locale = locale_data.locales.ensure(LexicalPath::basename(numbers_path));
  247. parse_locale_currencies(numbers_path, locale_data, locale);
  248. }
  249. }
  250. static String format_identifier(StringView owner, String identifier)
  251. {
  252. identifier.replace("-"sv, "_"sv, true);
  253. if (all_of(identifier, is_ascii_digit))
  254. return String::formatted("{}_{}", owner[0], identifier);
  255. return identifier.to_titlecase();
  256. }
  257. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  258. {
  259. StringBuilder builder;
  260. SourceGenerator generator { builder };
  261. auto generate_enum = [&](StringView name, StringView default_, Vector<String>& values) {
  262. quick_sort(values);
  263. generator.set("name", name);
  264. generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv);
  265. generator.append(R"~~~(
  266. enum class @name@ : @underlying@ {)~~~");
  267. if (!default_.is_empty()) {
  268. generator.set("default", default_);
  269. generator.append(R"~~~(
  270. @default@,)~~~");
  271. }
  272. for (auto const& value : values) {
  273. generator.set("value", format_identifier(name, value));
  274. generator.append(R"~~~(
  275. @value@,)~~~");
  276. }
  277. generator.append(R"~~~(
  278. };
  279. )~~~");
  280. };
  281. generator.append(R"~~~(
  282. #pragma once
  283. #include <AK/Optional.h>
  284. #include <AK/StringView.h>
  285. #include <AK/Types.h>
  286. #include <LibUnicode/Forward.h>
  287. namespace Unicode {
  288. )~~~");
  289. auto locales = locale_data.locales.keys();
  290. generate_enum("Locale"sv, "None"sv, locales);
  291. generate_enum("Language"sv, {}, locale_data.languages);
  292. generate_enum("Territory"sv, {}, locale_data.territories);
  293. generate_enum("ScriptTag"sv, {}, locale_data.scripts);
  294. generate_enum("Currency"sv, {}, locale_data.currencies);
  295. generate_enum("Variant"sv, {}, locale_data.variants);
  296. generator.append(R"~~~(
  297. namespace Detail {
  298. Optional<Locale> locale_from_string(StringView const& locale);
  299. Optional<StringView> get_locale_language_mapping(StringView locale, StringView language);
  300. Optional<Language> language_from_string(StringView const& language);
  301. Optional<StringView> resolve_language_alias(StringView const& language);
  302. Optional<StringView> get_locale_territory_mapping(StringView locale, StringView territory);
  303. Optional<Territory> territory_from_string(StringView const& territory);
  304. Optional<StringView> resolve_territory_alias(StringView const& territory);
  305. Optional<StringView> get_locale_script_tag_mapping(StringView locale, StringView script_tag);
  306. Optional<ScriptTag> script_tag_from_string(StringView const& script_tag);
  307. Optional<StringView> resolve_script_tag_alias(StringView const& script_tag);
  308. Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency);
  309. Optional<Currency> currency_from_string(StringView const& currency);
  310. Optional<StringView> resolve_variant_alias(StringView const& variant);
  311. Optional<StringView> resolve_subdivision_alias(StringView const& subdivision);
  312. void resolve_complex_language_aliases(Unicode::LanguageID& language_id);
  313. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id);
  314. }
  315. }
  316. )~~~");
  317. write_to_file_if_different(file, generator.as_string_view());
  318. }
  319. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  320. {
  321. StringBuilder builder;
  322. SourceGenerator generator { builder };
  323. generator.set("locales_size"sv, String::number(locale_data.locales.size()));
  324. generator.set("territories_size", String::number(locale_data.territories.size()));
  325. generator.append(R"~~~(
  326. #include <AK/Array.h>
  327. #include <AK/HashMap.h>
  328. #include <AK/Span.h>
  329. #include <LibUnicode/Locale.h>
  330. #include <LibUnicode/UnicodeLocale.h>
  331. namespace Unicode {
  332. )~~~");
  333. auto format_mapping_name = [](StringView format, StringView name) {
  334. auto mapping_name = name.to_lowercase_string();
  335. mapping_name.replace("-"sv, "_"sv, true);
  336. return String::formatted(format, mapping_name);
  337. };
  338. auto append_mapping_list = [&](String name, auto const& keys, auto const& mappings) {
  339. generator.set("name", name);
  340. generator.set("size", String::number(keys.size()));
  341. generator.append(R"~~~(
  342. static constexpr Array<StringView, @size@> @name@ { {
  343. )~~~");
  344. constexpr size_t max_values_per_row = 10;
  345. size_t values_in_current_row = 0;
  346. for (auto const& key : keys) {
  347. if (values_in_current_row++ > 0)
  348. generator.append(" ");
  349. if (auto it = mappings.find(key); it != mappings.end())
  350. generator.set("mapping"sv, String::formatted("\"{}\"sv", it->value));
  351. else
  352. generator.set("mapping"sv, "{}"sv);
  353. generator.append("@mapping@,");
  354. if (values_in_current_row == max_values_per_row) {
  355. values_in_current_row = 0;
  356. generator.append("\n ");
  357. }
  358. }
  359. generator.append(R"~~~(
  360. } };
  361. )~~~");
  362. };
  363. auto append_mapping = [&](StringView name, StringView format, auto const& keys, auto get_mapping_callback) {
  364. Vector<String> mapping_names;
  365. for (auto const& locale : locale_data.locales) {
  366. auto mapping_name = format_mapping_name(format, locale.key);
  367. append_mapping_list(mapping_name, keys, get_mapping_callback(locale.value));
  368. mapping_names.append(move(mapping_name));
  369. }
  370. quick_sort(mapping_names);
  371. generator.set("name", name);
  372. generator.set("size", String::number(locale_data.locales.size()));
  373. generator.append(R"~~~(
  374. static constexpr Array<Span<StringView const>, @size@> @name@ { {
  375. )~~~");
  376. constexpr size_t max_values_per_row = 10;
  377. size_t values_in_current_row = 0;
  378. for (auto& mapping_name : mapping_names) {
  379. if (values_in_current_row++ > 0)
  380. generator.append(" ");
  381. generator.set("name", move(mapping_name));
  382. generator.append("@name@.span(),");
  383. if (values_in_current_row == max_values_per_row) {
  384. values_in_current_row = 0;
  385. generator.append("\n ");
  386. }
  387. }
  388. generator.append(R"~~~(
  389. } };
  390. )~~~");
  391. };
  392. append_mapping("s_languages"sv, "s_languages_{}", locale_data.languages, [](auto const& value) { return value.languages; });
  393. append_mapping("s_territories"sv, "s_territories_{}", locale_data.territories, [](auto const& value) { return value.territories; });
  394. append_mapping("s_scripts"sv, "s_scripts_{}", locale_data.scripts, [](auto const& value) { return value.scripts; });
  395. append_mapping("s_currencies"sv, "s_currencies_{}", locale_data.currencies, [](auto const& value) { return value.currencies; });
  396. generator.append(R"~~~(
  397. struct LanguageMapping {
  398. Unicode::LanguageID key;
  399. Unicode::LanguageID alias;
  400. };
  401. )~~~");
  402. auto append_complex_mapping = [&](StringView name, auto const& mappings) {
  403. generator.set("name"sv, name);
  404. generator.append(R"~~~(
  405. static auto const& ensure_@name@_map()
  406. {
  407. static Vector<LanguageMapping> @name@_map;
  408. auto append_mapping = [&](StringView key, StringView alias) {
  409. if (auto key_value = Unicode::parse_unicode_language_id(key); key_value.has_value()) {
  410. if (auto alias_value = Unicode::parse_unicode_language_id(alias); alias_value.has_value())
  411. @name@_map.append({ key_value.release_value(), alias_value.release_value() });
  412. }
  413. };
  414. )~~~");
  415. auto keys = mappings.keys();
  416. quick_sort(keys, [](auto const& lhs, auto const& rhs) {
  417. // Sort the keys such that "und" language tags are at the end, as those are less specific.
  418. if (lhs.starts_with("und"sv) && !rhs.starts_with("und"sv))
  419. return false;
  420. if (!lhs.starts_with("und"sv) && rhs.starts_with("und"sv))
  421. return true;
  422. return lhs < rhs;
  423. });
  424. for (auto const& key : keys) {
  425. generator.set("key"sv, key);
  426. generator.set("alias"sv, mappings.get(key).value());
  427. generator.append(R"~~~(
  428. append_mapping("@key@"sv, "@alias@"sv);)~~~");
  429. }
  430. generator.append(R"~~~(
  431. return @name@_map;
  432. }
  433. )~~~");
  434. };
  435. append_complex_mapping("complex_alias"sv, locale_data.complex_mappings);
  436. append_complex_mapping("likely_subtags"sv, locale_data.likely_subtags);
  437. generator.append(R"~~~(
  438. static Unicode::LanguageID const* resolve_likely_subtag(Unicode::LanguageID const& language_id)
  439. {
  440. // https://unicode.org/reports/tr35/#Likely_Subtags
  441. static auto const& likely_subtags_map = ensure_likely_subtags_map();
  442. enum class State {
  443. LanguageScriptRegion,
  444. LanguageRegion,
  445. LanguageScript,
  446. Language,
  447. UndScript,
  448. Done,
  449. };
  450. auto state = State::LanguageScriptRegion;
  451. while (state != State::Done) {
  452. Unicode::LanguageID search_key;
  453. switch (state) {
  454. case State::LanguageScriptRegion:
  455. state = State::LanguageRegion;
  456. if (!language_id.script.has_value() || !language_id.region.has_value())
  457. continue;
  458. search_key.language = language_id.language;
  459. search_key.script = language_id.script;
  460. search_key.region = language_id.region;
  461. break;
  462. case State::LanguageRegion:
  463. state = State::LanguageScript;
  464. if (!language_id.region.has_value())
  465. continue;
  466. search_key.language = language_id.language;
  467. search_key.region = language_id.region;
  468. break;
  469. case State::LanguageScript:
  470. state = State::Language;
  471. if (!language_id.script.has_value())
  472. continue;
  473. search_key.language = language_id.language;
  474. search_key.script = language_id.script;
  475. break;
  476. case State::Language:
  477. state = State::UndScript;
  478. search_key.language = language_id.language;
  479. break;
  480. case State::UndScript:
  481. state = State::Done;
  482. if (!language_id.script.has_value())
  483. continue;
  484. search_key.language = "und"sv;
  485. search_key.script = language_id.script;
  486. break;
  487. default:
  488. VERIFY_NOT_REACHED();
  489. }
  490. for (auto const& map : likely_subtags_map) {
  491. if (map.key.language != search_key.language)
  492. continue;
  493. if (map.key.script != search_key.script)
  494. continue;
  495. if (map.key.region != search_key.region)
  496. continue;
  497. return &map.alias;
  498. }
  499. }
  500. return nullptr;
  501. }
  502. namespace Detail {
  503. )~~~");
  504. auto append_mapping_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
  505. generator.set("enum_title", enum_title);
  506. generator.set("enum_snake", enum_snake);
  507. generator.set("collection_name", collection_name);
  508. generator.append(R"~~~(
  509. Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@)
  510. {
  511. auto locale_value = locale_from_string(locale);
  512. if (!locale_value.has_value())
  513. return {};
  514. auto @enum_snake@_value = @enum_snake@_from_string(@enum_snake@);
  515. if (!@enum_snake@_value.has_value())
  516. return {};
  517. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  518. auto @enum_snake@_index = to_underlying(*@enum_snake@_value);
  519. auto const& mappings = @collection_name@.at(locale_index);
  520. auto @enum_snake@_mapping = mappings.at(@enum_snake@_index);
  521. if (@enum_snake@_mapping.is_empty())
  522. return {};
  523. return @enum_snake@_mapping;
  524. }
  525. )~~~");
  526. };
  527. auto append_from_string = [&](StringView enum_title, StringView enum_snake, Vector<String> const& values) {
  528. generator.set("enum_title", enum_title);
  529. generator.set("enum_snake", enum_snake);
  530. generator.append(R"~~~(
  531. Optional<@enum_title@> @enum_snake@_from_string(StringView const& @enum_snake@)
  532. {
  533. static HashMap<StringView, @enum_title@> @enum_snake@_values { {)~~~");
  534. for (auto const& value : values) {
  535. generator.set("key"sv, value);
  536. generator.set("value"sv, format_identifier(enum_title, value));
  537. generator.append(R"~~~(
  538. { "@key@"sv, @enum_title@::@value@ },)~~~");
  539. }
  540. generator.append(R"~~~(
  541. } };
  542. if (auto value = @enum_snake@_values.get(@enum_snake@); value.has_value())
  543. return value.value();
  544. return {};
  545. }
  546. )~~~");
  547. };
  548. auto append_alias_search = [&](StringView enum_snake, HashMap<String, String> const& aliases) {
  549. generator.set("enum_snake", enum_snake);
  550. generator.append(R"~~~(
  551. Optional<StringView> resolve_@enum_snake@_alias(StringView const& @enum_snake@)
  552. {
  553. static HashMap<StringView, StringView> @enum_snake@_aliases { {
  554. )~~~");
  555. constexpr size_t max_values_per_row = 10;
  556. size_t values_in_current_row = 0;
  557. for (auto const& alias : aliases) {
  558. if (values_in_current_row++ > 0)
  559. generator.append(" ");
  560. generator.set("key"sv, alias.key);
  561. generator.set("alias"sv, alias.value);
  562. generator.append("{ \"@key@\"sv, \"@alias@\"sv },");
  563. if (values_in_current_row == max_values_per_row) {
  564. generator.append("\n ");
  565. values_in_current_row = 0;
  566. }
  567. }
  568. generator.append(R"~~~(
  569. } };
  570. if (auto alias = @enum_snake@_aliases.get(@enum_snake@); alias.has_value())
  571. return alias.value();
  572. return {};
  573. }
  574. )~~~");
  575. };
  576. append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys());
  577. append_mapping_search("Language"sv, "language"sv, "s_languages"sv);
  578. append_from_string("Language"sv, "language"sv, locale_data.languages);
  579. append_alias_search("language"sv, locale_data.language_aliases);
  580. append_mapping_search("Territory"sv, "territory"sv, "s_territories"sv);
  581. append_from_string("Territory"sv, "territory"sv, locale_data.territories);
  582. append_alias_search("territory"sv, locale_data.territory_aliases);
  583. append_mapping_search("ScriptTag"sv, "script_tag"sv, "s_scripts"sv);
  584. append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts);
  585. append_alias_search("script_tag"sv, locale_data.script_aliases);
  586. append_mapping_search("Currency"sv, "currency"sv, "s_currencies"sv);
  587. append_from_string("Currency"sv, "currency"sv, locale_data.currencies);
  588. append_alias_search("variant"sv, locale_data.variant_aliases);
  589. append_alias_search("subdivision"sv, locale_data.subdivision_aliases);
  590. generator.append(R"~~~(
  591. void resolve_complex_language_aliases(Unicode::LanguageID& language_id)
  592. {
  593. static auto const& complex_alias_map = ensure_complex_alias_map();
  594. for (auto const& map : complex_alias_map) {
  595. if ((map.key.language != language_id.language) && (map.key.language != "und"sv))
  596. continue;
  597. if (map.key.script.has_value() && (map.key.script != language_id.script))
  598. continue;
  599. if (map.key.region.has_value() && (map.key.region != language_id.region))
  600. continue;
  601. if (!map.key.variants.is_empty() && (map.key.variants != language_id.variants))
  602. continue;
  603. auto alias = map.alias;
  604. if (alias.language == "und"sv)
  605. alias.language = move(language_id.language);
  606. if (!map.key.script.has_value() && !alias.script.has_value())
  607. alias.script = move(language_id.script);
  608. if (!map.key.region.has_value() && !alias.region.has_value())
  609. alias.region = move(language_id.region);
  610. if (map.key.variants.is_empty() && alias.variants.is_empty())
  611. alias.variants = move(language_id.variants);
  612. language_id = move(alias);
  613. break;
  614. }
  615. }
  616. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id)
  617. {
  618. if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
  619. return likely_subtag->region;
  620. return {};
  621. }
  622. }
  623. }
  624. )~~~");
  625. write_to_file_if_different(file, generator.as_string_view());
  626. }
  627. int main(int argc, char** argv)
  628. {
  629. char const* generated_header_path = nullptr;
  630. char const* generated_implementation_path = nullptr;
  631. char const* core_path = nullptr;
  632. char const* locale_names_path = nullptr;
  633. char const* numbers_path = nullptr;
  634. Core::ArgsParser args_parser;
  635. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  636. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  637. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  638. args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path");
  639. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  640. args_parser.parse(argc, argv);
  641. auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
  642. if (path.is_empty()) {
  643. warnln("{} is required", flags);
  644. args_parser.print_usage(stderr, argv[0]);
  645. exit(1);
  646. }
  647. auto file_or_error = Core::File::open(path, mode);
  648. if (file_or_error.is_error()) {
  649. warnln("Failed to open {}: {}", path, file_or_error.release_error());
  650. exit(1);
  651. }
  652. return file_or_error.release_value();
  653. };
  654. auto generated_header_file = open_file(generated_header_path, "-h/--generated-header-path", Core::OpenMode::ReadWrite);
  655. auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite);
  656. UnicodeLocaleData locale_data;
  657. parse_all_locales(core_path, locale_names_path, numbers_path, locale_data);
  658. generate_unicode_locale_header(generated_header_file, locale_data);
  659. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  660. return 0;
  661. }