// GenerateUnicodeLocale.cpp
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/AllOf.h>
  7. #include <AK/CharacterTypes.h>
  8. #include <AK/Format.h>
  9. #include <AK/HashMap.h>
  10. #include <AK/JsonObject.h>
  11. #include <AK/JsonParser.h>
  12. #include <AK/JsonValue.h>
  13. #include <AK/LexicalPath.h>
  14. #include <AK/QuickSort.h>
  15. #include <AK/SourceGenerator.h>
  16. #include <AK/String.h>
  17. #include <AK/StringBuilder.h>
  18. #include <LibCore/ArgsParser.h>
  19. #include <LibCore/DirIterator.h>
  20. #include <LibCore/File.h>
  21. #include <LibUnicode/Locale.h>
  22. struct ListPatterns {
  23. String type;
  24. String style;
  25. size_t start { 0 };
  26. size_t middle { 0 };
  27. size_t end { 0 };
  28. size_t pair { 0 };
  29. };
  30. struct Locale {
  31. String language;
  32. Optional<String> territory;
  33. Optional<String> variant;
  34. HashMap<String, size_t> languages;
  35. HashMap<String, size_t> territories;
  36. HashMap<String, size_t> scripts;
  37. HashMap<String, size_t> currencies;
  38. HashMap<String, size_t> keywords;
  39. Vector<ListPatterns> list_patterns;
  40. };
  41. struct CanonicalLanguageID {
  42. size_t language { 0 };
  43. size_t script { 0 };
  44. size_t region { 0 };
  45. Vector<size_t> variants {};
  46. };
  47. struct LanguageMapping {
  48. CanonicalLanguageID key {};
  49. CanonicalLanguageID alias {};
  50. };
  51. struct UnicodeLocaleData {
  52. Vector<String> unique_strings;
  53. HashMap<StringView, size_t> unique_string_indices;
  54. HashMap<String, Locale> locales;
  55. Vector<String> languages;
  56. Vector<String> territories;
  57. Vector<String> scripts;
  58. Vector<String> variants;
  59. Vector<String> currencies;
  60. Vector<String> keywords;
  61. Vector<String> list_pattern_types;
  62. Vector<String> list_pattern_styles;
  63. HashMap<String, String> language_aliases;
  64. HashMap<String, String> territory_aliases;
  65. HashMap<String, String> script_aliases;
  66. HashMap<String, String> variant_aliases;
  67. HashMap<String, String> subdivision_aliases;
  68. Vector<LanguageMapping> complex_mappings;
  69. Vector<LanguageMapping> likely_subtags;
  70. size_t max_variant_size { 0 };
  71. };
  72. static size_t ensure_unique_string(UnicodeLocaleData& locale_data, String string)
  73. {
  74. // We maintain a set of unique strings in two structures: a vector which owns the unique string,
  75. // and a hash map which maps that string to its index in the vector. The vector is to ensure the
  76. // strings are generated in an easily known order, and the map is to allow quickly deciding if a
  77. // string is actually unique (otherwise, we'd have to linear-search the vector for each string).
  78. //
  79. // Also note that index 0 will be reserved for the empty string, so the index returned from this
  80. // method is actually the real index in the vector + 1.
  81. if (auto index = locale_data.unique_string_indices.get(string); index.has_value())
  82. return *index;
  83. locale_data.unique_strings.append(move(string));
  84. size_t index = locale_data.unique_strings.size();
  85. locale_data.unique_string_indices.set(locale_data.unique_strings.last(), index);
  86. return index;
  87. }
  88. static StringView get_unique_string(UnicodeLocaleData& locale_data, size_t index)
  89. {
  90. if (index == 0)
  91. return {};
  92. VERIFY(index <= locale_data.unique_strings.size());
  93. return locale_data.unique_strings.at(index - 1);
  94. }
  95. static Optional<CanonicalLanguageID> parse_language(UnicodeLocaleData& locale_data, StringView language)
  96. {
  97. CanonicalLanguageID language_id {};
  98. auto segments = language.split_view('-');
  99. VERIFY(!segments.is_empty());
  100. size_t index = 0;
  101. if (Unicode::is_unicode_language_subtag(segments[index])) {
  102. language_id.language = ensure_unique_string(locale_data, segments[index]);
  103. if (segments.size() == ++index)
  104. return language_id;
  105. } else {
  106. return {};
  107. }
  108. if (Unicode::is_unicode_script_subtag(segments[index])) {
  109. language_id.script = ensure_unique_string(locale_data, segments[index]);
  110. if (segments.size() == ++index)
  111. return language_id;
  112. }
  113. if (Unicode::is_unicode_region_subtag(segments[index])) {
  114. language_id.region = ensure_unique_string(locale_data, segments[index]);
  115. if (segments.size() == ++index)
  116. return language_id;
  117. }
  118. while (index < segments.size()) {
  119. if (!Unicode::is_unicode_variant_subtag(segments[index]))
  120. return {};
  121. language_id.variants.append(ensure_unique_string(locale_data, segments[index++]));
  122. }
  123. return language_id;
  124. }
  125. static Optional<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
  126. {
  127. auto parsed_key = parse_language(locale_data, key);
  128. if (!parsed_key.has_value())
  129. return {};
  130. auto parsed_alias = parse_language(locale_data, alias);
  131. if (!parsed_alias.has_value())
  132. return {};
  133. return LanguageMapping { parsed_key.release_value(), parsed_alias.release_value() };
  134. }
  135. static void parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
  136. {
  137. LexicalPath core_aliases_path(move(core_supplemental_path));
  138. core_aliases_path = core_aliases_path.append("aliases.json"sv);
  139. VERIFY(Core::File::exists(core_aliases_path.string()));
  140. auto core_aliases_file_or_error = Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly);
  141. VERIFY(!core_aliases_file_or_error.is_error());
  142. auto core_aliases = JsonParser(core_aliases_file_or_error.value()->read_all()).parse();
  143. VERIFY(core_aliases.has_value());
  144. auto const& supplemental_object = core_aliases->as_object().get("supplemental"sv);
  145. auto const& metadata_object = supplemental_object.as_object().get("metadata"sv);
  146. auto const& alias_object = metadata_object.as_object().get("alias"sv);
  147. auto append_aliases = [&](auto& alias_object, auto& alias_map) {
  148. alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  149. auto alias = value.as_object().get("_replacement"sv).as_string();
  150. if (key.contains('-')) {
  151. auto mapping = parse_language_mapping(locale_data, key, alias);
  152. if (!mapping.has_value())
  153. return;
  154. locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
  155. locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
  156. locale_data.complex_mappings.append(mapping.release_value());
  157. } else {
  158. alias_map.set(key, move(alias));
  159. }
  160. });
  161. };
  162. append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases);
  163. append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases);
  164. append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases);
  165. append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases);
  166. append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases);
  167. }
  168. static void parse_likely_subtags(String core_supplemental_path, UnicodeLocaleData& locale_data)
  169. {
  170. LexicalPath likely_subtags_path(move(core_supplemental_path));
  171. likely_subtags_path = likely_subtags_path.append("likelySubtags.json"sv);
  172. VERIFY(Core::File::exists(likely_subtags_path.string()));
  173. auto likely_subtags_file_or_error = Core::File::open(likely_subtags_path.string(), Core::OpenMode::ReadOnly);
  174. VERIFY(!likely_subtags_file_or_error.is_error());
  175. auto likely_subtags = JsonParser(likely_subtags_file_or_error.value()->read_all()).parse();
  176. VERIFY(likely_subtags.has_value());
  177. auto const& supplemental_object = likely_subtags->as_object().get("supplemental"sv);
  178. auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
  179. likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  180. auto mapping = parse_language_mapping(locale_data, key, value.as_string());
  181. if (!mapping.has_value())
  182. return;
  183. locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
  184. locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
  185. locale_data.likely_subtags.append(mapping.release_value());
  186. });
  187. }
  188. static void parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  189. {
  190. LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them.
  191. languages_path = languages_path.append("languages.json"sv);
  192. VERIFY(Core::File::exists(languages_path.string()));
  193. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  194. VERIFY(!languages_file_or_error.is_error());
  195. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  196. VERIFY(languages.has_value());
  197. auto const& main_object = languages->as_object().get("main"sv);
  198. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  199. auto const& identity_object = locale_object.as_object().get("identity"sv);
  200. auto const& language_string = identity_object.as_object().get("language"sv);
  201. auto const& territory_string = identity_object.as_object().get("territory"sv);
  202. auto const& variant_string = identity_object.as_object().get("variant"sv);
  203. locale.language = language_string.as_string();
  204. if (!locale_data.languages.contains_slow(locale.language))
  205. locale_data.languages.append(locale.language);
  206. if (territory_string.is_string()) {
  207. locale.territory = territory_string.as_string();
  208. if (!locale_data.territories.contains_slow(*locale.territory))
  209. locale_data.territories.append(*locale.territory);
  210. }
  211. if (variant_string.is_string()) {
  212. locale.variant = variant_string.as_string();
  213. if (!locale_data.variants.contains_slow(*locale.variant))
  214. locale_data.variants.append(*locale.variant);
  215. }
  216. }
  217. static void parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  218. {
  219. LexicalPath languages_path(move(locale_path));
  220. languages_path = languages_path.append("languages.json"sv);
  221. VERIFY(Core::File::exists(languages_path.string()));
  222. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  223. VERIFY(!languages_file_or_error.is_error());
  224. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  225. VERIFY(languages.has_value());
  226. auto const& main_object = languages->as_object().get("main"sv);
  227. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  228. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  229. auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
  230. languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  231. if (!locale_data.languages.contains_slow(key))
  232. return;
  233. size_t index = ensure_unique_string(locale_data, value.as_string());
  234. locale.languages.set(key, index);
  235. });
  236. }
  237. static void parse_locale_territories(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  238. {
  239. LexicalPath territories_path(move(locale_path));
  240. territories_path = territories_path.append("territories.json"sv);
  241. VERIFY(Core::File::exists(territories_path.string()));
  242. auto territories_file_or_error = Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly);
  243. VERIFY(!territories_file_or_error.is_error());
  244. auto territories = JsonParser(territories_file_or_error.value()->read_all()).parse();
  245. VERIFY(territories.has_value());
  246. auto const& main_object = territories->as_object().get("main"sv);
  247. auto const& locale_object = main_object.as_object().get(territories_path.parent().basename());
  248. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  249. auto const& territories_object = locale_display_names_object.as_object().get("territories"sv);
  250. territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  251. if (!locale_data.territories.contains_slow(key))
  252. return;
  253. size_t index = ensure_unique_string(locale_data, value.as_string());
  254. locale.territories.set(key, index);
  255. });
  256. }
  257. static void parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  258. {
  259. LexicalPath scripts_path(move(locale_path));
  260. scripts_path = scripts_path.append("scripts.json"sv);
  261. VERIFY(Core::File::exists(scripts_path.string()));
  262. auto scripts_file_or_error = Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly);
  263. VERIFY(!scripts_file_or_error.is_error());
  264. auto scripts = JsonParser(scripts_file_or_error.value()->read_all()).parse();
  265. VERIFY(scripts.has_value());
  266. auto const& main_object = scripts->as_object().get("main"sv);
  267. auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename());
  268. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  269. auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv);
  270. scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  271. size_t index = ensure_unique_string(locale_data, value.as_string());
  272. locale.scripts.set(key, index);
  273. if (!locale_data.scripts.contains_slow(key))
  274. locale_data.scripts.append(key);
  275. });
  276. }
  277. static void parse_locale_list_patterns(String misc_path, UnicodeLocaleData& locale_data, Locale& locale)
  278. {
  279. LexicalPath list_patterns_path(move(misc_path));
  280. list_patterns_path = list_patterns_path.append("listPatterns.json"sv);
  281. VERIFY(Core::File::exists(list_patterns_path.string()));
  282. auto list_patterns_file_or_error = Core::File::open(list_patterns_path.string(), Core::OpenMode::ReadOnly);
  283. VERIFY(!list_patterns_file_or_error.is_error());
  284. auto list_patterns = JsonParser(list_patterns_file_or_error.value()->read_all()).parse();
  285. VERIFY(list_patterns.has_value());
  286. auto const& main_object = list_patterns->as_object().get("main"sv);
  287. auto const& locale_object = main_object.as_object().get(list_patterns_path.parent().basename());
  288. auto const& list_patterns_object = locale_object.as_object().get("listPatterns"sv);
  289. auto list_pattern_type = [](StringView key) {
  290. if (key.contains("type-standard"sv))
  291. return "conjunction"sv;
  292. if (key.contains("type-or"sv))
  293. return "disjunction"sv;
  294. if (key.contains("type-unit"sv))
  295. return "unit"sv;
  296. VERIFY_NOT_REACHED();
  297. };
  298. auto list_pattern_style = [](StringView key) {
  299. if (key.contains("short"sv))
  300. return "short"sv;
  301. if (key.contains("narrow"sv))
  302. return "narrow"sv;
  303. return "long"sv;
  304. };
  305. list_patterns_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  306. auto type = list_pattern_type(key);
  307. auto style = list_pattern_style(key);
  308. auto start = ensure_unique_string(locale_data, value.as_object().get("start"sv).as_string());
  309. auto middle = ensure_unique_string(locale_data, value.as_object().get("middle"sv).as_string());
  310. auto end = ensure_unique_string(locale_data, value.as_object().get("end"sv).as_string());
  311. auto pair = ensure_unique_string(locale_data, value.as_object().get("2"sv).as_string());
  312. if (!locale_data.list_pattern_types.contains_slow(type))
  313. locale_data.list_pattern_types.append(type);
  314. if (!locale_data.list_pattern_styles.contains_slow(style))
  315. locale_data.list_pattern_styles.append(style);
  316. locale.list_patterns.append({ move(type), move(style), move(start), move(middle), move(end), move(pair) });
  317. });
  318. }
  319. static void parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  320. {
  321. LexicalPath currencies_path(move(numbers_path));
  322. currencies_path = currencies_path.append("currencies.json"sv);
  323. VERIFY(Core::File::exists(currencies_path.string()));
  324. auto currencies_file_or_error = Core::File::open(currencies_path.string(), Core::OpenMode::ReadOnly);
  325. VERIFY(!currencies_file_or_error.is_error());
  326. auto currencies = JsonParser(currencies_file_or_error.value()->read_all()).parse();
  327. VERIFY(currencies.has_value());
  328. auto const& main_object = currencies->as_object().get("main"sv);
  329. auto const& locale_object = main_object.as_object().get(currencies_path.parent().basename());
  330. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  331. auto const& currencies_object = locale_numbers_object.as_object().get("currencies"sv);
  332. currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  333. auto const& display_name = value.as_object().get("displayName"sv);
  334. size_t index = ensure_unique_string(locale_data, display_name.as_string());
  335. locale.currencies.set(key, index);
  336. if (!locale_data.currencies.contains_slow(key))
  337. locale_data.currencies.append(key);
  338. });
  339. }
  340. static void parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  341. {
  342. static constexpr StringView key = "nu"sv;
  343. LexicalPath numbers_path(move(locale_numbers_path));
  344. numbers_path = numbers_path.append("numbers.json"sv);
  345. VERIFY(Core::File::exists(numbers_path.string()));
  346. auto numbers_file_or_error = Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly);
  347. VERIFY(!numbers_file_or_error.is_error());
  348. auto numbers = JsonParser(numbers_file_or_error.value()->read_all()).parse();
  349. VERIFY(numbers.has_value());
  350. auto const& main_object = numbers->as_object().get("main"sv);
  351. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  352. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  353. auto const& default_numbering_system_object = locale_numbers_object.as_object().get("defaultNumberingSystem"sv);
  354. auto const& other_numbering_systems_object = locale_numbers_object.as_object().get("otherNumberingSystems"sv);
  355. Vector<String> keyword_values {};
  356. keyword_values.append(default_numbering_system_object.as_string());
  357. other_numbering_systems_object.as_object().for_each_member([&](auto const&, JsonValue const& value) {
  358. auto keyword_value = value.as_string();
  359. if (!keyword_values.contains_slow(keyword_value))
  360. keyword_values.append(move(keyword_value));
  361. });
  362. StringBuilder builder;
  363. builder.join(',', keyword_values);
  364. auto index = ensure_unique_string(locale_data, builder.build());
  365. locale.keywords.set(key, index);
  366. if (!locale_data.keywords.contains_slow(key))
  367. locale_data.keywords.append(key);
  368. }
  369. static Core::DirIterator path_to_dir_iterator(String path)
  370. {
  371. LexicalPath lexical_path(move(path));
  372. lexical_path = lexical_path.append("main"sv);
  373. VERIFY(Core::File::is_directory(lexical_path.string()));
  374. Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir);
  375. if (iterator.has_error()) {
  376. warnln("{}: {}", lexical_path.string(), iterator.error_string());
  377. VERIFY_NOT_REACHED();
  378. }
  379. return iterator;
  380. }
  381. static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
  382. {
  383. auto identity_iterator = path_to_dir_iterator(locale_names_path);
  384. auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path));
  385. auto misc_iterator = path_to_dir_iterator(move(misc_path));
  386. auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
  387. LexicalPath core_supplemental_path(move(core_path));
  388. core_supplemental_path = core_supplemental_path.append("supplemental"sv);
  389. VERIFY(Core::File::is_directory(core_supplemental_path.string()));
  390. parse_core_aliases(core_supplemental_path.string(), locale_data);
  391. parse_likely_subtags(core_supplemental_path.string(), locale_data);
  392. auto remove_variants_from_path = [&](String path) -> Optional<String> {
  393. auto parsed_locale = parse_language(locale_data, LexicalPath::basename(path));
  394. if (!parsed_locale.has_value())
  395. return {};
  396. StringBuilder builder;
  397. builder.append(get_unique_string(locale_data, parsed_locale->language));
  398. if (auto script = get_unique_string(locale_data, parsed_locale->script); !script.is_empty())
  399. builder.appendff("-{}", script);
  400. if (auto region = get_unique_string(locale_data, parsed_locale->region); !region.is_empty())
  401. builder.appendff("-{}", region);
  402. return builder.build();
  403. };
  404. while (identity_iterator.has_next()) {
  405. auto locale_path = identity_iterator.next_full_path();
  406. VERIFY(Core::File::is_directory(locale_path));
  407. auto language = remove_variants_from_path(locale_path);
  408. if (!language.has_value())
  409. continue;
  410. auto& locale = locale_data.locales.ensure(*language);
  411. parse_identity(locale_path, locale_data, locale);
  412. }
  413. while (locale_names_iterator.has_next()) {
  414. auto locale_path = locale_names_iterator.next_full_path();
  415. VERIFY(Core::File::is_directory(locale_path));
  416. auto language = remove_variants_from_path(locale_path);
  417. if (!language.has_value())
  418. continue;
  419. auto& locale = locale_data.locales.ensure(*language);
  420. parse_locale_languages(locale_path, locale_data, locale);
  421. parse_locale_territories(locale_path, locale_data, locale);
  422. parse_locale_scripts(locale_path, locale_data, locale);
  423. }
  424. while (misc_iterator.has_next()) {
  425. auto misc_path = misc_iterator.next_full_path();
  426. VERIFY(Core::File::is_directory(misc_path));
  427. auto language = remove_variants_from_path(misc_path);
  428. if (!language.has_value())
  429. continue;
  430. auto& locale = locale_data.locales.ensure(*language);
  431. parse_locale_list_patterns(misc_path, locale_data, locale);
  432. }
  433. while (numbers_iterator.has_next()) {
  434. auto numbers_path = numbers_iterator.next_full_path();
  435. VERIFY(Core::File::is_directory(numbers_path));
  436. auto language = remove_variants_from_path(numbers_path);
  437. if (!language.has_value())
  438. continue;
  439. auto& locale = locale_data.locales.ensure(*language);
  440. parse_locale_currencies(numbers_path, locale_data, locale);
  441. parse_numeric_keywords(numbers_path, locale_data, locale);
  442. }
  443. }
  444. static String format_identifier(StringView owner, String identifier)
  445. {
  446. identifier = identifier.replace("-"sv, "_"sv, true);
  447. if (all_of(identifier, is_ascii_digit))
  448. return String::formatted("{}_{}", owner[0], identifier);
  449. return identifier.to_titlecase();
  450. }
  // Builds the text of the generated locale header and writes it to `file`.
  // The header declares one enum per data category (locales, languages, territories, scripts,
  // currencies, keywords, variants, list pattern types and styles) followed by the lookup
  // function declarations in namespace Unicode::Detail.
  // Note: the value vectors inside `locale_data` are sorted in place as a side effect.
  451. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  452. {
  453. StringBuilder builder;
  454. SourceGenerator generator { builder };
  // Emits "enum class <name> : <underlying>" with one enumerator per value (named through
  // format_identifier), preceded by `default_` when it is non-empty. The values are sorted first.
  // The underlying type is u8 while the enumerator count (including the default) is below 256,
  // and u16 otherwise.
  455. auto generate_enum = [&](StringView name, StringView default_, Vector<String>& values) {
  456. quick_sort(values);
  457. generator.set("name", name);
  458. generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv);
  459. generator.append(R"~~~(
  460. enum class @name@ : @underlying@ {)~~~");
  461. if (!default_.is_empty()) {
  462. generator.set("default", default_);
  463. generator.append(R"~~~(
  464. @default@,)~~~");
  465. }
  466. for (auto const& value : values) {
  467. generator.set("value", format_identifier(name, value));
  468. generator.append(R"~~~(
  469. @value@,)~~~");
  470. }
  471. generator.append(R"~~~(
  472. };
  473. )~~~");
  474. };
  475. generator.append(R"~~~(
  476. #pragma once
  477. #include <AK/Optional.h>
  478. #include <AK/StringView.h>
  479. #include <AK/Types.h>
  480. #include <LibUnicode/Forward.h>
  481. namespace Unicode {
  482. )~~~");
  // The locale names are the keys of the locale map; only the Locale enum gets a default
  // enumerator ("None").
  483. auto locales = locale_data.locales.keys();
  484. generate_enum("Locale"sv, "None"sv, locales);
  485. generate_enum("Language"sv, {}, locale_data.languages);
  486. generate_enum("Territory"sv, {}, locale_data.territories);
  487. generate_enum("ScriptTag"sv, {}, locale_data.scripts);
  488. generate_enum("Currency"sv, {}, locale_data.currencies);
  489. generate_enum("Key"sv, {}, locale_data.keywords);
  490. generate_enum("Variant"sv, {}, locale_data.variants);
  491. generate_enum("ListPatternType"sv, {}, locale_data.list_pattern_types);
  492. generate_enum("ListPatternStyle"sv, {}, locale_data.list_pattern_styles);
  493. generator.append(R"~~~(
  494. namespace Detail {
  495. Optional<Locale> locale_from_string(StringView const& locale);
  496. Optional<StringView> get_locale_language_mapping(StringView locale, StringView language);
  497. Optional<Language> language_from_string(StringView const& language);
  498. Optional<StringView> resolve_language_alias(StringView const& language);
  499. Optional<StringView> get_locale_territory_mapping(StringView locale, StringView territory);
  500. Optional<Territory> territory_from_string(StringView const& territory);
  501. Optional<StringView> resolve_territory_alias(StringView const& territory);
  502. Optional<StringView> get_locale_script_tag_mapping(StringView locale, StringView script_tag);
  503. Optional<ScriptTag> script_tag_from_string(StringView const& script_tag);
  504. Optional<StringView> resolve_script_tag_alias(StringView const& script_tag);
  505. Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency);
  506. Optional<Currency> currency_from_string(StringView const& currency);
  507. Optional<StringView> get_locale_key_mapping(StringView locale, StringView key);
  508. Optional<Key> key_from_string(StringView const& key);
  509. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style);
  510. Optional<ListPatternType> list_pattern_type_from_string(StringView const& list_pattern_type);
  511. Optional<ListPatternStyle> list_pattern_style_from_string(StringView const& list_pattern_style);
  512. Optional<StringView> resolve_variant_alias(StringView const& variant);
  513. Optional<StringView> resolve_subdivision_alias(StringView const& subdivision);
  514. void resolve_complex_language_aliases(Unicode::LanguageID& language_id);
  515. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id);
  516. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id);
  517. }
  518. }
  519. )~~~");
  // Write the accumulated header text in one go.
  520. file.write(generator.as_string_view());
  521. }
  522. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  523. {
  524. StringBuilder builder;
  525. SourceGenerator generator { builder };
  526. generator.set("strings_size"sv, String::number(locale_data.unique_strings.size()));
  527. generator.set("locales_size"sv, String::number(locale_data.locales.size()));
  528. generator.set("territories_size", String::number(locale_data.territories.size()));
  529. generator.set("variants_size", String::number(locale_data.max_variant_size));
  530. generator.append(R"~~~(
  531. #include <AK/Array.h>
  532. #include <AK/HashMap.h>
  533. #include <AK/Span.h>
  534. #include <LibUnicode/Locale.h>
  535. #include <LibUnicode/UnicodeLocale.h>
  536. namespace Unicode {
  537. struct Patterns {
  538. ListPatternType type;
  539. ListPatternStyle style;
  540. size_t start { 0 };
  541. size_t middle { 0 };
  542. size_t end { 0 };
  543. size_t pair { 0 };
  544. };
  545. )~~~");
  546. generator.append(R"~~~(
  547. static constexpr Array<StringView, @strings_size@ + 1> s_string_list { {
  548. {})~~~");
  549. constexpr size_t max_strings_per_row = 30;
  550. size_t strings_in_current_row = 1;
  551. for (auto const& string : locale_data.unique_strings) {
  552. if (strings_in_current_row++ > 0)
  553. generator.append(", ");
  554. generator.append(String::formatted("\"{}\"sv", string));
  555. if (strings_in_current_row == max_strings_per_row) {
  556. strings_in_current_row = 0;
  557. generator.append(",\n ");
  558. }
  559. }
  560. generator.append(R"~~~(
  561. } };
  562. )~~~");
  563. auto format_mapping_name = [](StringView format, StringView name) {
  564. auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true);
  565. return String::formatted(format, mapping_name);
  566. };
  567. auto append_index = [&](size_t index) {
  568. generator.append(String::formatted(", {}", index));
  569. };
  570. auto append_list_and_size = [&](auto const& list) {
  571. if (list.is_empty()) {
  572. generator.append(", {}, 0");
  573. return;
  574. }
  575. bool first = true;
  576. generator.append(", {");
  577. for (auto const& item : list) {
  578. generator.append(first ? " " : ", ");
  579. generator.append(String::number(item));
  580. first = false;
  581. }
  582. generator.append(String::formatted(" }}, {}", list.size()));
  583. };
  584. auto append_string_index_list = [&](String name, auto const& keys, auto const& mappings) {
  585. generator.set("name", name);
  586. generator.set("size", String::number(keys.size()));
  587. generator.append(R"~~~(
  588. static constexpr Array<size_t, @size@> @name@ { {
  589. )~~~");
  590. constexpr size_t max_values_per_row = 30;
  591. size_t values_in_current_row = 0;
  592. for (auto const& key : keys) {
  593. if (values_in_current_row++ > 0)
  594. generator.append(" ");
  595. if (auto it = mappings.find(key); it != mappings.end())
  596. generator.set("mapping"sv, String::number(it->value));
  597. else
  598. generator.set("mapping"sv, "0"sv);
  599. generator.append("@mapping@,");
  600. if (values_in_current_row == max_values_per_row) {
  601. values_in_current_row = 0;
  602. generator.append("\n ");
  603. }
  604. }
  605. generator.append(R"~~~(
  606. } };
  607. )~~~");
  608. };
  609. auto append_list_patterns = [&](StringView name, Vector<ListPatterns> const& list_patterns) {
  610. generator.set("name", name);
  611. generator.set("size", String::number(list_patterns.size()));
  612. generator.append(R"~~~(
  613. static constexpr Array<Patterns, @size@> @name@ { {)~~~");
  614. for (auto const& list_pattern : list_patterns) {
  615. generator.set("type"sv, String::formatted("ListPatternType::{}", format_identifier({}, list_pattern.type)));
  616. generator.set("style"sv, String::formatted("ListPatternStyle::{}", format_identifier({}, list_pattern.style)));
  617. generator.set("start"sv, String::number(list_pattern.start));
  618. generator.set("middle"sv, String::number(list_pattern.middle));
  619. generator.set("end"sv, String::number(list_pattern.end));
  620. generator.set("pair"sv, String::number(list_pattern.pair));
  621. generator.append(R"~~~(
  622. { @type@, @style@, @start@, @middle@, @end@, @pair@ },)~~~");
  623. }
  624. generator.append(R"~~~(
  625. } };
  626. )~~~");
  627. };
  628. auto append_mapping = [&](StringView type, StringView name, StringView format, auto format_list_callback) {
  629. Vector<String> mapping_names;
  630. for (auto const& locale : locale_data.locales) {
  631. auto mapping_name = format_mapping_name(format, locale.key);
  632. format_list_callback(mapping_name, locale.value);
  633. mapping_names.append(move(mapping_name));
  634. }
  635. quick_sort(mapping_names);
  636. generator.set("type", type);
  637. generator.set("name", name);
  638. generator.set("size", String::number(locale_data.locales.size()));
  639. generator.append(R"~~~(
  640. static constexpr Array<Span<@type@ const>, @size@> @name@ { {
  641. )~~~");
  642. constexpr size_t max_values_per_row = 10;
  643. size_t values_in_current_row = 0;
  644. for (auto& mapping_name : mapping_names) {
  645. if (values_in_current_row++ > 0)
  646. generator.append(" ");
  647. generator.set("name", move(mapping_name));
  648. generator.append("@name@.span(),");
  649. if (values_in_current_row == max_values_per_row) {
  650. values_in_current_row = 0;
  651. generator.append("\n ");
  652. }
  653. }
  654. generator.append(R"~~~(
  655. } };
  656. )~~~");
  657. };
  658. append_mapping("size_t"sv, "s_languages"sv, "s_languages_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.languages, value.languages); });
  659. append_mapping("size_t"sv, "s_territories"sv, "s_territories_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.territories, value.territories); });
  660. append_mapping("size_t"sv, "s_scripts"sv, "s_scripts_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.scripts, value.scripts); });
  661. append_mapping("size_t"sv, "s_currencies"sv, "s_currencies_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.currencies, value.currencies); });
  662. append_mapping("size_t"sv, "s_keywords"sv, "s_keywords_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.keywords, value.keywords); });
  663. append_mapping("Patterns"sv, "s_list_patterns"sv, "s_list_patterns_{}", [&](auto const& name, auto const& value) { append_list_patterns(name, value.list_patterns); });
  664. generator.append(R"~~~(
  665. struct CanonicalLanguageID {
  666. Unicode::LanguageID to_unicode_language_id() const
  667. {
  668. Unicode::LanguageID language_id {};
  669. language_id.variants.ensure_capacity(variants_size);
  670. language_id.language = s_string_list[language];
  671. if (script != 0)
  672. language_id.script = s_string_list[script];
  673. if (region != 0)
  674. language_id.region = s_string_list[region];
  675. for (size_t i = 0; i < variants_size; ++i)
  676. language_id.variants.append(s_string_list[variants[i]]);
  677. return language_id;
  678. }
  679. bool matches_variants(Vector<String> const& other_variants) const {
  680. if (variants_size == 0)
  681. return true;
  682. if (other_variants.size() != variants_size)
  683. return false;
  684. for (size_t i = 0; i < variants_size; ++i) {
  685. if (s_string_list[variants[i]] != other_variants[i])
  686. return false;
  687. }
  688. return true;
  689. };
  690. size_t language { 0 };
  691. size_t script { 0 };
  692. size_t region { 0 };
  693. Array<size_t, @variants_size@> variants {};
  694. size_t variants_size { 0 };
  695. };
  696. struct LanguageMapping {
  697. CanonicalLanguageID key;
  698. CanonicalLanguageID alias;
  699. };
  700. )~~~");
  701. auto append_complex_mapping = [&](StringView name, auto& mappings) {
  702. generator.set("size", String::number(mappings.size()));
  703. generator.set("name"sv, name);
  704. generator.append(R"~~~(
  705. static constexpr Array<LanguageMapping, @size@> s_@name@ { {
  706. )~~~");
  707. quick_sort(mappings, [&](auto const& lhs, auto const& rhs) {
  708. auto const& lhs_language = get_unique_string(locale_data, lhs.key.language);
  709. auto const& rhs_language = get_unique_string(locale_data, rhs.key.language);
  710. // Sort the keys such that "und" language tags are at the end, as those are less specific.
  711. if (lhs_language.starts_with("und"sv) && !rhs_language.starts_with("und"sv))
  712. return false;
  713. if (!lhs_language.starts_with("und"sv) && rhs_language.starts_with("und"sv))
  714. return true;
  715. return lhs_language < rhs_language;
  716. });
  717. for (auto const& mapping : mappings) {
  718. generator.set("language"sv, String::number(mapping.key.language));
  719. generator.append(" { { @language@");
  720. append_index(mapping.key.script);
  721. append_index(mapping.key.region);
  722. append_list_and_size(mapping.key.variants);
  723. generator.set("language"sv, String::number(mapping.alias.language));
  724. generator.append(" }, { @language@");
  725. append_index(mapping.alias.script);
  726. append_index(mapping.alias.region);
  727. append_list_and_size(mapping.alias.variants);
  728. generator.append(" } },\n");
  729. }
  730. generator.append("} };\n");
  731. };
  732. append_complex_mapping("complex_alias"sv, locale_data.complex_mappings);
  733. append_complex_mapping("likely_subtags"sv, locale_data.likely_subtags);
  734. generator.append(R"~~~(
  735. static LanguageMapping const* resolve_likely_subtag(Unicode::LanguageID const& language_id)
  736. {
  737. // https://unicode.org/reports/tr35/#Likely_Subtags
  738. enum class State {
  739. LanguageScriptRegion,
  740. LanguageRegion,
  741. LanguageScript,
  742. Language,
  743. UndScript,
  744. Done,
  745. };
  746. auto state = State::LanguageScriptRegion;
  747. while (state != State::Done) {
  748. Unicode::LanguageID search_key;
  749. switch (state) {
  750. case State::LanguageScriptRegion:
  751. state = State::LanguageRegion;
  752. if (!language_id.script.has_value() || !language_id.region.has_value())
  753. continue;
  754. search_key.language = *language_id.language;
  755. search_key.script = *language_id.script;
  756. search_key.region = *language_id.region;
  757. break;
  758. case State::LanguageRegion:
  759. state = State::LanguageScript;
  760. if (!language_id.region.has_value())
  761. continue;
  762. search_key.language = *language_id.language;
  763. search_key.region = *language_id.region;
  764. break;
  765. case State::LanguageScript:
  766. state = State::Language;
  767. if (!language_id.script.has_value())
  768. continue;
  769. search_key.language = *language_id.language;
  770. search_key.script = *language_id.script;
  771. break;
  772. case State::Language:
  773. state = State::UndScript;
  774. search_key.language = *language_id.language;
  775. break;
  776. case State::UndScript:
  777. state = State::Done;
  778. if (!language_id.script.has_value())
  779. continue;
  780. search_key.language = "und"sv;
  781. search_key.script = *language_id.script;
  782. break;
  783. default:
  784. VERIFY_NOT_REACHED();
  785. }
  786. for (auto const& map : s_likely_subtags) {
  787. auto const& key_language = s_string_list[map.key.language];
  788. auto const& key_script = s_string_list[map.key.script];
  789. auto const& key_region = s_string_list[map.key.region];
  790. if (key_language != search_key.language)
  791. continue;
  792. if (!key_script.is_empty() || search_key.script.has_value()) {
  793. if (key_script != search_key.script)
  794. continue;
  795. }
  796. if (!key_region.is_empty() || search_key.region.has_value()) {
  797. if (key_region != search_key.region)
  798. continue;
  799. }
  800. return &map;
  801. }
  802. }
  803. return nullptr;
  804. }
  805. namespace Detail {
  806. )~~~");
  807. auto append_mapping_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
  808. generator.set("enum_title", enum_title);
  809. generator.set("enum_snake", enum_snake);
  810. generator.set("collection_name", collection_name);
  811. generator.append(R"~~~(
  812. Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@)
  813. {
  814. auto locale_value = locale_from_string(locale);
  815. if (!locale_value.has_value())
  816. return {};
  817. auto @enum_snake@_value = @enum_snake@_from_string(@enum_snake@);
  818. if (!@enum_snake@_value.has_value())
  819. return {};
  820. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  821. auto @enum_snake@_index = to_underlying(*@enum_snake@_value);
  822. auto const& mappings = @collection_name@.at(locale_index);
  823. auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
  824. auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
  825. if (@enum_snake@_mapping.is_empty())
  826. return {};
  827. return @enum_snake@_mapping;
  828. }
  829. )~~~");
  830. };
  831. auto append_from_string = [&](StringView enum_title, StringView enum_snake, Vector<String> const& values) {
  832. generator.set("enum_title", enum_title);
  833. generator.set("enum_snake", enum_snake);
  834. generator.append(R"~~~(
  835. Optional<@enum_title@> @enum_snake@_from_string(StringView const& @enum_snake@)
  836. {
  837. static HashMap<StringView, @enum_title@> @enum_snake@_values { {)~~~");
  838. for (auto const& value : values) {
  839. generator.set("key"sv, value);
  840. generator.set("value"sv, format_identifier(enum_title, value));
  841. generator.append(R"~~~(
  842. { "@key@"sv, @enum_title@::@value@ },)~~~");
  843. }
  844. generator.append(R"~~~(
  845. } };
  846. if (auto value = @enum_snake@_values.get(@enum_snake@); value.has_value())
  847. return value.value();
  848. return {};
  849. }
  850. )~~~");
  851. };
  852. auto append_alias_search = [&](StringView enum_snake, HashMap<String, String> const& aliases) {
  853. generator.set("enum_snake", enum_snake);
  854. generator.append(R"~~~(
  855. Optional<StringView> resolve_@enum_snake@_alias(StringView const& @enum_snake@)
  856. {
  857. static HashMap<StringView, StringView> @enum_snake@_aliases { {
  858. )~~~");
  859. constexpr size_t max_values_per_row = 10;
  860. size_t values_in_current_row = 0;
  861. for (auto const& alias : aliases) {
  862. if (values_in_current_row++ > 0)
  863. generator.append(" ");
  864. generator.set("key"sv, alias.key);
  865. generator.set("alias"sv, alias.value);
  866. generator.append("{ \"@key@\"sv, \"@alias@\"sv },");
  867. if (values_in_current_row == max_values_per_row) {
  868. generator.append("\n ");
  869. values_in_current_row = 0;
  870. }
  871. }
  872. generator.append(R"~~~(
  873. } };
  874. if (auto alias = @enum_snake@_aliases.get(@enum_snake@); alias.has_value())
  875. return alias.value();
  876. return {};
  877. }
  878. )~~~");
  879. };
  880. append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys());
  881. append_mapping_search("Language"sv, "language"sv, "s_languages"sv);
  882. append_from_string("Language"sv, "language"sv, locale_data.languages);
  883. append_alias_search("language"sv, locale_data.language_aliases);
  884. append_mapping_search("Territory"sv, "territory"sv, "s_territories"sv);
  885. append_from_string("Territory"sv, "territory"sv, locale_data.territories);
  886. append_alias_search("territory"sv, locale_data.territory_aliases);
  887. append_mapping_search("ScriptTag"sv, "script_tag"sv, "s_scripts"sv);
  888. append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts);
  889. append_alias_search("script_tag"sv, locale_data.script_aliases);
  890. append_mapping_search("Currency"sv, "currency"sv, "s_currencies"sv);
  891. append_from_string("Currency"sv, "currency"sv, locale_data.currencies);
  892. append_mapping_search("Key"sv, "key"sv, "s_keywords"sv);
  893. append_from_string("Key"sv, "key"sv, locale_data.keywords);
  894. append_alias_search("variant"sv, locale_data.variant_aliases);
  895. append_alias_search("subdivision"sv, locale_data.subdivision_aliases);
  896. append_from_string("ListPatternType"sv, "list_pattern_type"sv, locale_data.list_pattern_types);
  897. append_from_string("ListPatternStyle"sv, "list_pattern_style"sv, locale_data.list_pattern_styles);
  898. generator.append(R"~~~(
  899. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style)
  900. {
  901. auto locale_value = locale_from_string(locale);
  902. if (!locale_value.has_value())
  903. return {};
  904. auto type_value = list_pattern_type_from_string(list_pattern_type);
  905. if (!type_value.has_value())
  906. return {};
  907. auto style_value = list_pattern_style_from_string(list_pattern_style);
  908. if (!style_value.has_value())
  909. return {};
  910. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  911. auto const& locale_list_patterns = s_list_patterns.at(locale_index);
  912. for (auto const& list_patterns : locale_list_patterns) {
  913. if ((list_patterns.type == type_value) && (list_patterns.style == style_value)) {
  914. auto const& start = s_string_list[list_patterns.start];
  915. auto const& middle = s_string_list[list_patterns.middle];
  916. auto const& end = s_string_list[list_patterns.end];
  917. auto const& pair = s_string_list[list_patterns.pair];
  918. return ListPatterns { start, middle, end, pair };
  919. }
  920. }
  921. return {};
  922. }
  923. void resolve_complex_language_aliases(Unicode::LanguageID& language_id)
  924. {
  925. for (auto const& map : s_complex_alias) {
  926. auto const& key_language = s_string_list[map.key.language];
  927. auto const& key_script = s_string_list[map.key.script];
  928. auto const& key_region = s_string_list[map.key.region];
  929. if ((key_language != language_id.language) && (key_language != "und"sv))
  930. continue;
  931. if (!key_script.is_empty() && (key_script != language_id.script))
  932. continue;
  933. if (!key_region.is_empty() && (key_region != language_id.region))
  934. continue;
  935. if (!map.key.matches_variants(language_id.variants))
  936. continue;
  937. auto alias = map.alias.to_unicode_language_id();
  938. if (alias.language == "und"sv)
  939. alias.language = move(language_id.language);
  940. if (key_script.is_empty() && !alias.script.has_value())
  941. alias.script = move(language_id.script);
  942. if (key_region.is_empty() && !alias.region.has_value())
  943. alias.region = move(language_id.region);
  944. if (map.key.variants_size == 0 && alias.variants.is_empty())
  945. alias.variants = move(language_id.variants);
  946. language_id = move(alias);
  947. break;
  948. }
  949. }
  950. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id)
  951. {
  952. // https://www.unicode.org/reports/tr35/#Likely_Subtags
  953. auto const* likely_subtag = resolve_likely_subtag(language_id);
  954. if (likely_subtag == nullptr)
  955. return {};
  956. auto maximized = language_id;
  957. auto const& key_script = s_string_list[likely_subtag->key.script];
  958. auto const& key_region = s_string_list[likely_subtag->key.region];
  959. auto const& alias_language = s_string_list[likely_subtag->alias.language];
  960. auto const& alias_script = s_string_list[likely_subtag->alias.script];
  961. auto const& alias_region = s_string_list[likely_subtag->alias.region];
  962. if (maximized.language == "und"sv)
  963. maximized.language = alias_language;
  964. if (!maximized.script.has_value() || (!key_script.is_empty() && !alias_script.is_empty()))
  965. maximized.script = alias_script;
  966. if (!maximized.region.has_value() || (!key_region.is_empty() && !alias_region.is_empty()))
  967. maximized.region = alias_region;
  968. return maximized;
  969. }
  970. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id)
  971. {
  972. if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
  973. return s_string_list[likely_subtag->alias.region];
  974. return {};
  975. }
  976. }
  977. }
  978. )~~~");
  979. file.write(generator.as_string_view());
  980. }
  981. int main(int argc, char** argv)
  982. {
  983. char const* generated_header_path = nullptr;
  984. char const* generated_implementation_path = nullptr;
  985. char const* core_path = nullptr;
  986. char const* locale_names_path = nullptr;
  987. char const* misc_path = nullptr;
  988. char const* numbers_path = nullptr;
  989. Core::ArgsParser args_parser;
  990. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  991. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  992. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  993. args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path");
  994. args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path");
  995. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  996. args_parser.parse(argc, argv);
  997. auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
  998. if (path.is_empty()) {
  999. warnln("{} is required", flags);
  1000. args_parser.print_usage(stderr, argv[0]);
  1001. exit(1);
  1002. }
  1003. auto file_or_error = Core::File::open(path, mode);
  1004. if (file_or_error.is_error()) {
  1005. warnln("Failed to open {}: {}", path, file_or_error.release_error());
  1006. exit(1);
  1007. }
  1008. return file_or_error.release_value();
  1009. };
  1010. auto generated_header_file = open_file(generated_header_path, "-h/--generated-header-path", Core::OpenMode::ReadWrite);
  1011. auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite);
  1012. UnicodeLocaleData locale_data;
  1013. parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, locale_data);
  1014. generate_unicode_locale_header(generated_header_file, locale_data);
  1015. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  1016. return 0;
  1017. }