GenerateUnicodeLocale.cpp 55 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AllOf.h>
  8. #include <AK/CharacterTypes.h>
  9. #include <AK/Format.h>
  10. #include <AK/HashMap.h>
  11. #include <AK/JsonObject.h>
  12. #include <AK/JsonParser.h>
  13. #include <AK/JsonValue.h>
  14. #include <AK/LexicalPath.h>
  15. #include <AK/QuickSort.h>
  16. #include <AK/SourceGenerator.h>
  17. #include <AK/String.h>
  18. #include <AK/StringBuilder.h>
  19. #include <LibCore/ArgsParser.h>
  20. #include <LibCore/DirIterator.h>
  21. #include <LibCore/File.h>
  22. #include <LibUnicode/Locale.h>
  23. using StringIndexType = u16;
  24. constexpr auto s_string_index_type = "u16"sv;
  25. struct NumberSystem {
  26. StringIndexType system { 0 };
  27. HashMap<String, StringIndexType> symbols {};
  28. };
  29. struct ListPatterns {
  30. String type;
  31. String style;
  32. StringIndexType start { 0 };
  33. StringIndexType middle { 0 };
  34. StringIndexType end { 0 };
  35. StringIndexType pair { 0 };
  36. };
  37. struct Locale {
  38. String language;
  39. Optional<String> territory;
  40. Optional<String> variant;
  41. HashMap<String, StringIndexType> languages;
  42. HashMap<String, StringIndexType> territories;
  43. HashMap<String, StringIndexType> scripts;
  44. HashMap<String, StringIndexType> currencies;
  45. HashMap<String, StringIndexType> keywords;
  46. HashMap<String, NumberSystem> number_systems;
  47. Vector<ListPatterns> list_patterns;
  48. };
  49. struct CanonicalLanguageID {
  50. StringIndexType language { 0 };
  51. StringIndexType script { 0 };
  52. StringIndexType region { 0 };
  53. Vector<StringIndexType> variants {};
  54. };
  55. struct LanguageMapping {
  56. CanonicalLanguageID key {};
  57. CanonicalLanguageID alias {};
  58. };
  59. struct UnicodeLocaleData {
  60. Vector<String> unique_strings;
  61. HashMap<StringView, StringIndexType> unique_string_indices;
  62. HashMap<String, Locale> locales;
  63. Vector<String> languages;
  64. Vector<String> territories;
  65. Vector<String> scripts;
  66. Vector<String> variants;
  67. Vector<String> currencies;
  68. Vector<String> keywords;
  69. Vector<String> numeric_symbols;
  70. Vector<String> list_pattern_types;
  71. Vector<String> list_pattern_styles;
  72. HashMap<String, StringIndexType> language_aliases;
  73. HashMap<String, StringIndexType> territory_aliases;
  74. HashMap<String, StringIndexType> script_aliases;
  75. HashMap<String, StringIndexType> variant_aliases;
  76. HashMap<String, StringIndexType> subdivision_aliases;
  77. Vector<LanguageMapping> complex_mappings;
  78. Vector<LanguageMapping> likely_subtags;
  79. size_t max_variant_size { 0 };
  80. };
  81. static StringIndexType ensure_unique_string(UnicodeLocaleData& locale_data, String string)
  82. {
  83. // We maintain a set of unique strings in two structures: a vector which owns the unique string,
  84. // and a hash map which maps that string to its index in the vector. The vector is to ensure the
  85. // strings are generated in an easily known order, and the map is to allow quickly deciding if a
  86. // string is actually unique (otherwise, we'd have to linear-search the vector for each string).
  87. //
  88. // Also note that index 0 will be reserved for the empty string, so the index returned from this
  89. // method is actually the real index in the vector + 1.
  90. if (auto index = locale_data.unique_string_indices.get(string); index.has_value())
  91. return *index;
  92. locale_data.unique_strings.append(move(string));
  93. size_t index = locale_data.unique_strings.size();
  94. // There are currently on the order of 46K unique strings in UnicodeLocale.cpp.
  95. // If that number reaches 2^16, bump the StringIndexType alias to a u32.
  96. VERIFY(index < NumericLimits<StringIndexType>::max());
  97. auto string_index = static_cast<StringIndexType>(index);
  98. locale_data.unique_string_indices.set(locale_data.unique_strings.last(), string_index);
  99. return string_index;
  100. }
  101. static StringView get_unique_string(UnicodeLocaleData& locale_data, StringIndexType index)
  102. {
  103. if (index == 0)
  104. return {};
  105. VERIFY(index <= locale_data.unique_strings.size());
  106. return locale_data.unique_strings.at(index - 1);
  107. }
  108. static Optional<CanonicalLanguageID> parse_language(UnicodeLocaleData& locale_data, StringView language)
  109. {
  110. CanonicalLanguageID language_id {};
  111. auto segments = language.split_view('-');
  112. VERIFY(!segments.is_empty());
  113. size_t index = 0;
  114. if (Unicode::is_unicode_language_subtag(segments[index])) {
  115. language_id.language = ensure_unique_string(locale_data, segments[index]);
  116. if (segments.size() == ++index)
  117. return language_id;
  118. } else {
  119. return {};
  120. }
  121. if (Unicode::is_unicode_script_subtag(segments[index])) {
  122. language_id.script = ensure_unique_string(locale_data, segments[index]);
  123. if (segments.size() == ++index)
  124. return language_id;
  125. }
  126. if (Unicode::is_unicode_region_subtag(segments[index])) {
  127. language_id.region = ensure_unique_string(locale_data, segments[index]);
  128. if (segments.size() == ++index)
  129. return language_id;
  130. }
  131. while (index < segments.size()) {
  132. if (!Unicode::is_unicode_variant_subtag(segments[index]))
  133. return {};
  134. language_id.variants.append(ensure_unique_string(locale_data, segments[index++]));
  135. }
  136. return language_id;
  137. }
  138. static Optional<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
  139. {
  140. auto parsed_key = parse_language(locale_data, key);
  141. if (!parsed_key.has_value())
  142. return {};
  143. auto parsed_alias = parse_language(locale_data, alias);
  144. if (!parsed_alias.has_value())
  145. return {};
  146. return LanguageMapping { parsed_key.release_value(), parsed_alias.release_value() };
  147. }
  148. static void parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
  149. {
  150. LexicalPath core_aliases_path(move(core_supplemental_path));
  151. core_aliases_path = core_aliases_path.append("aliases.json"sv);
  152. VERIFY(Core::File::exists(core_aliases_path.string()));
  153. auto core_aliases_file_or_error = Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly);
  154. VERIFY(!core_aliases_file_or_error.is_error());
  155. auto core_aliases = JsonParser(core_aliases_file_or_error.value()->read_all()).parse();
  156. VERIFY(core_aliases.has_value());
  157. auto const& supplemental_object = core_aliases->as_object().get("supplemental"sv);
  158. auto const& metadata_object = supplemental_object.as_object().get("metadata"sv);
  159. auto const& alias_object = metadata_object.as_object().get("alias"sv);
  160. auto append_aliases = [&](auto& alias_object, auto& alias_map) {
  161. alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  162. auto alias = value.as_object().get("_replacement"sv).as_string();
  163. if (key.contains('-')) {
  164. auto mapping = parse_language_mapping(locale_data, key, alias);
  165. if (!mapping.has_value())
  166. return;
  167. locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
  168. locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
  169. locale_data.complex_mappings.append(mapping.release_value());
  170. } else {
  171. alias_map.set(key, ensure_unique_string(locale_data, alias));
  172. }
  173. });
  174. };
  175. append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases);
  176. append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases);
  177. append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases);
  178. append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases);
  179. append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases);
  180. }
  181. static void parse_likely_subtags(String core_supplemental_path, UnicodeLocaleData& locale_data)
  182. {
  183. LexicalPath likely_subtags_path(move(core_supplemental_path));
  184. likely_subtags_path = likely_subtags_path.append("likelySubtags.json"sv);
  185. VERIFY(Core::File::exists(likely_subtags_path.string()));
  186. auto likely_subtags_file_or_error = Core::File::open(likely_subtags_path.string(), Core::OpenMode::ReadOnly);
  187. VERIFY(!likely_subtags_file_or_error.is_error());
  188. auto likely_subtags = JsonParser(likely_subtags_file_or_error.value()->read_all()).parse();
  189. VERIFY(likely_subtags.has_value());
  190. auto const& supplemental_object = likely_subtags->as_object().get("supplemental"sv);
  191. auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
  192. likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  193. auto mapping = parse_language_mapping(locale_data, key, value.as_string());
  194. if (!mapping.has_value())
  195. return;
  196. locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size);
  197. locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size);
  198. locale_data.likely_subtags.append(mapping.release_value());
  199. });
  200. }
  201. static void parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  202. {
  203. LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them.
  204. languages_path = languages_path.append("languages.json"sv);
  205. VERIFY(Core::File::exists(languages_path.string()));
  206. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  207. VERIFY(!languages_file_or_error.is_error());
  208. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  209. VERIFY(languages.has_value());
  210. auto const& main_object = languages->as_object().get("main"sv);
  211. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  212. auto const& identity_object = locale_object.as_object().get("identity"sv);
  213. auto const& language_string = identity_object.as_object().get("language"sv);
  214. auto const& territory_string = identity_object.as_object().get("territory"sv);
  215. auto const& variant_string = identity_object.as_object().get("variant"sv);
  216. locale.language = language_string.as_string();
  217. if (!locale_data.languages.contains_slow(locale.language))
  218. locale_data.languages.append(locale.language);
  219. if (territory_string.is_string()) {
  220. locale.territory = territory_string.as_string();
  221. if (!locale_data.territories.contains_slow(*locale.territory))
  222. locale_data.territories.append(*locale.territory);
  223. }
  224. if (variant_string.is_string()) {
  225. locale.variant = variant_string.as_string();
  226. if (!locale_data.variants.contains_slow(*locale.variant))
  227. locale_data.variants.append(*locale.variant);
  228. }
  229. }
  230. static void parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  231. {
  232. LexicalPath languages_path(move(locale_path));
  233. languages_path = languages_path.append("languages.json"sv);
  234. VERIFY(Core::File::exists(languages_path.string()));
  235. auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly);
  236. VERIFY(!languages_file_or_error.is_error());
  237. auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse();
  238. VERIFY(languages.has_value());
  239. auto const& main_object = languages->as_object().get("main"sv);
  240. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  241. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  242. auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
  243. languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  244. if (!locale_data.languages.contains_slow(key))
  245. return;
  246. auto index = ensure_unique_string(locale_data, value.as_string());
  247. locale.languages.set(key, index);
  248. });
  249. }
  250. static void parse_locale_territories(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  251. {
  252. LexicalPath territories_path(move(locale_path));
  253. territories_path = territories_path.append("territories.json"sv);
  254. VERIFY(Core::File::exists(territories_path.string()));
  255. auto territories_file_or_error = Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly);
  256. VERIFY(!territories_file_or_error.is_error());
  257. auto territories = JsonParser(territories_file_or_error.value()->read_all()).parse();
  258. VERIFY(territories.has_value());
  259. auto const& main_object = territories->as_object().get("main"sv);
  260. auto const& locale_object = main_object.as_object().get(territories_path.parent().basename());
  261. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  262. auto const& territories_object = locale_display_names_object.as_object().get("territories"sv);
  263. territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  264. if (!locale_data.territories.contains_slow(key))
  265. return;
  266. auto index = ensure_unique_string(locale_data, value.as_string());
  267. locale.territories.set(key, index);
  268. });
  269. }
  270. static void parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  271. {
  272. LexicalPath scripts_path(move(locale_path));
  273. scripts_path = scripts_path.append("scripts.json"sv);
  274. VERIFY(Core::File::exists(scripts_path.string()));
  275. auto scripts_file_or_error = Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly);
  276. VERIFY(!scripts_file_or_error.is_error());
  277. auto scripts = JsonParser(scripts_file_or_error.value()->read_all()).parse();
  278. VERIFY(scripts.has_value());
  279. auto const& main_object = scripts->as_object().get("main"sv);
  280. auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename());
  281. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  282. auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv);
  283. scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  284. auto index = ensure_unique_string(locale_data, value.as_string());
  285. locale.scripts.set(key, index);
  286. if (!locale_data.scripts.contains_slow(key))
  287. locale_data.scripts.append(key);
  288. });
  289. }
  290. static void parse_locale_list_patterns(String misc_path, UnicodeLocaleData& locale_data, Locale& locale)
  291. {
  292. LexicalPath list_patterns_path(move(misc_path));
  293. list_patterns_path = list_patterns_path.append("listPatterns.json"sv);
  294. VERIFY(Core::File::exists(list_patterns_path.string()));
  295. auto list_patterns_file_or_error = Core::File::open(list_patterns_path.string(), Core::OpenMode::ReadOnly);
  296. VERIFY(!list_patterns_file_or_error.is_error());
  297. auto list_patterns = JsonParser(list_patterns_file_or_error.value()->read_all()).parse();
  298. VERIFY(list_patterns.has_value());
  299. auto const& main_object = list_patterns->as_object().get("main"sv);
  300. auto const& locale_object = main_object.as_object().get(list_patterns_path.parent().basename());
  301. auto const& list_patterns_object = locale_object.as_object().get("listPatterns"sv);
  302. auto list_pattern_type = [](StringView key) {
  303. if (key.contains("type-standard"sv))
  304. return "conjunction"sv;
  305. if (key.contains("type-or"sv))
  306. return "disjunction"sv;
  307. if (key.contains("type-unit"sv))
  308. return "unit"sv;
  309. VERIFY_NOT_REACHED();
  310. };
  311. auto list_pattern_style = [](StringView key) {
  312. if (key.contains("short"sv))
  313. return "short"sv;
  314. if (key.contains("narrow"sv))
  315. return "narrow"sv;
  316. return "long"sv;
  317. };
  318. list_patterns_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  319. auto type = list_pattern_type(key);
  320. auto style = list_pattern_style(key);
  321. auto start = ensure_unique_string(locale_data, value.as_object().get("start"sv).as_string());
  322. auto middle = ensure_unique_string(locale_data, value.as_object().get("middle"sv).as_string());
  323. auto end = ensure_unique_string(locale_data, value.as_object().get("end"sv).as_string());
  324. auto pair = ensure_unique_string(locale_data, value.as_object().get("2"sv).as_string());
  325. if (!locale_data.list_pattern_types.contains_slow(type))
  326. locale_data.list_pattern_types.append(type);
  327. if (!locale_data.list_pattern_styles.contains_slow(style))
  328. locale_data.list_pattern_styles.append(style);
  329. locale.list_patterns.append({ move(type), move(style), move(start), move(middle), move(end), move(pair) });
  330. });
  331. }
  332. static void parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  333. {
  334. LexicalPath currencies_path(move(numbers_path));
  335. currencies_path = currencies_path.append("currencies.json"sv);
  336. VERIFY(Core::File::exists(currencies_path.string()));
  337. auto currencies_file_or_error = Core::File::open(currencies_path.string(), Core::OpenMode::ReadOnly);
  338. VERIFY(!currencies_file_or_error.is_error());
  339. auto currencies = JsonParser(currencies_file_or_error.value()->read_all()).parse();
  340. VERIFY(currencies.has_value());
  341. auto const& main_object = currencies->as_object().get("main"sv);
  342. auto const& locale_object = main_object.as_object().get(currencies_path.parent().basename());
  343. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  344. auto const& currencies_object = locale_numbers_object.as_object().get("currencies"sv);
  345. currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  346. auto const& display_name = value.as_object().get("displayName"sv);
  347. auto index = ensure_unique_string(locale_data, display_name.as_string());
  348. locale.currencies.set(key, index);
  349. if (!locale_data.currencies.contains_slow(key))
  350. locale_data.currencies.append(key);
  351. });
  352. }
  353. static void parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  354. {
  355. static constexpr StringView key = "nu"sv;
  356. LexicalPath numbers_path(move(locale_numbers_path));
  357. numbers_path = numbers_path.append("numbers.json"sv);
  358. VERIFY(Core::File::exists(numbers_path.string()));
  359. auto numbers_file_or_error = Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly);
  360. VERIFY(!numbers_file_or_error.is_error());
  361. auto numbers = JsonParser(numbers_file_or_error.value()->read_all()).parse();
  362. VERIFY(numbers.has_value());
  363. auto const& main_object = numbers->as_object().get("main"sv);
  364. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  365. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  366. auto const& default_numbering_system_object = locale_numbers_object.as_object().get("defaultNumberingSystem"sv);
  367. auto const& other_numbering_systems_object = locale_numbers_object.as_object().get("otherNumberingSystems"sv);
  368. Vector<String> keyword_values {};
  369. keyword_values.append(default_numbering_system_object.as_string());
  370. other_numbering_systems_object.as_object().for_each_member([&](auto const&, JsonValue const& value) {
  371. auto keyword_value = value.as_string();
  372. if (!keyword_values.contains_slow(keyword_value))
  373. keyword_values.append(move(keyword_value));
  374. });
  375. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  376. if (!key.starts_with("defaultNumberingSystem-alt-"sv))
  377. return;
  378. auto keyword_value = value.as_string();
  379. if (!keyword_values.contains_slow(keyword_value))
  380. keyword_values.append(move(keyword_value));
  381. });
  382. StringBuilder builder;
  383. builder.join(',', keyword_values);
  384. auto index = ensure_unique_string(locale_data, builder.build());
  385. locale.keywords.set(key, index);
  386. if (!locale_data.keywords.contains_slow(key))
  387. locale_data.keywords.append(key);
  388. }
  389. static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  390. {
  391. LexicalPath numbers_path(move(locale_numbers_path));
  392. numbers_path = numbers_path.append("numbers.json"sv);
  393. VERIFY(Core::File::exists(numbers_path.string()));
  394. auto numbers_file_or_error = Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly);
  395. VERIFY(!numbers_file_or_error.is_error());
  396. auto numbers = JsonParser(numbers_file_or_error.value()->read_all()).parse();
  397. VERIFY(numbers.has_value());
  398. auto const& main_object = numbers->as_object().get("main"sv);
  399. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  400. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  401. auto ensure_number_system = [&](auto const& system) -> NumberSystem& {
  402. return locale.number_systems.ensure(system, [&]() {
  403. auto system_index = ensure_unique_string(locale_data, system);
  404. return NumberSystem { .system = system_index };
  405. });
  406. };
  407. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  408. constexpr auto symbols_prefix = "symbols-numberSystem-"sv;
  409. if (key.starts_with(symbols_prefix)) {
  410. auto system = key.substring(symbols_prefix.length());
  411. auto& number_system = ensure_number_system(system);
  412. value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) {
  413. auto symbol_index = ensure_unique_string(locale_data, localization.as_string());
  414. number_system.symbols.set(symbol, symbol_index);
  415. if (!locale_data.numeric_symbols.contains_slow(symbol))
  416. locale_data.numeric_symbols.append(symbol);
  417. });
  418. }
  419. });
  420. }
  421. static void parse_default_content_locales(String core_path, UnicodeLocaleData& locale_data)
  422. {
  423. LexicalPath default_content_path(move(core_path));
  424. default_content_path = default_content_path.append("defaultContent.json"sv);
  425. VERIFY(Core::File::exists(default_content_path.string()));
  426. auto default_content_file_or_error = Core::File::open(default_content_path.string(), Core::OpenMode::ReadOnly);
  427. VERIFY(!default_content_file_or_error.is_error());
  428. auto default_content = JsonParser(default_content_file_or_error.value()->read_all()).parse();
  429. VERIFY(default_content.has_value());
  430. auto const& default_content_array = default_content->as_object().get("defaultContent"sv);
  431. default_content_array.as_array().for_each([&](JsonValue const& value) {
  432. auto locale = value.as_string();
  433. StringView default_locale = locale;
  434. while (true) {
  435. if (locale_data.locales.contains(default_locale))
  436. break;
  437. auto pos = default_locale.find_last('-');
  438. if (!pos.has_value())
  439. return;
  440. default_locale = default_locale.substring_view(0, *pos);
  441. }
  442. locale_data.locales.set(locale, locale_data.locales.get(default_locale).value());
  443. });
  444. }
  445. static Core::DirIterator path_to_dir_iterator(String path)
  446. {
  447. LexicalPath lexical_path(move(path));
  448. lexical_path = lexical_path.append("main"sv);
  449. VERIFY(Core::File::is_directory(lexical_path.string()));
  450. Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir);
  451. if (iterator.has_error()) {
  452. warnln("{}: {}", lexical_path.string(), iterator.error_string());
  453. VERIFY_NOT_REACHED();
  454. }
  455. return iterator;
  456. }
  457. static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
  458. {
  459. auto identity_iterator = path_to_dir_iterator(locale_names_path);
  460. auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path));
  461. auto misc_iterator = path_to_dir_iterator(move(misc_path));
  462. auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
  463. LexicalPath core_supplemental_path(core_path);
  464. core_supplemental_path = core_supplemental_path.append("supplemental"sv);
  465. VERIFY(Core::File::is_directory(core_supplemental_path.string()));
  466. parse_core_aliases(core_supplemental_path.string(), locale_data);
  467. parse_likely_subtags(core_supplemental_path.string(), locale_data);
  468. auto remove_variants_from_path = [&](String path) -> Optional<String> {
  469. auto parsed_locale = parse_language(locale_data, LexicalPath::basename(path));
  470. if (!parsed_locale.has_value())
  471. return {};
  472. StringBuilder builder;
  473. builder.append(get_unique_string(locale_data, parsed_locale->language));
  474. if (auto script = get_unique_string(locale_data, parsed_locale->script); !script.is_empty())
  475. builder.appendff("-{}", script);
  476. if (auto region = get_unique_string(locale_data, parsed_locale->region); !region.is_empty())
  477. builder.appendff("-{}", region);
  478. return builder.build();
  479. };
  480. while (identity_iterator.has_next()) {
  481. auto locale_path = identity_iterator.next_full_path();
  482. VERIFY(Core::File::is_directory(locale_path));
  483. auto language = remove_variants_from_path(locale_path);
  484. if (!language.has_value())
  485. continue;
  486. auto& locale = locale_data.locales.ensure(*language);
  487. parse_identity(locale_path, locale_data, locale);
  488. }
  489. while (locale_names_iterator.has_next()) {
  490. auto locale_path = locale_names_iterator.next_full_path();
  491. VERIFY(Core::File::is_directory(locale_path));
  492. auto language = remove_variants_from_path(locale_path);
  493. if (!language.has_value())
  494. continue;
  495. auto& locale = locale_data.locales.ensure(*language);
  496. parse_locale_languages(locale_path, locale_data, locale);
  497. parse_locale_territories(locale_path, locale_data, locale);
  498. parse_locale_scripts(locale_path, locale_data, locale);
  499. }
  500. while (misc_iterator.has_next()) {
  501. auto misc_path = misc_iterator.next_full_path();
  502. VERIFY(Core::File::is_directory(misc_path));
  503. auto language = remove_variants_from_path(misc_path);
  504. if (!language.has_value())
  505. continue;
  506. auto& locale = locale_data.locales.ensure(*language);
  507. parse_locale_list_patterns(misc_path, locale_data, locale);
  508. }
  509. while (numbers_iterator.has_next()) {
  510. auto numbers_path = numbers_iterator.next_full_path();
  511. VERIFY(Core::File::is_directory(numbers_path));
  512. auto language = remove_variants_from_path(numbers_path);
  513. if (!language.has_value())
  514. continue;
  515. auto& locale = locale_data.locales.ensure(*language);
  516. parse_locale_currencies(numbers_path, locale_data, locale);
  517. parse_numeric_keywords(numbers_path, locale_data, locale);
  518. parse_number_systems(numbers_path, locale_data, locale);
  519. }
  520. parse_default_content_locales(move(core_path), locale_data);
  521. }
  522. static String format_identifier(StringView owner, String identifier)
  523. {
  524. identifier = identifier.replace("-"sv, "_"sv, true);
  525. if (all_of(identifier, is_ascii_digit))
  526. return String::formatted("{}_{}", owner[0], identifier);
  527. if (is_ascii_lower_alpha(identifier[0]))
  528. return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
  529. return identifier;
  530. }
  531. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  532. {
  533. StringBuilder builder;
  534. SourceGenerator generator { builder };
  535. auto generate_enum = [&](StringView name, StringView default_, Vector<String>& values) {
  536. quick_sort(values);
  537. generator.set("name", name);
  538. generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv);
  539. generator.append(R"~~~(
  540. enum class @name@ : @underlying@ {)~~~");
  541. if (!default_.is_empty()) {
  542. generator.set("default", default_);
  543. generator.append(R"~~~(
  544. @default@,)~~~");
  545. }
  546. for (auto const& value : values) {
  547. generator.set("value", format_identifier(name, value));
  548. generator.append(R"~~~(
  549. @value@,)~~~");
  550. }
  551. generator.append(R"~~~(
  552. };
  553. )~~~");
  554. };
  555. generator.append(R"~~~(
  556. #pragma once
  557. #include <AK/Optional.h>
  558. #include <AK/StringView.h>
  559. #include <AK/Types.h>
  560. #include <LibUnicode/Forward.h>
  561. namespace Unicode {
  562. )~~~");
  563. auto locales = locale_data.locales.keys();
  564. generate_enum("Locale"sv, "None"sv, locales);
  565. generate_enum("Language"sv, {}, locale_data.languages);
  566. generate_enum("Territory"sv, {}, locale_data.territories);
  567. generate_enum("ScriptTag"sv, {}, locale_data.scripts);
  568. generate_enum("Currency"sv, {}, locale_data.currencies);
  569. generate_enum("Key"sv, {}, locale_data.keywords);
  570. generate_enum("NumericSymbol"sv, {}, locale_data.numeric_symbols);
  571. generate_enum("Variant"sv, {}, locale_data.variants);
  572. generate_enum("ListPatternType"sv, {}, locale_data.list_pattern_types);
  573. generate_enum("ListPatternStyle"sv, {}, locale_data.list_pattern_styles);
  574. generator.append(R"~~~(
  575. namespace Detail {
  576. Optional<Locale> locale_from_string(StringView locale);
  577. Optional<StringView> get_locale_language_mapping(StringView locale, StringView language);
  578. Optional<Language> language_from_string(StringView language);
  579. Optional<StringView> resolve_language_alias(StringView language);
  580. Optional<StringView> get_locale_territory_mapping(StringView locale, StringView territory);
  581. Optional<Territory> territory_from_string(StringView territory);
  582. Optional<StringView> resolve_territory_alias(StringView territory);
  583. Optional<StringView> get_locale_script_tag_mapping(StringView locale, StringView script_tag);
  584. Optional<ScriptTag> script_tag_from_string(StringView script_tag);
  585. Optional<StringView> resolve_script_tag_alias(StringView script_tag);
  586. Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency);
  587. Optional<Currency> currency_from_string(StringView currency);
  588. Optional<StringView> get_locale_key_mapping(StringView locale, StringView key);
  589. Optional<Key> key_from_string(StringView key);
  590. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView numeric_symbol);
  591. Optional<NumericSymbol> numeric_symbol_from_string(StringView numeric_symbol);
  592. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style);
  593. Optional<ListPatternType> list_pattern_type_from_string(StringView list_pattern_type);
  594. Optional<ListPatternStyle> list_pattern_style_from_string(StringView list_pattern_style);
  595. Optional<StringView> resolve_variant_alias(StringView variant);
  596. Optional<StringView> resolve_subdivision_alias(StringView subdivision);
  597. void resolve_complex_language_aliases(Unicode::LanguageID& language_id);
  598. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id);
  599. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id);
  600. }
  601. }
  602. )~~~");
  603. file.write(generator.as_string_view());
  604. }
  605. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  606. {
  607. StringBuilder builder;
  608. SourceGenerator generator { builder };
  609. generator.set("string_index_type"sv, s_string_index_type);
  610. generator.set("strings_size"sv, String::number(locale_data.unique_strings.size()));
  611. generator.set("locales_size"sv, String::number(locale_data.locales.size()));
  612. generator.set("territories_size", String::number(locale_data.territories.size()));
  613. generator.set("variants_size", String::number(locale_data.max_variant_size));
  614. generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size()));
  615. generator.append(R"~~~(
  616. #include <AK/Array.h>
  617. #include <AK/BinarySearch.h>
  618. #include <AK/Span.h>
  619. #include <LibUnicode/Locale.h>
  620. #include <LibUnicode/UnicodeLocale.h>
  621. namespace Unicode {
  622. struct Patterns {
  623. ListPatternType type;
  624. ListPatternStyle style;
  625. @string_index_type@ start { 0 };
  626. @string_index_type@ middle { 0 };
  627. @string_index_type@ end { 0 };
  628. @string_index_type@ pair { 0 };
  629. };
  630. )~~~");
  631. generator.append(R"~~~(
  632. static constexpr Array<StringView, @strings_size@ + 1> s_string_list { {
  633. {})~~~");
  634. constexpr size_t max_strings_per_row = 30;
  635. size_t strings_in_current_row = 1;
  636. for (auto const& string : locale_data.unique_strings) {
  637. if (strings_in_current_row++ > 0)
  638. generator.append(", ");
  639. generator.append(String::formatted("\"{}\"sv", string));
  640. if (strings_in_current_row == max_strings_per_row) {
  641. strings_in_current_row = 0;
  642. generator.append(",\n ");
  643. }
  644. }
  645. generator.append(R"~~~(
  646. } };
  647. struct NumberSystem {
  648. @string_index_type@ system { 0 };
  649. Array<@string_index_type@, @numeric_symbols_size@> symbols {};
  650. };
  651. )~~~");
  652. auto format_mapping_name = [](StringView format, StringView name) {
  653. auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true);
  654. return String::formatted(format, mapping_name);
  655. };
  656. auto append_index = [&](auto index) {
  657. generator.append(String::formatted(", {}", index));
  658. };
  659. auto append_list_and_size = [&](auto const& list) {
  660. if (list.is_empty()) {
  661. generator.append(", {}, 0");
  662. return;
  663. }
  664. bool first = true;
  665. generator.append(", {");
  666. for (auto const& item : list) {
  667. generator.append(first ? " " : ", ");
  668. generator.append(String::number(item));
  669. first = false;
  670. }
  671. generator.append(String::formatted(" }}, {}", list.size()));
  672. };
  673. auto append_string_index_list = [&](String name, auto const& keys, auto const& mappings) {
  674. generator.set("name", name);
  675. generator.set("size", String::number(keys.size()));
  676. generator.append(R"~~~(
  677. static constexpr Array<@string_index_type@, @size@> @name@ { {
  678. )~~~");
  679. constexpr size_t max_values_per_row = 30;
  680. size_t values_in_current_row = 0;
  681. for (auto const& key : keys) {
  682. if (values_in_current_row++ > 0)
  683. generator.append(" ");
  684. if (auto it = mappings.find(key); it != mappings.end())
  685. generator.set("mapping"sv, String::number(it->value));
  686. else
  687. generator.set("mapping"sv, "0"sv);
  688. generator.append("@mapping@,");
  689. if (values_in_current_row == max_values_per_row) {
  690. values_in_current_row = 0;
  691. generator.append("\n ");
  692. }
  693. }
  694. generator.append(R"~~~(
  695. } };
  696. )~~~");
  697. };
  698. auto append_number_systems = [&](String name, auto const& number_systems) {
  699. generator.set("name", move(name));
  700. generator.set("size", String::number(number_systems.size()));
  701. generator.append(R"~~~(
  702. static constexpr Array<NumberSystem, @size@> @name@ { {)~~~");
  703. for (auto const& number_system : number_systems) {
  704. generator.set("system"sv, String::number(number_system.value.system));
  705. generator.append(R"~~~(
  706. { @system@, {)~~~");
  707. for (auto const& symbol : locale_data.numeric_symbols) {
  708. auto index = number_system.value.symbols.get(symbol).value_or(0);
  709. generator.set("index", String::number(index));
  710. generator.append(" @index@,");
  711. }
  712. generator.append(" } },");
  713. }
  714. generator.append(R"~~~(
  715. } };
  716. )~~~");
  717. };
  718. auto append_list_patterns = [&](StringView name, Vector<ListPatterns> const& list_patterns) {
  719. generator.set("name", name);
  720. generator.set("size", String::number(list_patterns.size()));
  721. generator.append(R"~~~(
  722. static constexpr Array<Patterns, @size@> @name@ { {)~~~");
  723. for (auto const& list_pattern : list_patterns) {
  724. generator.set("type"sv, String::formatted("ListPatternType::{}", format_identifier({}, list_pattern.type)));
  725. generator.set("style"sv, String::formatted("ListPatternStyle::{}", format_identifier({}, list_pattern.style)));
  726. generator.set("start"sv, String::number(list_pattern.start));
  727. generator.set("middle"sv, String::number(list_pattern.middle));
  728. generator.set("end"sv, String::number(list_pattern.end));
  729. generator.set("pair"sv, String::number(list_pattern.pair));
  730. generator.append(R"~~~(
  731. { @type@, @style@, @start@, @middle@, @end@, @pair@ },)~~~");
  732. }
  733. generator.append(R"~~~(
  734. } };
  735. )~~~");
  736. };
  737. auto append_mapping = [&](StringView type, StringView name, StringView format, auto format_list_callback) {
  738. Vector<String> mapping_names;
  739. for (auto const& locale : locale_data.locales) {
  740. auto mapping_name = format_mapping_name(format, locale.key);
  741. format_list_callback(mapping_name, locale.value);
  742. mapping_names.append(move(mapping_name));
  743. }
  744. quick_sort(mapping_names);
  745. generator.set("type", type);
  746. generator.set("name", name);
  747. generator.set("size", String::number(locale_data.locales.size()));
  748. generator.append(R"~~~(
  749. static constexpr Array<Span<@type@ const>, @size@> @name@ { {
  750. )~~~");
  751. constexpr size_t max_values_per_row = 10;
  752. size_t values_in_current_row = 0;
  753. for (auto& mapping_name : mapping_names) {
  754. if (values_in_current_row++ > 0)
  755. generator.append(" ");
  756. generator.set("name", move(mapping_name));
  757. generator.append("@name@.span(),");
  758. if (values_in_current_row == max_values_per_row) {
  759. values_in_current_row = 0;
  760. generator.append("\n ");
  761. }
  762. }
  763. generator.append(R"~~~(
  764. } };
  765. )~~~");
  766. };
  767. append_mapping(s_string_index_type, "s_languages"sv, "s_languages_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.languages, value.languages); });
  768. append_mapping(s_string_index_type, "s_territories"sv, "s_territories_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.territories, value.territories); });
  769. append_mapping(s_string_index_type, "s_scripts"sv, "s_scripts_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.scripts, value.scripts); });
  770. append_mapping(s_string_index_type, "s_currencies"sv, "s_currencies_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.currencies, value.currencies); });
  771. append_mapping(s_string_index_type, "s_keywords"sv, "s_keywords_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.keywords, value.keywords); });
  772. append_mapping("NumberSystem"sv, "s_number_systems"sv, "s_number_systems_{}", [&](auto const& name, auto const& value) { append_number_systems(name, value.number_systems); });
  773. append_mapping("Patterns"sv, "s_list_patterns"sv, "s_list_patterns_{}", [&](auto const& name, auto const& value) { append_list_patterns(name, value.list_patterns); });
  774. generator.append(R"~~~(
  775. struct CanonicalLanguageID {
  776. Unicode::LanguageID to_unicode_language_id() const
  777. {
  778. Unicode::LanguageID language_id {};
  779. language_id.variants.ensure_capacity(variants_size);
  780. language_id.language = s_string_list[language];
  781. if (script != 0)
  782. language_id.script = s_string_list[script];
  783. if (region != 0)
  784. language_id.region = s_string_list[region];
  785. for (size_t i = 0; i < variants_size; ++i)
  786. language_id.variants.append(s_string_list[variants[i]]);
  787. return language_id;
  788. }
  789. bool matches_variants(Vector<String> const& other_variants) const {
  790. if (variants_size == 0)
  791. return true;
  792. if (other_variants.size() != variants_size)
  793. return false;
  794. for (size_t i = 0; i < variants_size; ++i) {
  795. if (s_string_list[variants[i]] != other_variants[i])
  796. return false;
  797. }
  798. return true;
  799. };
  800. @string_index_type@ language { 0 };
  801. @string_index_type@ script { 0 };
  802. @string_index_type@ region { 0 };
  803. Array<@string_index_type@, @variants_size@> variants {};
  804. size_t variants_size { 0 };
  805. };
  806. struct LanguageMapping {
  807. CanonicalLanguageID key;
  808. CanonicalLanguageID alias;
  809. };
  810. )~~~");
  811. auto append_complex_mapping = [&](StringView name, auto& mappings) {
  812. generator.set("size", String::number(mappings.size()));
  813. generator.set("name"sv, name);
  814. generator.append(R"~~~(
  815. static constexpr Array<LanguageMapping, @size@> s_@name@ { {
  816. )~~~");
  817. quick_sort(mappings, [&](auto const& lhs, auto const& rhs) {
  818. auto const& lhs_language = get_unique_string(locale_data, lhs.key.language);
  819. auto const& rhs_language = get_unique_string(locale_data, rhs.key.language);
  820. // Sort the keys such that "und" language tags are at the end, as those are less specific.
  821. if (lhs_language.starts_with("und"sv) && !rhs_language.starts_with("und"sv))
  822. return false;
  823. if (!lhs_language.starts_with("und"sv) && rhs_language.starts_with("und"sv))
  824. return true;
  825. return lhs_language < rhs_language;
  826. });
  827. for (auto const& mapping : mappings) {
  828. generator.set("language"sv, String::number(mapping.key.language));
  829. generator.append(" { { @language@");
  830. append_index(mapping.key.script);
  831. append_index(mapping.key.region);
  832. append_list_and_size(mapping.key.variants);
  833. generator.set("language"sv, String::number(mapping.alias.language));
  834. generator.append(" }, { @language@");
  835. append_index(mapping.alias.script);
  836. append_index(mapping.alias.region);
  837. append_list_and_size(mapping.alias.variants);
  838. generator.append(" } },\n");
  839. }
  840. generator.append("} };\n");
  841. };
  842. append_complex_mapping("complex_alias"sv, locale_data.complex_mappings);
  843. append_complex_mapping("likely_subtags"sv, locale_data.likely_subtags);
  844. generator.append(R"~~~(
  845. static LanguageMapping const* resolve_likely_subtag(Unicode::LanguageID const& language_id)
  846. {
  847. // https://unicode.org/reports/tr35/#Likely_Subtags
  848. enum class State {
  849. LanguageScriptRegion,
  850. LanguageRegion,
  851. LanguageScript,
  852. Language,
  853. UndScript,
  854. Done,
  855. };
  856. auto state = State::LanguageScriptRegion;
  857. while (state != State::Done) {
  858. Unicode::LanguageID search_key;
  859. switch (state) {
  860. case State::LanguageScriptRegion:
  861. state = State::LanguageRegion;
  862. if (!language_id.script.has_value() || !language_id.region.has_value())
  863. continue;
  864. search_key.language = *language_id.language;
  865. search_key.script = *language_id.script;
  866. search_key.region = *language_id.region;
  867. break;
  868. case State::LanguageRegion:
  869. state = State::LanguageScript;
  870. if (!language_id.region.has_value())
  871. continue;
  872. search_key.language = *language_id.language;
  873. search_key.region = *language_id.region;
  874. break;
  875. case State::LanguageScript:
  876. state = State::Language;
  877. if (!language_id.script.has_value())
  878. continue;
  879. search_key.language = *language_id.language;
  880. search_key.script = *language_id.script;
  881. break;
  882. case State::Language:
  883. state = State::UndScript;
  884. search_key.language = *language_id.language;
  885. break;
  886. case State::UndScript:
  887. state = State::Done;
  888. if (!language_id.script.has_value())
  889. continue;
  890. search_key.language = "und"sv;
  891. search_key.script = *language_id.script;
  892. break;
  893. default:
  894. VERIFY_NOT_REACHED();
  895. }
  896. for (auto const& map : s_likely_subtags) {
  897. auto const& key_language = s_string_list[map.key.language];
  898. auto const& key_script = s_string_list[map.key.script];
  899. auto const& key_region = s_string_list[map.key.region];
  900. if (key_language != search_key.language)
  901. continue;
  902. if (!key_script.is_empty() || search_key.script.has_value()) {
  903. if (key_script != search_key.script)
  904. continue;
  905. }
  906. if (!key_region.is_empty() || search_key.region.has_value()) {
  907. if (key_region != search_key.region)
  908. continue;
  909. }
  910. return &map;
  911. }
  912. }
  913. return nullptr;
  914. }
  915. namespace Detail {
  916. )~~~");
  917. auto append_mapping_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
  918. generator.set("enum_title", enum_title);
  919. generator.set("enum_snake", enum_snake);
  920. generator.set("collection_name", collection_name);
  921. generator.append(R"~~~(
  922. Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@)
  923. {
  924. auto locale_value = locale_from_string(locale);
  925. if (!locale_value.has_value())
  926. return {};
  927. auto @enum_snake@_value = @enum_snake@_from_string(@enum_snake@);
  928. if (!@enum_snake@_value.has_value())
  929. return {};
  930. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  931. auto @enum_snake@_index = to_underlying(*@enum_snake@_value);
  932. auto const& mappings = @collection_name@.at(locale_index);
  933. auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
  934. auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
  935. if (@enum_snake@_mapping.is_empty())
  936. return {};
  937. return @enum_snake@_mapping;
  938. }
  939. )~~~");
  940. };
  941. auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values) {
  942. HashValueMap<String> hashes;
  943. hashes.ensure_capacity(values.size());
  944. for (auto const& value : values)
  945. hashes.set(value.hash(), format_identifier(enum_title, value));
  946. generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
  947. };
  948. auto append_alias_search = [&](StringView enum_snake, auto const& aliases) {
  949. HashValueMap<StringIndexType> hashes;
  950. hashes.ensure_capacity(aliases.size());
  951. for (auto const& alias : aliases)
  952. hashes.set(alias.key.hash(), alias.value);
  953. generate_value_from_string(generator, "resolve_{}_alias"sv, s_string_index_type, enum_snake, move(hashes), "StringView"sv, "s_string_list[{}]"sv);
  954. };
  955. append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys());
  956. append_mapping_search("Language"sv, "language"sv, "s_languages"sv);
  957. append_from_string("Language"sv, "language"sv, locale_data.languages);
  958. append_alias_search("language"sv, locale_data.language_aliases);
  959. append_mapping_search("Territory"sv, "territory"sv, "s_territories"sv);
  960. append_from_string("Territory"sv, "territory"sv, locale_data.territories);
  961. append_alias_search("territory"sv, locale_data.territory_aliases);
  962. append_mapping_search("ScriptTag"sv, "script_tag"sv, "s_scripts"sv);
  963. append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts);
  964. append_alias_search("script_tag"sv, locale_data.script_aliases);
  965. append_mapping_search("Currency"sv, "currency"sv, "s_currencies"sv);
  966. append_from_string("Currency"sv, "currency"sv, locale_data.currencies);
  967. append_mapping_search("Key"sv, "key"sv, "s_keywords"sv);
  968. append_from_string("Key"sv, "key"sv, locale_data.keywords);
  969. append_alias_search("variant"sv, locale_data.variant_aliases);
  970. append_alias_search("subdivision"sv, locale_data.subdivision_aliases);
  971. append_from_string("ListPatternType"sv, "list_pattern_type"sv, locale_data.list_pattern_types);
  972. append_from_string("ListPatternStyle"sv, "list_pattern_style"sv, locale_data.list_pattern_styles);
  973. append_from_string("NumericSymbol"sv, "numeric_symbol"sv, locale_data.numeric_symbols);
  974. generator.append(R"~~~(
  975. static NumberSystem const* find_number_system(StringView locale, StringView system)
  976. {
  977. auto locale_value = locale_from_string(locale);
  978. if (!locale_value.has_value())
  979. return nullptr;
  980. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  981. auto const& number_systems = s_number_systems.at(locale_index);
  982. for (auto const& number_system : number_systems) {
  983. if (system == s_string_list[number_system.system])
  984. return &number_system;
  985. };
  986. return nullptr;
  987. }
  988. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol)
  989. {
  990. auto symbol_value = numeric_symbol_from_string(symbol);
  991. if (!symbol_value.has_value())
  992. return {};
  993. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  994. auto symbol_index = to_underlying(*symbol_value);
  995. return s_string_list[number_system->symbols[symbol_index]];
  996. }
  997. return {};
  998. }
  999. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style)
  1000. {
  1001. auto locale_value = locale_from_string(locale);
  1002. if (!locale_value.has_value())
  1003. return {};
  1004. auto type_value = list_pattern_type_from_string(list_pattern_type);
  1005. if (!type_value.has_value())
  1006. return {};
  1007. auto style_value = list_pattern_style_from_string(list_pattern_style);
  1008. if (!style_value.has_value())
  1009. return {};
  1010. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  1011. auto const& locale_list_patterns = s_list_patterns.at(locale_index);
  1012. for (auto const& list_patterns : locale_list_patterns) {
  1013. if ((list_patterns.type == type_value) && (list_patterns.style == style_value)) {
  1014. auto const& start = s_string_list[list_patterns.start];
  1015. auto const& middle = s_string_list[list_patterns.middle];
  1016. auto const& end = s_string_list[list_patterns.end];
  1017. auto const& pair = s_string_list[list_patterns.pair];
  1018. return ListPatterns { start, middle, end, pair };
  1019. }
  1020. }
  1021. return {};
  1022. }
  1023. void resolve_complex_language_aliases(Unicode::LanguageID& language_id)
  1024. {
  1025. for (auto const& map : s_complex_alias) {
  1026. auto const& key_language = s_string_list[map.key.language];
  1027. auto const& key_script = s_string_list[map.key.script];
  1028. auto const& key_region = s_string_list[map.key.region];
  1029. if ((key_language != language_id.language) && (key_language != "und"sv))
  1030. continue;
  1031. if (!key_script.is_empty() && (key_script != language_id.script))
  1032. continue;
  1033. if (!key_region.is_empty() && (key_region != language_id.region))
  1034. continue;
  1035. if (!map.key.matches_variants(language_id.variants))
  1036. continue;
  1037. auto alias = map.alias.to_unicode_language_id();
  1038. if (alias.language == "und"sv)
  1039. alias.language = move(language_id.language);
  1040. if (key_script.is_empty() && !alias.script.has_value())
  1041. alias.script = move(language_id.script);
  1042. if (key_region.is_empty() && !alias.region.has_value())
  1043. alias.region = move(language_id.region);
  1044. if (map.key.variants_size == 0 && alias.variants.is_empty())
  1045. alias.variants = move(language_id.variants);
  1046. language_id = move(alias);
  1047. break;
  1048. }
  1049. }
  1050. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id)
  1051. {
  1052. // https://www.unicode.org/reports/tr35/#Likely_Subtags
  1053. auto const* likely_subtag = resolve_likely_subtag(language_id);
  1054. if (likely_subtag == nullptr)
  1055. return {};
  1056. auto maximized = language_id;
  1057. auto const& key_script = s_string_list[likely_subtag->key.script];
  1058. auto const& key_region = s_string_list[likely_subtag->key.region];
  1059. auto const& alias_language = s_string_list[likely_subtag->alias.language];
  1060. auto const& alias_script = s_string_list[likely_subtag->alias.script];
  1061. auto const& alias_region = s_string_list[likely_subtag->alias.region];
  1062. if (maximized.language == "und"sv)
  1063. maximized.language = alias_language;
  1064. if (!maximized.script.has_value() || (!key_script.is_empty() && !alias_script.is_empty()))
  1065. maximized.script = alias_script;
  1066. if (!maximized.region.has_value() || (!key_region.is_empty() && !alias_region.is_empty()))
  1067. maximized.region = alias_region;
  1068. return maximized;
  1069. }
  1070. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id)
  1071. {
  1072. if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
  1073. return s_string_list[likely_subtag->alias.region];
  1074. return {};
  1075. }
  1076. }
  1077. }
  1078. )~~~");
  1079. file.write(generator.as_string_view());
  1080. }
  1081. int main(int argc, char** argv)
  1082. {
  1083. char const* generated_header_path = nullptr;
  1084. char const* generated_implementation_path = nullptr;
  1085. char const* core_path = nullptr;
  1086. char const* locale_names_path = nullptr;
  1087. char const* misc_path = nullptr;
  1088. char const* numbers_path = nullptr;
  1089. Core::ArgsParser args_parser;
  1090. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  1091. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  1092. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  1093. args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path");
  1094. args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path");
  1095. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  1096. args_parser.parse(argc, argv);
  1097. auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
  1098. if (path.is_empty()) {
  1099. warnln("{} is required", flags);
  1100. args_parser.print_usage(stderr, argv[0]);
  1101. exit(1);
  1102. }
  1103. auto file_or_error = Core::File::open(path, mode);
  1104. if (file_or_error.is_error()) {
  1105. warnln("Failed to open {}: {}", path, file_or_error.release_error());
  1106. exit(1);
  1107. }
  1108. return file_or_error.release_value();
  1109. };
  1110. auto generated_header_file = open_file(generated_header_path, "-h/--generated-header-path", Core::OpenMode::ReadWrite);
  1111. auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite);
  1112. UnicodeLocaleData locale_data;
  1113. parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, locale_data);
  1114. generate_unicode_locale_header(generated_header_file, locale_data);
  1115. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  1116. return 0;
  1117. }