GenerateUnicodeLocale.cpp 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AllOf.h>
  8. #include <AK/CharacterTypes.h>
  9. #include <AK/Format.h>
  10. #include <AK/HashMap.h>
  11. #include <AK/JsonObject.h>
  12. #include <AK/JsonParser.h>
  13. #include <AK/JsonValue.h>
  14. #include <AK/LexicalPath.h>
  15. #include <AK/QuickSort.h>
  16. #include <AK/SourceGenerator.h>
  17. #include <AK/String.h>
  18. #include <AK/StringBuilder.h>
  19. #include <LibCore/ArgsParser.h>
  20. #include <LibCore/DirIterator.h>
  21. #include <LibCore/File.h>
  22. using StringIndexType = u16;
  23. constexpr auto s_string_index_type = "u16"sv;
  24. using LanguageListIndexType = u8;
  25. constexpr auto s_language_list_index_type = "u8"sv;
  26. using TerritoryListIndexType = u8;
  27. constexpr auto s_territory_list_index_type = "u8"sv;
  28. using ScriptListIndexType = u8;
  29. constexpr auto s_script_list_index_type = "u8"sv;
  30. using CurrencyListIndexType = u16;
  31. constexpr auto s_currency_list_index_type = "u16"sv;
  32. using KeywordListIndexType = u8;
  33. constexpr auto s_keyword_list_index_type = "u8"sv;
  34. struct ListPatterns {
  35. String type;
  36. String style;
  37. StringIndexType start { 0 };
  38. StringIndexType middle { 0 };
  39. StringIndexType end { 0 };
  40. StringIndexType pair { 0 };
  41. };
  42. using LanguageList = Vector<StringIndexType>;
  43. using TerritoryList = Vector<StringIndexType>;
  44. using ScriptList = Vector<StringIndexType>;
  45. using CurrencyList = Vector<StringIndexType>;
  46. using KeywordList = Vector<StringIndexType>;
  47. struct Locale {
  48. String language;
  49. Optional<String> territory;
  50. Optional<String> variant;
  51. LanguageListIndexType languages { 0 };
  52. TerritoryListIndexType territories { 0 };
  53. ScriptListIndexType scripts { 0 };
  54. CurrencyListIndexType long_currencies { 0 };
  55. CurrencyListIndexType short_currencies { 0 };
  56. CurrencyListIndexType narrow_currencies { 0 };
  57. CurrencyListIndexType numeric_currencies { 0 };
  58. KeywordListIndexType keywords { 0 };
  59. Vector<ListPatterns> list_patterns;
  60. };
  61. struct LanguageMapping {
  62. CanonicalLanguageID<StringIndexType> key {};
  63. CanonicalLanguageID<StringIndexType> alias {};
  64. };
  65. struct UnicodeLocaleData {
  66. UniqueStringStorage<StringIndexType> unique_strings;
  67. UniqueStorage<LanguageList, LanguageListIndexType> unique_language_lists;
  68. UniqueStorage<TerritoryList, TerritoryListIndexType> unique_territory_lists;
  69. UniqueStorage<ScriptList, ScriptListIndexType> unique_script_lists;
  70. UniqueStorage<CurrencyList, CurrencyListIndexType> unique_currency_lists;
  71. UniqueStorage<KeywordList, KeywordListIndexType> unique_keyword_lists;
  72. HashMap<String, Locale> locales;
  73. Vector<Alias> locale_aliases;
  74. Vector<String> languages;
  75. Vector<String> territories;
  76. Vector<String> scripts;
  77. Vector<String> variants;
  78. Vector<String> currencies;
  79. Vector<String> keywords { "ca"sv, "nu"sv }; // FIXME: These should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158
  80. Vector<String> list_pattern_types;
  81. Vector<String> list_pattern_styles;
  82. HashMap<String, StringIndexType> language_aliases;
  83. HashMap<String, StringIndexType> territory_aliases;
  84. HashMap<String, StringIndexType> script_aliases;
  85. HashMap<String, StringIndexType> variant_aliases;
  86. HashMap<String, StringIndexType> subdivision_aliases;
  87. Vector<LanguageMapping> complex_mappings;
  88. Vector<LanguageMapping> likely_subtags;
  89. size_t max_variant_size { 0 };
  90. };
  // Not every parse is expected to succeed: the CLDR contains language mappings with locales
  // such as "en-GB-oed" that are canonically invalid locale IDs.
  //
  // Evaluates `expression`. If the result is an error, the enclosing (void-returning) function
  // or lambda returns immediately and the error is dropped; otherwise the macro yields the
  // unwrapped value.
  #define TRY_OR_DISCARD(expression)                   \
      ({                                               \
          auto _discardable_result = (expression);     \
          if (_discardable_result.is_error())          \
              return;                                  \
          _discardable_result.release_value();         \
      })
  100. static ErrorOr<LanguageMapping> parse_language_mapping(UnicodeLocaleData& locale_data, StringView key, StringView alias)
  101. {
  102. auto parsed_key = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, key));
  103. auto parsed_alias = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, alias));
  104. return LanguageMapping { move(parsed_key), move(parsed_alias) };
  105. }
  106. static ErrorOr<void> parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data)
  107. {
  108. LexicalPath core_aliases_path(move(core_supplemental_path));
  109. core_aliases_path = core_aliases_path.append("aliases.json"sv);
  110. auto core_aliases_file = TRY(Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly));
  111. auto core_aliases = TRY(JsonValue::from_string(core_aliases_file->read_all()));
  112. auto const& supplemental_object = core_aliases.as_object().get("supplemental"sv);
  113. auto const& metadata_object = supplemental_object.as_object().get("metadata"sv);
  114. auto const& alias_object = metadata_object.as_object().get("alias"sv);
  115. auto append_aliases = [&](auto& alias_object, auto& alias_map) {
  116. alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  117. auto alias = value.as_object().get("_replacement"sv).as_string();
  118. if (key.contains('-')) {
  119. auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, alias));
  120. locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
  121. locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
  122. locale_data.complex_mappings.append(move(mapping));
  123. } else {
  124. alias_map.set(key, locale_data.unique_strings.ensure(alias));
  125. }
  126. });
  127. };
  128. append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases);
  129. append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases);
  130. append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases);
  131. append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases);
  132. append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases);
  133. return {};
  134. }
  135. static ErrorOr<void> parse_likely_subtags(String core_supplemental_path, UnicodeLocaleData& locale_data)
  136. {
  137. LexicalPath likely_subtags_path(move(core_supplemental_path));
  138. likely_subtags_path = likely_subtags_path.append("likelySubtags.json"sv);
  139. auto likely_subtags_file = TRY(Core::File::open(likely_subtags_path.string(), Core::OpenMode::ReadOnly));
  140. auto likely_subtags = TRY(JsonValue::from_string(likely_subtags_file->read_all()));
  141. auto const& supplemental_object = likely_subtags.as_object().get("supplemental"sv);
  142. auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv);
  143. likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  144. auto mapping = TRY_OR_DISCARD(parse_language_mapping(locale_data, key, value.as_string()));
  145. locale_data.max_variant_size = max(mapping.key.variants.size(), locale_data.max_variant_size);
  146. locale_data.max_variant_size = max(mapping.alias.variants.size(), locale_data.max_variant_size);
  147. locale_data.likely_subtags.append(move(mapping));
  148. });
  149. return {};
  150. }
  151. static ErrorOr<void> parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  152. {
  153. LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them.
  154. languages_path = languages_path.append("languages.json"sv);
  155. auto languages_file = TRY(Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly));
  156. auto languages = TRY(JsonValue::from_string(languages_file->read_all()));
  157. auto const& main_object = languages.as_object().get("main"sv);
  158. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  159. auto const& identity_object = locale_object.as_object().get("identity"sv);
  160. auto const& language_string = identity_object.as_object().get("language"sv);
  161. auto const& territory_string = identity_object.as_object().get("territory"sv);
  162. auto const& script_string = identity_object.as_object().get("script"sv);
  163. auto const& variant_string = identity_object.as_object().get("variant"sv);
  164. locale.language = language_string.as_string();
  165. if (!locale_data.languages.contains_slow(locale.language))
  166. locale_data.languages.append(locale.language);
  167. if (territory_string.is_string()) {
  168. locale.territory = territory_string.as_string();
  169. if (!locale_data.territories.contains_slow(*locale.territory))
  170. locale_data.territories.append(*locale.territory);
  171. }
  172. if (script_string.is_string()) {
  173. auto script = script_string.as_string();
  174. if (!locale_data.scripts.contains_slow(script))
  175. locale_data.scripts.append(script);
  176. }
  177. if (variant_string.is_string()) {
  178. locale.variant = variant_string.as_string();
  179. if (!locale_data.variants.contains_slow(*locale.variant))
  180. locale_data.variants.append(*locale.variant);
  181. }
  182. return {};
  183. }
  184. static ErrorOr<void> parse_locale_languages(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  185. {
  186. LexicalPath languages_path(move(locale_path));
  187. languages_path = languages_path.append("languages.json"sv);
  188. auto languages_file = TRY(Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly));
  189. auto locale_languages = TRY(JsonValue::from_string(languages_file->read_all()));
  190. auto const& main_object = locale_languages.as_object().get("main"sv);
  191. auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
  192. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  193. auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
  194. LanguageList languages;
  195. languages.resize(locale_data.languages.size());
  196. languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  197. if (auto index = locale_data.languages.find_first_index(key); index.has_value())
  198. languages[*index] = locale_data.unique_strings.ensure(value.as_string());
  199. });
  200. locale.languages = locale_data.unique_language_lists.ensure(move(languages));
  201. return {};
  202. }
  203. static ErrorOr<void> parse_locale_territories(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  204. {
  205. LexicalPath territories_path(move(locale_path));
  206. territories_path = territories_path.append("territories.json"sv);
  207. auto territories_file = TRY(Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly));
  208. auto locale_territories = TRY(JsonValue::from_string(territories_file->read_all()));
  209. auto const& main_object = locale_territories.as_object().get("main"sv);
  210. auto const& locale_object = main_object.as_object().get(territories_path.parent().basename());
  211. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  212. auto const& territories_object = locale_display_names_object.as_object().get("territories"sv);
  213. TerritoryList territories;
  214. territories.resize(locale_data.territories.size());
  215. territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  216. if (auto index = locale_data.territories.find_first_index(key); index.has_value())
  217. territories[*index] = locale_data.unique_strings.ensure(value.as_string());
  218. });
  219. locale.territories = locale_data.unique_territory_lists.ensure(move(territories));
  220. return {};
  221. }
  222. static ErrorOr<void> parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_data, Locale& locale)
  223. {
  224. LexicalPath scripts_path(move(locale_path));
  225. scripts_path = scripts_path.append("scripts.json"sv);
  226. auto scripts_file = TRY(Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly));
  227. auto locale_scripts = TRY(JsonValue::from_string(scripts_file->read_all()));
  228. auto const& main_object = locale_scripts.as_object().get("main"sv);
  229. auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename());
  230. auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
  231. auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv);
  232. ScriptList scripts;
  233. scripts.resize(locale_data.scripts.size());
  234. scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  235. if (auto index = locale_data.scripts.find_first_index(key); index.has_value())
  236. scripts[*index] = locale_data.unique_strings.ensure(value.as_string());
  237. });
  238. locale.scripts = locale_data.unique_script_lists.ensure(move(scripts));
  239. return {};
  240. }
  241. static ErrorOr<void> parse_locale_list_patterns(String misc_path, UnicodeLocaleData& locale_data, Locale& locale)
  242. {
  243. LexicalPath list_patterns_path(move(misc_path));
  244. list_patterns_path = list_patterns_path.append("listPatterns.json"sv);
  245. auto list_patterns_file = TRY(Core::File::open(list_patterns_path.string(), Core::OpenMode::ReadOnly));
  246. auto list_patterns = TRY(JsonValue::from_string(list_patterns_file->read_all()));
  247. auto const& main_object = list_patterns.as_object().get("main"sv);
  248. auto const& locale_object = main_object.as_object().get(list_patterns_path.parent().basename());
  249. auto const& list_patterns_object = locale_object.as_object().get("listPatterns"sv);
  250. auto list_pattern_type = [](StringView key) {
  251. if (key.contains("type-standard"sv))
  252. return "conjunction"sv;
  253. if (key.contains("type-or"sv))
  254. return "disjunction"sv;
  255. if (key.contains("type-unit"sv))
  256. return "unit"sv;
  257. VERIFY_NOT_REACHED();
  258. };
  259. auto list_pattern_style = [](StringView key) {
  260. if (key.contains("short"sv))
  261. return "short"sv;
  262. if (key.contains("narrow"sv))
  263. return "narrow"sv;
  264. return "long"sv;
  265. };
  266. list_patterns_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  267. auto type = list_pattern_type(key);
  268. auto style = list_pattern_style(key);
  269. auto start = locale_data.unique_strings.ensure(value.as_object().get("start"sv).as_string());
  270. auto middle = locale_data.unique_strings.ensure(value.as_object().get("middle"sv).as_string());
  271. auto end = locale_data.unique_strings.ensure(value.as_object().get("end"sv).as_string());
  272. auto pair = locale_data.unique_strings.ensure(value.as_object().get("2"sv).as_string());
  273. if (!locale_data.list_pattern_types.contains_slow(type))
  274. locale_data.list_pattern_types.append(type);
  275. if (!locale_data.list_pattern_styles.contains_slow(style))
  276. locale_data.list_pattern_styles.append(style);
  277. locale.list_patterns.append({ move(type), move(style), move(start), move(middle), move(end), move(pair) });
  278. });
  279. return {};
  280. }
  281. static ErrorOr<void> parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  282. {
  283. LexicalPath currencies_path(move(numbers_path));
  284. currencies_path = currencies_path.append("currencies.json"sv);
  285. auto currencies_file = TRY(Core::File::open(currencies_path.string(), Core::OpenMode::ReadOnly));
  286. auto locale_currencies = TRY(JsonValue::from_string(currencies_file->read_all()));
  287. auto const& main_object = locale_currencies.as_object().get("main"sv);
  288. auto const& locale_object = main_object.as_object().get(currencies_path.parent().basename());
  289. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  290. auto const& currencies_object = locale_numbers_object.as_object().get("currencies"sv);
  291. currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const&) {
  292. if (!locale_data.currencies.contains_slow(key))
  293. locale_data.currencies.append(key);
  294. });
  295. CurrencyList long_currencies {};
  296. long_currencies.resize(locale_data.currencies.size());
  297. CurrencyList short_currencies {};
  298. short_currencies.resize(locale_data.currencies.size());
  299. CurrencyList narrow_currencies {};
  300. narrow_currencies.resize(locale_data.currencies.size());
  301. CurrencyList numeric_currencies {};
  302. numeric_currencies.resize(locale_data.currencies.size());
  303. currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  304. auto const& long_name = value.as_object().get("displayName"sv);
  305. auto const& short_name = value.as_object().get("symbol"sv);
  306. auto const& narrow_name = value.as_object().get("symbol-alt-narrow"sv);
  307. auto const& numeric_name = value.as_object().get("displayName-count-other"sv);
  308. auto index = locale_data.currencies.find_first_index(key).value();
  309. long_currencies[index] = locale_data.unique_strings.ensure(long_name.as_string());
  310. short_currencies[index] = locale_data.unique_strings.ensure(short_name.as_string());
  311. narrow_currencies[index] = narrow_name.is_null() ? 0 : locale_data.unique_strings.ensure(narrow_name.as_string());
  312. numeric_currencies[index] = locale_data.unique_strings.ensure(numeric_name.is_null() ? long_name.as_string() : numeric_name.as_string());
  313. });
  314. locale.long_currencies = locale_data.unique_currency_lists.ensure(move(long_currencies));
  315. locale.short_currencies = locale_data.unique_currency_lists.ensure(move(short_currencies));
  316. locale.narrow_currencies = locale_data.unique_currency_lists.ensure(move(narrow_currencies));
  317. locale.numeric_currencies = locale_data.unique_currency_lists.ensure(move(numeric_currencies));
  318. return {};
  319. }
  320. static ErrorOr<void> parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData& locale_data, KeywordList& keywords)
  321. {
  322. static constexpr StringView key = "nu"sv;
  323. LexicalPath numbers_path(move(locale_numbers_path));
  324. numbers_path = numbers_path.append("numbers.json"sv);
  325. auto numbers_file = TRY(Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly));
  326. auto numbers = TRY(JsonValue::from_string(numbers_file->read_all()));
  327. auto const& main_object = numbers.as_object().get("main"sv);
  328. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  329. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  330. auto const& default_numbering_system_object = locale_numbers_object.as_object().get("defaultNumberingSystem"sv);
  331. auto const& other_numbering_systems_object = locale_numbers_object.as_object().get("otherNumberingSystems"sv);
  332. Vector<String> keyword_values {};
  333. keyword_values.append(default_numbering_system_object.as_string());
  334. other_numbering_systems_object.as_object().for_each_member([&](auto const&, JsonValue const& value) {
  335. auto keyword_value = value.as_string();
  336. if (!keyword_values.contains_slow(keyword_value))
  337. keyword_values.append(move(keyword_value));
  338. });
  339. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  340. if (!key.starts_with("defaultNumberingSystem-alt-"sv))
  341. return;
  342. auto keyword_value = value.as_string();
  343. if (!keyword_values.contains_slow(keyword_value))
  344. keyword_values.append(move(keyword_value));
  345. });
  346. StringBuilder builder;
  347. builder.join(',', keyword_values);
  348. auto index = locale_data.keywords.find_first_index(key).value();
  349. keywords[index] = locale_data.unique_strings.ensure(builder.build());
  350. return {};
  351. }
  352. static ErrorOr<void> parse_calendar_keywords(String locale_dates_path, UnicodeLocaleData& locale_data, KeywordList& keywords)
  353. {
  354. static constexpr StringView key = "ca"sv;
  355. auto calendars_iterator = TRY(path_to_dir_iterator(locale_dates_path, {}));
  356. Vector<String> keyword_values {};
  357. while (calendars_iterator.has_next()) {
  358. auto locale_calendars_path = TRY(next_path_from_dir_iterator(calendars_iterator));
  359. LexicalPath calendars_path(move(locale_calendars_path));
  360. if (!calendars_path.basename().starts_with("ca-"sv))
  361. continue;
  362. auto calendars_file = TRY(Core::File::open(calendars_path.string(), Core::OpenMode::ReadOnly));
  363. auto calendars = TRY(JsonValue::from_string(calendars_file->read_all()));
  364. auto const& main_object = calendars.as_object().get("main"sv);
  365. auto const& locale_object = main_object.as_object().get(calendars_path.parent().basename());
  366. auto const& dates_object = locale_object.as_object().get("dates"sv);
  367. auto const& calendars_object = dates_object.as_object().get("calendars"sv);
  368. calendars_object.as_object().for_each_member([&](auto const& calendar_name, JsonValue const&) {
  369. // The generic calendar is not a supported Unicode calendar key, so skip it:
  370. // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
  371. if (calendar_name == "generic"sv)
  372. return;
  373. // FIXME: Similar to the calendar aliases defined in GenerateUnicodeDateTimeFormat, this
  374. // should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158
  375. if (calendar_name == "gregorian"sv)
  376. keyword_values.append("gregory"sv);
  377. else
  378. keyword_values.append(calendar_name);
  379. });
  380. }
  381. StringBuilder builder;
  382. builder.join(',', keyword_values);
  383. auto index = locale_data.keywords.find_first_index(key).value();
  384. keywords[index] = locale_data.unique_strings.ensure(builder.build());
  385. return {};
  386. }
  387. static ErrorOr<void> parse_default_content_locales(String core_path, UnicodeLocaleData& locale_data)
  388. {
  389. LexicalPath default_content_path(move(core_path));
  390. default_content_path = default_content_path.append("defaultContent.json"sv);
  391. auto default_content_file = TRY(Core::File::open(default_content_path.string(), Core::OpenMode::ReadOnly));
  392. auto default_content = TRY(JsonValue::from_string(default_content_file->read_all()));
  393. auto const& default_content_array = default_content.as_object().get("defaultContent"sv);
  394. default_content_array.as_array().for_each([&](JsonValue const& value) {
  395. auto locale = value.as_string();
  396. StringView default_locale = locale;
  397. while (true) {
  398. if (locale_data.locales.contains(default_locale))
  399. break;
  400. auto pos = default_locale.find_last('-');
  401. if (!pos.has_value())
  402. return;
  403. default_locale = default_locale.substring_view(0, *pos);
  404. }
  405. if (default_locale != locale)
  406. locale_data.locale_aliases.append({ default_locale, move(locale) });
  407. });
  408. return {};
  409. }
  410. static ErrorOr<void> define_aliases_without_scripts(UnicodeLocaleData& locale_data)
  411. {
  412. // From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots
  413. //
  414. // For locales that include a script subtag in addition to language and region, the
  415. // corresponding locale without a script subtag must also be supported.
  416. //
  417. // So we define aliases for locales that contain all three subtags, but we must also take
  418. // care to handle when the locale itself or the locale without a script subtag are an alias
  419. // by way of default-content locales.
  420. auto find_alias = [&](auto const& locale) {
  421. return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; });
  422. };
  423. auto append_alias_without_script = [&](auto const& locale) -> ErrorOr<void> {
  424. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale));
  425. if ((parsed_locale.language == 0) || (parsed_locale.script == 0) || (parsed_locale.region == 0))
  426. return {};
  427. auto locale_without_script = String::formatted("{}-{}",
  428. locale_data.unique_strings.get(parsed_locale.language),
  429. locale_data.unique_strings.get(parsed_locale.region));
  430. if (locale_data.locales.contains(locale_without_script))
  431. return {};
  432. if (find_alias(locale_without_script) != locale_data.locale_aliases.end())
  433. return {};
  434. if (auto it = find_alias(locale); it != locale_data.locale_aliases.end())
  435. locale_data.locale_aliases.append({ it->name, locale_without_script });
  436. else
  437. locale_data.locale_aliases.append({ locale, locale_without_script });
  438. return {};
  439. };
  440. for (auto const& locale : locale_data.locales)
  441. TRY(append_alias_without_script(locale.key));
  442. for (auto const& locale : locale_data.locale_aliases)
  443. TRY(append_alias_without_script(locale.alias));
  444. return {};
  445. }
  446. static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data)
  447. {
  448. auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
  449. auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
  450. auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
  451. auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
  452. auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
  453. LexicalPath core_supplemental_path(core_path);
  454. core_supplemental_path = core_supplemental_path.append("supplemental"sv);
  455. VERIFY(Core::File::is_directory(core_supplemental_path.string()));
  456. TRY(parse_core_aliases(core_supplemental_path.string(), locale_data));
  457. TRY(parse_likely_subtags(core_supplemental_path.string(), locale_data));
  458. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  459. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
  460. StringBuilder builder;
  461. builder.append(locale_data.unique_strings.get(parsed_locale.language));
  462. if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
  463. builder.appendff("-{}", script);
  464. if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
  465. builder.appendff("-{}", region);
  466. return builder.build();
  467. };
  468. while (identity_iterator.has_next()) {
  469. auto locale_path = TRY(next_path_from_dir_iterator(identity_iterator));
  470. auto language = TRY(remove_variants_from_path(locale_path));
  471. auto& locale = locale_data.locales.ensure(language);
  472. TRY(parse_identity(locale_path, locale_data, locale));
  473. }
  474. quick_sort(locale_data.languages);
  475. quick_sort(locale_data.territories);
  476. quick_sort(locale_data.scripts);
  477. HashMap<String, KeywordList> keywords;
  478. auto ensure_keyword_list = [&](auto const& language) -> KeywordList& {
  479. return keywords.ensure(language, [&]() {
  480. KeywordList keywords;
  481. keywords.resize(locale_data.keywords.size());
  482. return keywords;
  483. });
  484. };
  485. while (locale_names_iterator.has_next()) {
  486. auto locale_path = TRY(next_path_from_dir_iterator(locale_names_iterator));
  487. auto language = TRY(remove_variants_from_path(locale_path));
  488. auto& locale = locale_data.locales.ensure(language);
  489. TRY(parse_locale_languages(locale_path, locale_data, locale));
  490. TRY(parse_locale_territories(locale_path, locale_data, locale));
  491. TRY(parse_locale_scripts(locale_path, locale_data, locale));
  492. }
  493. while (misc_iterator.has_next()) {
  494. auto misc_path = TRY(next_path_from_dir_iterator(misc_iterator));
  495. auto language = TRY(remove_variants_from_path(misc_path));
  496. auto& locale = locale_data.locales.ensure(language);
  497. TRY(parse_locale_list_patterns(misc_path, locale_data, locale));
  498. }
  499. while (numbers_iterator.has_next()) {
  500. auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
  501. auto language = TRY(remove_variants_from_path(numbers_path));
  502. auto& locale = locale_data.locales.ensure(language);
  503. TRY(parse_locale_currencies(numbers_path, locale_data, locale));
  504. auto& keywords = ensure_keyword_list(language);
  505. TRY(parse_numeric_keywords(numbers_path, locale_data, keywords));
  506. }
  507. while (dates_iterator.has_next()) {
  508. auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
  509. auto language = TRY(remove_variants_from_path(dates_path));
  510. auto& keywords = ensure_keyword_list(language);
  511. TRY(parse_calendar_keywords(dates_path, locale_data, keywords));
  512. }
  513. TRY(parse_default_content_locales(move(core_path), locale_data));
  514. TRY(define_aliases_without_scripts(locale_data));
  515. for (auto& list : keywords) {
  516. auto& locale = locale_data.locales.find(list.key)->value;
  517. locale.keywords = locale_data.unique_keyword_lists.ensure(move(list.value));
  518. }
  519. return {};
  520. }
  521. static String format_identifier(StringView owner, String identifier)
  522. {
  523. identifier = identifier.replace("-"sv, "_"sv, true);
  524. if (all_of(identifier, is_ascii_digit))
  525. return String::formatted("{}_{}", owner[0], identifier);
  526. if (is_ascii_lower_alpha(identifier[0]))
  527. return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
  528. return identifier;
  529. }
  530. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  531. {
  532. StringBuilder builder;
  533. SourceGenerator generator { builder };
  534. generator.append(R"~~~(
  535. #pragma once
  536. #include <AK/Optional.h>
  537. #include <AK/StringView.h>
  538. #include <AK/Types.h>
  539. #include <AK/Vector.h>
  540. #include <LibUnicode/Forward.h>
  541. namespace Unicode {
  542. )~~~");
  543. auto locales = locale_data.locales.keys();
  544. generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, locale_data.locale_aliases);
  545. generate_enum(generator, format_identifier, "Language"sv, {}, locale_data.languages);
  546. generate_enum(generator, format_identifier, "Territory"sv, {}, locale_data.territories);
  547. generate_enum(generator, format_identifier, "ScriptTag"sv, {}, locale_data.scripts);
  548. generate_enum(generator, format_identifier, "Currency"sv, {}, locale_data.currencies);
  549. generate_enum(generator, format_identifier, "Key"sv, {}, locale_data.keywords);
  550. generate_enum(generator, format_identifier, "Variant"sv, {}, locale_data.variants);
  551. generate_enum(generator, format_identifier, "ListPatternType"sv, {}, locale_data.list_pattern_types);
  552. generate_enum(generator, format_identifier, "ListPatternStyle"sv, {}, locale_data.list_pattern_styles);
  553. generator.append(R"~~~(
  554. namespace Detail {
  555. Optional<Locale> locale_from_string(StringView locale);
  556. Optional<StringView> get_locale_language_mapping(StringView locale, StringView language);
  557. Optional<Language> language_from_string(StringView language);
  558. Optional<StringView> resolve_language_alias(StringView language);
  559. Optional<StringView> get_locale_territory_mapping(StringView locale, StringView territory);
  560. Optional<Territory> territory_from_string(StringView territory);
  561. Optional<StringView> resolve_territory_alias(StringView territory);
  562. Optional<StringView> get_locale_script_tag_mapping(StringView locale, StringView script_tag);
  563. Optional<ScriptTag> script_tag_from_string(StringView script_tag);
  564. Optional<StringView> resolve_script_tag_alias(StringView script_tag);
  565. Optional<StringView> get_locale_long_currency_mapping(StringView locale, StringView currency);
  566. Optional<StringView> get_locale_short_currency_mapping(StringView locale, StringView currency);
  567. Optional<StringView> get_locale_narrow_currency_mapping(StringView locale, StringView currency);
  568. Optional<StringView> get_locale_numeric_currency_mapping(StringView locale, StringView currency);
  569. Optional<Currency> currency_from_string(StringView currency);
  570. Optional<StringView> get_locale_key_mapping(StringView locale, StringView key);
  571. Optional<Key> key_from_string(StringView key);
  572. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style);
  573. Optional<ListPatternType> list_pattern_type_from_string(StringView list_pattern_type);
  574. Optional<ListPatternStyle> list_pattern_style_from_string(StringView list_pattern_style);
  575. Optional<StringView> resolve_variant_alias(StringView variant);
  576. Optional<StringView> resolve_subdivision_alias(StringView subdivision);
  577. void resolve_complex_language_aliases(Unicode::LanguageID& language_id);
  578. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id);
  579. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id);
  580. }
  581. }
  582. )~~~");
  583. VERIFY(file.write(generator.as_string_view()));
  584. }
  585. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  586. {
  587. StringBuilder builder;
  588. SourceGenerator generator { builder };
  589. generator.set("string_index_type"sv, s_string_index_type);
  590. generator.set("locales_size"sv, String::number(locale_data.locales.size()));
  591. generator.set("territories_size", String::number(locale_data.territories.size()));
  592. generator.set("variants_size", String::number(locale_data.max_variant_size));
  593. generator.append(R"~~~(
  594. #include <AK/Array.h>
  595. #include <AK/BinarySearch.h>
  596. #include <AK/Span.h>
  597. #include <LibUnicode/Locale.h>
  598. #include <LibUnicode/UnicodeLocale.h>
  599. namespace Unicode::Detail {
  600. struct Patterns {
  601. ListPatternType type;
  602. ListPatternStyle style;
  603. @string_index_type@ start { 0 };
  604. @string_index_type@ middle { 0 };
  605. @string_index_type@ end { 0 };
  606. @string_index_type@ pair { 0 };
  607. };
  608. )~~~");
  609. locale_data.unique_strings.generate(generator);
  610. locale_data.unique_language_lists.generate(generator, s_string_index_type, "s_language_lists"sv);
  611. locale_data.unique_territory_lists.generate(generator, s_string_index_type, "s_territory_lists"sv);
  612. locale_data.unique_script_lists.generate(generator, s_string_index_type, "s_script_lists"sv);
  613. locale_data.unique_currency_lists.generate(generator, s_string_index_type, "s_currency_lists"sv);
  614. locale_data.unique_keyword_lists.generate(generator, s_string_index_type, "s_keyword_lists"sv);
  615. auto append_index = [&](auto index) {
  616. generator.append(String::formatted(", {}", index));
  617. };
  618. auto append_list_and_size = [&](auto const& list) {
  619. if (list.is_empty()) {
  620. generator.append(", {}, 0");
  621. return;
  622. }
  623. bool first = true;
  624. generator.append(", {");
  625. for (auto const& item : list) {
  626. generator.append(first ? " " : ", ");
  627. generator.append(String::number(item));
  628. first = false;
  629. }
  630. generator.append(String::formatted(" }}, {}", list.size()));
  631. };
  632. auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) {
  633. generator.set("type", type);
  634. generator.set("name", name);
  635. generator.set("size", String::number(keys.size()));
  636. generator.append(R"~~~(
  637. static constexpr Array<@type@, @size@> @name@ { {)~~~");
  638. bool first = true;
  639. for (auto const& key : keys) {
  640. auto const& value = map.find(key)->value;
  641. auto mapping = mapping_getter(value);
  642. generator.append(first ? " " : ", ");
  643. generator.append(String::number(mapping));
  644. first = false;
  645. }
  646. generator.append(" } };");
  647. };
  648. auto append_list_patterns = [&](StringView name, Vector<ListPatterns> const& list_patterns) {
  649. generator.set("name", name);
  650. generator.set("size", String::number(list_patterns.size()));
  651. generator.append(R"~~~(
  652. static constexpr Array<Patterns, @size@> @name@ { {)~~~");
  653. for (auto const& list_pattern : list_patterns) {
  654. generator.set("type"sv, String::formatted("ListPatternType::{}", format_identifier({}, list_pattern.type)));
  655. generator.set("style"sv, String::formatted("ListPatternStyle::{}", format_identifier({}, list_pattern.style)));
  656. generator.set("start"sv, String::number(list_pattern.start));
  657. generator.set("middle"sv, String::number(list_pattern.middle));
  658. generator.set("end"sv, String::number(list_pattern.end));
  659. generator.set("pair"sv, String::number(list_pattern.pair));
  660. generator.append(R"~~~(
  661. { @type@, @style@, @start@, @middle@, @end@, @pair@ },)~~~");
  662. }
  663. generator.append(R"~~~(
  664. } };
  665. )~~~");
  666. };
  667. auto locales = locale_data.locales.keys();
  668. quick_sort(locales);
  669. append_mapping(locales, locale_data.locales, s_language_list_index_type, "s_languages"sv, [&](auto const& locale) { return locale.languages; });
  670. append_mapping(locales, locale_data.locales, s_territory_list_index_type, "s_territories"sv, [&](auto const& locale) { return locale.territories; });
  671. append_mapping(locales, locale_data.locales, s_script_list_index_type, "s_scripts"sv, [&](auto const& locale) { return locale.scripts; });
  672. append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_long_currencies"sv, [&](auto const& locale) { return locale.long_currencies; });
  673. append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_short_currencies"sv, [&](auto const& locale) { return locale.short_currencies; });
  674. append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_narrow_currencies"sv, [&](auto const& locale) { return locale.narrow_currencies; });
  675. append_mapping(locales, locale_data.locales, s_currency_list_index_type, "s_numeric_currencies"sv, [&](auto const& locale) { return locale.numeric_currencies; });
  676. append_mapping(locales, locale_data.locales, s_keyword_list_index_type, "s_keywords"sv, [&](auto const& locale) { return locale.keywords; });
  677. generate_mapping(generator, locale_data.locales, "Patterns"sv, "s_list_patterns"sv, "s_list_patterns_{}", [&](auto const& name, auto const& value) { append_list_patterns(name, value.list_patterns); });
  678. generator.append(R"~~~(
  679. struct CanonicalLanguageID {
  680. Unicode::LanguageID to_unicode_language_id() const
  681. {
  682. Unicode::LanguageID language_id {};
  683. language_id.variants.ensure_capacity(variants_size);
  684. language_id.language = s_string_list[language];
  685. if (script != 0)
  686. language_id.script = s_string_list[script];
  687. if (region != 0)
  688. language_id.region = s_string_list[region];
  689. for (size_t i = 0; i < variants_size; ++i)
  690. language_id.variants.append(s_string_list[variants[i]]);
  691. return language_id;
  692. }
  693. bool matches_variants(Vector<String> const& other_variants) const {
  694. if (variants_size == 0)
  695. return true;
  696. if (other_variants.size() != variants_size)
  697. return false;
  698. for (size_t i = 0; i < variants_size; ++i) {
  699. if (s_string_list[variants[i]] != other_variants[i])
  700. return false;
  701. }
  702. return true;
  703. };
  704. @string_index_type@ language { 0 };
  705. @string_index_type@ script { 0 };
  706. @string_index_type@ region { 0 };
  707. Array<@string_index_type@, @variants_size@> variants {};
  708. size_t variants_size { 0 };
  709. };
  710. struct LanguageMapping {
  711. CanonicalLanguageID key;
  712. CanonicalLanguageID alias;
  713. };
  714. )~~~");
  715. auto append_complex_mapping = [&](StringView name, auto& mappings) {
  716. generator.set("size", String::number(mappings.size()));
  717. generator.set("name"sv, name);
  718. generator.append(R"~~~(
  719. static constexpr Array<LanguageMapping, @size@> s_@name@ { {
  720. )~~~");
  721. quick_sort(mappings, [&](auto const& lhs, auto const& rhs) {
  722. auto const& lhs_language = locale_data.unique_strings.get(lhs.key.language);
  723. auto const& rhs_language = locale_data.unique_strings.get(rhs.key.language);
  724. // Sort the keys such that "und" language tags are at the end, as those are less specific.
  725. if (lhs_language.starts_with("und"sv) && !rhs_language.starts_with("und"sv))
  726. return false;
  727. if (!lhs_language.starts_with("und"sv) && rhs_language.starts_with("und"sv))
  728. return true;
  729. return lhs_language < rhs_language;
  730. });
  731. for (auto const& mapping : mappings) {
  732. generator.set("language"sv, String::number(mapping.key.language));
  733. generator.append(" { { @language@");
  734. append_index(mapping.key.script);
  735. append_index(mapping.key.region);
  736. append_list_and_size(mapping.key.variants);
  737. generator.set("language"sv, String::number(mapping.alias.language));
  738. generator.append(" }, { @language@");
  739. append_index(mapping.alias.script);
  740. append_index(mapping.alias.region);
  741. append_list_and_size(mapping.alias.variants);
  742. generator.append(" } },\n");
  743. }
  744. generator.append("} };\n");
  745. };
  746. append_complex_mapping("complex_alias"sv, locale_data.complex_mappings);
  747. append_complex_mapping("likely_subtags"sv, locale_data.likely_subtags);
  748. generator.append(R"~~~(
  749. static LanguageMapping const* resolve_likely_subtag(Unicode::LanguageID const& language_id)
  750. {
  751. // https://unicode.org/reports/tr35/#Likely_Subtags
  752. enum class State {
  753. LanguageScriptRegion,
  754. LanguageRegion,
  755. LanguageScript,
  756. Language,
  757. UndScript,
  758. Done,
  759. };
  760. auto state = State::LanguageScriptRegion;
  761. while (state != State::Done) {
  762. Unicode::LanguageID search_key;
  763. switch (state) {
  764. case State::LanguageScriptRegion:
  765. state = State::LanguageRegion;
  766. if (!language_id.script.has_value() || !language_id.region.has_value())
  767. continue;
  768. search_key.language = *language_id.language;
  769. search_key.script = *language_id.script;
  770. search_key.region = *language_id.region;
  771. break;
  772. case State::LanguageRegion:
  773. state = State::LanguageScript;
  774. if (!language_id.region.has_value())
  775. continue;
  776. search_key.language = *language_id.language;
  777. search_key.region = *language_id.region;
  778. break;
  779. case State::LanguageScript:
  780. state = State::Language;
  781. if (!language_id.script.has_value())
  782. continue;
  783. search_key.language = *language_id.language;
  784. search_key.script = *language_id.script;
  785. break;
  786. case State::Language:
  787. state = State::UndScript;
  788. search_key.language = *language_id.language;
  789. break;
  790. case State::UndScript:
  791. state = State::Done;
  792. if (!language_id.script.has_value())
  793. continue;
  794. search_key.language = "und"sv;
  795. search_key.script = *language_id.script;
  796. break;
  797. default:
  798. VERIFY_NOT_REACHED();
  799. }
  800. for (auto const& map : s_likely_subtags) {
  801. auto const& key_language = s_string_list[map.key.language];
  802. auto const& key_script = s_string_list[map.key.script];
  803. auto const& key_region = s_string_list[map.key.region];
  804. if (key_language != search_key.language)
  805. continue;
  806. if (!key_script.is_empty() || search_key.script.has_value()) {
  807. if (key_script != search_key.script)
  808. continue;
  809. }
  810. if (!key_region.is_empty() || search_key.region.has_value()) {
  811. if (key_region != search_key.region)
  812. continue;
  813. }
  814. return &map;
  815. }
  816. }
  817. return nullptr;
  818. }
  819. )~~~");
  820. auto append_mapping_search = [&](StringView enum_snake, StringView from_string_name, StringView collection_name, StringView unique_list) {
  821. generator.set("enum_snake", enum_snake);
  822. generator.set("from_string_name", from_string_name);
  823. generator.set("collection_name", collection_name);
  824. generator.set("unique_list", unique_list);
  825. generator.append(R"~~~(
  826. Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@)
  827. {
  828. auto locale_value = locale_from_string(locale);
  829. if (!locale_value.has_value())
  830. return {};
  831. auto @enum_snake@_value = @from_string_name@_from_string(@enum_snake@);
  832. if (!@enum_snake@_value.has_value())
  833. return {};
  834. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  835. auto @enum_snake@_index = to_underlying(*@enum_snake@_value);
  836. auto mapping_index = @collection_name@.at(locale_index);
  837. auto const& mappings = @unique_list@.at(mapping_index);
  838. auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
  839. auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
  840. if (@enum_snake@_mapping.is_empty())
  841. return {};
  842. return @enum_snake@_mapping;
  843. }
  844. )~~~");
  845. };
  846. auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) {
  847. HashValueMap<String> hashes;
  848. hashes.ensure_capacity(values.size());
  849. for (auto const& value : values)
  850. hashes.set(value.hash(), format_identifier(enum_title, value));
  851. for (auto const& alias : aliases)
  852. hashes.set(alias.alias.hash(), format_identifier(enum_title, alias.alias));
  853. generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
  854. };
  855. auto append_alias_search = [&](StringView enum_snake, auto const& aliases) {
  856. HashValueMap<StringIndexType> hashes;
  857. hashes.ensure_capacity(aliases.size());
  858. for (auto const& alias : aliases)
  859. hashes.set(alias.key.hash(), alias.value);
  860. generate_value_from_string(generator, "resolve_{}_alias"sv, s_string_index_type, enum_snake, move(hashes), "StringView"sv, "s_string_list[{}]"sv);
  861. };
  862. append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys(), locale_data.locale_aliases);
  863. append_mapping_search("language"sv, "language"sv, "s_languages"sv, "s_language_lists"sv);
  864. append_from_string("Language"sv, "language"sv, locale_data.languages);
  865. append_alias_search("language"sv, locale_data.language_aliases);
  866. append_mapping_search("territory"sv, "territory"sv, "s_territories"sv, "s_territory_lists"sv);
  867. append_from_string("Territory"sv, "territory"sv, locale_data.territories);
  868. append_alias_search("territory"sv, locale_data.territory_aliases);
  869. append_mapping_search("script_tag"sv, "script_tag"sv, "s_scripts"sv, "s_script_lists"sv);
  870. append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts);
  871. append_alias_search("script_tag"sv, locale_data.script_aliases);
  872. append_mapping_search("long_currency"sv, "currency"sv, "s_long_currencies"sv, "s_currency_lists"sv);
  873. append_mapping_search("short_currency"sv, "currency"sv, "s_short_currencies"sv, "s_currency_lists"sv);
  874. append_mapping_search("narrow_currency"sv, "currency"sv, "s_narrow_currencies"sv, "s_currency_lists"sv);
  875. append_mapping_search("numeric_currency"sv, "currency"sv, "s_numeric_currencies"sv, "s_currency_lists"sv);
  876. append_from_string("Currency"sv, "currency"sv, locale_data.currencies);
  877. append_mapping_search("key"sv, "key"sv, "s_keywords"sv, "s_keyword_lists"sv);
  878. append_from_string("Key"sv, "key"sv, locale_data.keywords);
  879. append_alias_search("variant"sv, locale_data.variant_aliases);
  880. append_alias_search("subdivision"sv, locale_data.subdivision_aliases);
  881. append_from_string("ListPatternType"sv, "list_pattern_type"sv, locale_data.list_pattern_types);
  882. append_from_string("ListPatternStyle"sv, "list_pattern_style"sv, locale_data.list_pattern_styles);
  883. generator.append(R"~~~(
  884. Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style)
  885. {
  886. auto locale_value = locale_from_string(locale);
  887. if (!locale_value.has_value())
  888. return {};
  889. auto type_value = list_pattern_type_from_string(list_pattern_type);
  890. if (!type_value.has_value())
  891. return {};
  892. auto style_value = list_pattern_style_from_string(list_pattern_style);
  893. if (!style_value.has_value())
  894. return {};
  895. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  896. auto const& locale_list_patterns = s_list_patterns.at(locale_index);
  897. for (auto const& list_patterns : locale_list_patterns) {
  898. if ((list_patterns.type == type_value) && (list_patterns.style == style_value)) {
  899. auto const& start = s_string_list[list_patterns.start];
  900. auto const& middle = s_string_list[list_patterns.middle];
  901. auto const& end = s_string_list[list_patterns.end];
  902. auto const& pair = s_string_list[list_patterns.pair];
  903. return ListPatterns { start, middle, end, pair };
  904. }
  905. }
  906. return {};
  907. }
  908. void resolve_complex_language_aliases(Unicode::LanguageID& language_id)
  909. {
  910. for (auto const& map : s_complex_alias) {
  911. auto const& key_language = s_string_list[map.key.language];
  912. auto const& key_script = s_string_list[map.key.script];
  913. auto const& key_region = s_string_list[map.key.region];
  914. if ((key_language != language_id.language) && (key_language != "und"sv))
  915. continue;
  916. if (!key_script.is_empty() && (key_script != language_id.script))
  917. continue;
  918. if (!key_region.is_empty() && (key_region != language_id.region))
  919. continue;
  920. if (!map.key.matches_variants(language_id.variants))
  921. continue;
  922. auto alias = map.alias.to_unicode_language_id();
  923. if (alias.language == "und"sv)
  924. alias.language = move(language_id.language);
  925. if (key_script.is_empty() && !alias.script.has_value())
  926. alias.script = move(language_id.script);
  927. if (key_region.is_empty() && !alias.region.has_value())
  928. alias.region = move(language_id.region);
  929. if (map.key.variants_size == 0 && alias.variants.is_empty())
  930. alias.variants = move(language_id.variants);
  931. language_id = move(alias);
  932. break;
  933. }
  934. }
  935. Optional<Unicode::LanguageID> add_likely_subtags(Unicode::LanguageID const& language_id)
  936. {
  937. // https://www.unicode.org/reports/tr35/#Likely_Subtags
  938. auto const* likely_subtag = resolve_likely_subtag(language_id);
  939. if (likely_subtag == nullptr)
  940. return {};
  941. auto maximized = language_id;
  942. auto const& key_script = s_string_list[likely_subtag->key.script];
  943. auto const& key_region = s_string_list[likely_subtag->key.region];
  944. auto const& alias_language = s_string_list[likely_subtag->alias.language];
  945. auto const& alias_script = s_string_list[likely_subtag->alias.script];
  946. auto const& alias_region = s_string_list[likely_subtag->alias.region];
  947. if (maximized.language == "und"sv)
  948. maximized.language = alias_language;
  949. if (!maximized.script.has_value() || (!key_script.is_empty() && !alias_script.is_empty()))
  950. maximized.script = alias_script;
  951. if (!maximized.region.has_value() || (!key_region.is_empty() && !alias_region.is_empty()))
  952. maximized.region = alias_region;
  953. return maximized;
  954. }
  955. Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& language_id)
  956. {
  957. if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
  958. return s_string_list[likely_subtag->alias.region];
  959. return {};
  960. }
  961. }
  962. )~~~");
  963. VERIFY(file.write(generator.as_string_view()));
  964. }
  965. ErrorOr<int> serenity_main(Main::Arguments arguments)
  966. {
  967. StringView generated_header_path;
  968. StringView generated_implementation_path;
  969. StringView core_path;
  970. StringView locale_names_path;
  971. StringView misc_path;
  972. StringView numbers_path;
  973. StringView dates_path;
  974. Core::ArgsParser args_parser;
  975. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  976. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  977. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  978. args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path");
  979. args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path");
  980. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  981. args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
  982. args_parser.parse(arguments);
  983. auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
  984. if (path.is_empty()) {
  985. args_parser.print_usage(stderr, arguments.argv[0]);
  986. return Error::from_string_literal("Must provide all command line options"sv);
  987. }
  988. return Core::File::open(path, Core::OpenMode::ReadWrite);
  989. };
  990. auto generated_header_file = TRY(open_file(generated_header_path));
  991. auto generated_implementation_file = TRY(open_file(generated_implementation_path));
  992. UnicodeLocaleData locale_data;
  993. TRY(parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, dates_path, locale_data));
  994. generate_unicode_locale_header(generated_header_file, locale_data);
  995. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  996. return 0;
  997. }