GenerateUnicodeNumberFormat.cpp 46 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
  1. /*
  2. * Copyright (c) 2021-2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AllOf.h>
  8. #include <AK/Array.h>
  9. #include <AK/CharacterTypes.h>
  10. #include <AK/Find.h>
  11. #include <AK/Format.h>
  12. #include <AK/HashFunctions.h>
  13. #include <AK/HashMap.h>
  14. #include <AK/JsonObject.h>
  15. #include <AK/JsonParser.h>
  16. #include <AK/JsonValue.h>
  17. #include <AK/LexicalPath.h>
  18. #include <AK/QuickSort.h>
  19. #include <AK/SourceGenerator.h>
  20. #include <AK/String.h>
  21. #include <AK/StringBuilder.h>
  22. #include <AK/Traits.h>
  23. #include <AK/Utf8View.h>
  24. #include <LibCore/ArgsParser.h>
  25. #include <LibCore/DirIterator.h>
  26. #include <LibCore/File.h>
  27. #include <LibCore/Stream.h>
  28. #include <LibJS/Runtime/Intl/AbstractOperations.h>
  29. #include <LibUnicode/Locale.h>
  30. #include <LibUnicode/NumberFormat.h>
  31. #include <LibUnicode/PluralRules.h>
  32. #include <math.h>
  33. using StringIndexType = u16;
  34. constexpr auto s_string_index_type = "u16"sv;
  35. using NumberFormatIndexType = u16;
  36. constexpr auto s_number_format_index_type = "u16"sv;
  37. using NumberFormatListIndexType = u16;
  38. constexpr auto s_number_format_list_index_type = "u16"sv;
  39. using NumericSymbolListIndexType = u8;
  40. constexpr auto s_numeric_symbol_list_index_type = "u8"sv;
  41. using NumberSystemIndexType = u8;
  42. constexpr auto s_number_system_index_type = "u8"sv;
  43. using UnitIndexType = u16;
  44. constexpr auto s_unit_index_type = "u16"sv;
  45. enum class NumberFormatType {
  46. Standard,
  47. Compact,
  48. };
  49. struct NumberFormat : public Unicode::NumberFormat {
  50. using Base = Unicode::NumberFormat;
  51. unsigned hash() const
  52. {
  53. auto hash = pair_int_hash(magnitude, exponent);
  54. hash = pair_int_hash(hash, to_underlying(plurality));
  55. hash = pair_int_hash(hash, zero_format_index);
  56. hash = pair_int_hash(hash, positive_format_index);
  57. hash = pair_int_hash(hash, negative_format_index);
  58. for (auto index : identifier_indices)
  59. hash = pair_int_hash(hash, index);
  60. return hash;
  61. }
  62. bool operator==(NumberFormat const& other) const
  63. {
  64. return (magnitude == other.magnitude)
  65. && (exponent == other.exponent)
  66. && (plurality == other.plurality)
  67. && (zero_format_index == other.zero_format_index)
  68. && (positive_format_index == other.positive_format_index)
  69. && (negative_format_index == other.negative_format_index)
  70. && (identifier_indices == other.identifier_indices);
  71. }
  72. StringIndexType zero_format_index { 0 };
  73. StringIndexType positive_format_index { 0 };
  74. StringIndexType negative_format_index { 0 };
  75. Vector<StringIndexType> identifier_indices {};
  76. };
  77. template<>
  78. struct AK::Formatter<NumberFormat> : Formatter<FormatString> {
  79. ErrorOr<void> format(FormatBuilder& builder, NumberFormat const& format)
  80. {
  81. StringBuilder identifier_indices;
  82. identifier_indices.join(", "sv, format.identifier_indices);
  83. return Formatter<FormatString>::format(builder,
  84. "{{ {}, {}, {}, {}, {}, {}, {{ {} }} }}"sv,
  85. format.magnitude,
  86. format.exponent,
  87. to_underlying(format.plurality),
  88. format.zero_format_index,
  89. format.positive_format_index,
  90. format.negative_format_index,
  91. identifier_indices.build());
  92. }
  93. };
  94. template<>
  95. struct AK::Traits<NumberFormat> : public GenericTraits<NumberFormat> {
  96. static unsigned hash(NumberFormat const& f) { return f.hash(); }
  97. };
  98. using NumberFormatList = Vector<NumberFormatIndexType>;
  99. using NumericSymbolList = Vector<StringIndexType>;
  100. struct NumberSystem {
  101. unsigned hash() const
  102. {
  103. auto hash = int_hash(symbols);
  104. hash = pair_int_hash(hash, primary_grouping_size);
  105. hash = pair_int_hash(hash, secondary_grouping_size);
  106. hash = pair_int_hash(hash, decimal_format);
  107. hash = pair_int_hash(hash, decimal_long_formats);
  108. hash = pair_int_hash(hash, decimal_short_formats);
  109. hash = pair_int_hash(hash, currency_format);
  110. hash = pair_int_hash(hash, accounting_format);
  111. hash = pair_int_hash(hash, currency_unit_formats);
  112. hash = pair_int_hash(hash, currency_short_formats);
  113. hash = pair_int_hash(hash, percent_format);
  114. hash = pair_int_hash(hash, scientific_format);
  115. return hash;
  116. }
  117. bool operator==(NumberSystem const& other) const
  118. {
  119. return (symbols == other.symbols)
  120. && (primary_grouping_size == other.primary_grouping_size)
  121. && (secondary_grouping_size == other.secondary_grouping_size)
  122. && (decimal_format == other.decimal_format)
  123. && (decimal_long_formats == other.decimal_long_formats)
  124. && (decimal_short_formats == other.decimal_short_formats)
  125. && (currency_format == other.currency_format)
  126. && (accounting_format == other.accounting_format)
  127. && (currency_unit_formats == other.currency_unit_formats)
  128. && (currency_short_formats == other.currency_short_formats)
  129. && (percent_format == other.percent_format)
  130. && (scientific_format == other.scientific_format);
  131. }
  132. NumericSymbolListIndexType symbols { 0 };
  133. u8 primary_grouping_size { 0 };
  134. u8 secondary_grouping_size { 0 };
  135. NumberFormatIndexType decimal_format { 0 };
  136. NumberFormatListIndexType decimal_long_formats { 0 };
  137. NumberFormatListIndexType decimal_short_formats { 0 };
  138. NumberFormatIndexType currency_format { 0 };
  139. NumberFormatIndexType accounting_format { 0 };
  140. NumberFormatListIndexType currency_unit_formats { 0 };
  141. NumberFormatListIndexType currency_short_formats { 0 };
  142. NumberFormatIndexType percent_format { 0 };
  143. NumberFormatIndexType scientific_format { 0 };
  144. };
  145. template<>
  146. struct AK::Formatter<NumberSystem> : Formatter<FormatString> {
  147. ErrorOr<void> format(FormatBuilder& builder, NumberSystem const& system)
  148. {
  149. return Formatter<FormatString>::format(builder,
  150. "{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}"sv,
  151. system.symbols,
  152. system.primary_grouping_size,
  153. system.secondary_grouping_size,
  154. system.decimal_format,
  155. system.decimal_long_formats,
  156. system.decimal_short_formats,
  157. system.currency_format,
  158. system.accounting_format,
  159. system.currency_unit_formats,
  160. system.currency_short_formats,
  161. system.percent_format,
  162. system.scientific_format);
  163. }
  164. };
  165. template<>
  166. struct AK::Traits<NumberSystem> : public GenericTraits<NumberSystem> {
  167. static unsigned hash(NumberSystem const& s) { return s.hash(); }
  168. };
  169. struct Unit {
  170. unsigned hash() const
  171. {
  172. auto hash = int_hash(unit);
  173. hash = pair_int_hash(hash, long_formats);
  174. hash = pair_int_hash(hash, short_formats);
  175. hash = pair_int_hash(hash, narrow_formats);
  176. return hash;
  177. }
  178. bool operator==(Unit const& other) const
  179. {
  180. return (unit == other.unit)
  181. && (long_formats == other.long_formats)
  182. && (short_formats == other.short_formats)
  183. && (narrow_formats == other.narrow_formats);
  184. }
  185. StringIndexType unit { 0 };
  186. NumberFormatListIndexType long_formats { 0 };
  187. NumberFormatListIndexType short_formats { 0 };
  188. NumberFormatListIndexType narrow_formats { 0 };
  189. };
  190. template<>
  191. struct AK::Formatter<Unit> : Formatter<FormatString> {
  192. ErrorOr<void> format(FormatBuilder& builder, Unit const& system)
  193. {
  194. return Formatter<FormatString>::format(builder,
  195. "{{ {}, {}, {}, {} }}"sv,
  196. system.unit,
  197. system.long_formats,
  198. system.short_formats,
  199. system.narrow_formats);
  200. }
  201. };
  202. template<>
  203. struct AK::Traits<Unit> : public GenericTraits<Unit> {
  204. static unsigned hash(Unit const& u) { return u.hash(); }
  205. };
  206. struct Locale {
  207. Vector<NumberSystemIndexType> number_systems;
  208. HashMap<String, UnitIndexType> units {};
  209. u8 minimum_grouping_digits { 0 };
  210. };
  211. struct UnicodeLocaleData {
  212. UniqueStringStorage<StringIndexType> unique_strings;
  213. UniqueStorage<NumberFormat, NumberFormatIndexType> unique_formats;
  214. UniqueStorage<NumberFormatList, NumberFormatListIndexType> unique_format_lists;
  215. UniqueStorage<NumericSymbolList, NumericSymbolListIndexType> unique_symbols;
  216. UniqueStorage<NumberSystem, NumberSystemIndexType> unique_systems;
  217. UniqueStorage<Unit, UnitIndexType> unique_units;
  218. HashMap<String, Array<u32, 10>> number_system_digits;
  219. Vector<String> number_systems;
  220. HashMap<String, Locale> locales;
  221. size_t max_identifier_count { 0 };
  222. };
  223. static ErrorOr<void> parse_number_system_digits(String core_supplemental_path, UnicodeLocaleData& locale_data)
  224. {
  225. LexicalPath number_systems_path(move(core_supplemental_path));
  226. number_systems_path = number_systems_path.append("numberingSystems.json"sv);
  227. auto number_systems = TRY(read_json_file(number_systems_path.string()));
  228. auto const& supplemental_object = number_systems.as_object().get("supplemental"sv);
  229. auto const& number_systems_object = supplemental_object.as_object().get("numberingSystems"sv);
  230. number_systems_object.as_object().for_each_member([&](auto const& number_system, auto const& digits_object) {
  231. auto type = digits_object.as_object().get("_type"sv).as_string();
  232. if (type != "numeric"sv)
  233. return;
  234. auto digits = digits_object.as_object().get("_digits"sv).as_string();
  235. Utf8View utf8_digits { digits };
  236. VERIFY(utf8_digits.length() == 10);
  237. auto& number_system_digits = locale_data.number_system_digits.ensure(number_system);
  238. size_t index = 0;
  239. for (u32 digit : utf8_digits)
  240. number_system_digits[index++] = digit;
  241. if (!locale_data.number_systems.contains_slow(number_system))
  242. locale_data.number_systems.append(number_system);
  243. });
  244. return {};
  245. }
  246. static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
  247. {
  248. static constexpr Utf8View whitespace { "\u0020\u00a0\u200f"sv };
  249. while (true) {
  250. Utf8View utf8_pattern { pattern };
  251. Optional<size_t> start_index;
  252. Optional<size_t> end_index;
  253. bool inside_replacement = false;
  254. for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
  255. if (*it == '{') {
  256. if (start_index.has_value()) {
  257. end_index = utf8_pattern.byte_offset_of(it);
  258. break;
  259. }
  260. inside_replacement = true;
  261. } else if (*it == '}') {
  262. inside_replacement = false;
  263. } else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
  264. start_index = utf8_pattern.byte_offset_of(it);
  265. }
  266. }
  267. if (!start_index.has_value())
  268. return pattern;
  269. end_index = end_index.value_or(pattern.length());
  270. utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
  271. utf8_pattern = utf8_pattern.trim(whitespace);
  272. auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv, ReplaceMode::FirstOnly);
  273. auto identifier_index = locale_data.unique_strings.ensure(move(identifier));
  274. size_t replacement_index = 0;
  275. if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) {
  276. replacement_index = *index;
  277. } else {
  278. replacement_index = format.identifier_indices.size();
  279. format.identifier_indices.append(identifier_index);
  280. locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size());
  281. }
  282. pattern = String::formatted("{}{{{}:{}}}{}",
  283. *start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
  284. replacement,
  285. replacement_index,
  286. pattern.substring_view(*start_index + utf8_pattern.byte_length()));
  287. }
  288. }
  289. static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
  290. {
  291. // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns
  292. // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns
  293. VERIFY((patterns.size() == 1) || (patterns.size() == 2));
  294. auto replace_patterns = [&](String pattern) {
  295. static HashMap<StringView, StringView> replacements = {
  296. { "{0}"sv, "{number}"sv },
  297. { "{1}"sv, "{currency}"sv },
  298. { "%"sv, "{percentSign}"sv },
  299. { "+"sv, "{plusSign}"sv },
  300. { "-"sv, "{minusSign}"sv },
  301. { "¤"sv, "{currency}"sv }, // U+00A4 Currency Sign
  302. { "E"sv, "{scientificSeparator}"sv },
  303. };
  304. for (auto const& replacement : replacements)
  305. pattern = pattern.replace(replacement.key, replacement.value, ReplaceMode::All);
  306. if (auto start_number_index = pattern.find_any_of("#0"sv, String::SearchDirection::Forward); start_number_index.has_value()) {
  307. auto end_number_index = *start_number_index + 1;
  308. for (; end_number_index < pattern.length(); ++end_number_index) {
  309. auto ch = pattern[end_number_index];
  310. if ((ch != '#') && (ch != '0') && (ch != ',') && (ch != '.'))
  311. break;
  312. }
  313. if (number_system_for_groupings) {
  314. auto number_pattern = pattern.substring_view(*start_number_index, end_number_index - *start_number_index);
  315. auto group_separators = number_pattern.find_all(","sv);
  316. VERIFY((group_separators.size() == 1) || (group_separators.size() == 2));
  317. auto decimal = number_pattern.find('.');
  318. VERIFY(decimal.has_value());
  319. if (group_separators.size() == 1) {
  320. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[0] - 1;
  321. number_system_for_groupings->secondary_grouping_size = number_system_for_groupings->primary_grouping_size;
  322. } else {
  323. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[1] - 1;
  324. number_system_for_groupings->secondary_grouping_size = group_separators[1] - group_separators[0] - 1;
  325. }
  326. }
  327. pattern = String::formatted("{}{{number}}{}",
  328. *start_number_index > 0 ? pattern.substring_view(0, *start_number_index) : ""sv,
  329. pattern.substring_view(end_number_index));
  330. // This is specifically handled here rather than in the replacements HashMap above so
  331. // that we do not errantly replace zeroes in number patterns.
  332. if (pattern.contains(*replacements.get("E"sv)))
  333. pattern = pattern.replace("0"sv, "{scientificExponent}"sv, ReplaceMode::FirstOnly);
  334. }
  335. if (type == NumberFormatType::Compact)
  336. return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format);
  337. return pattern;
  338. };
  339. auto zero_format = replace_patterns(move(patterns[0]));
  340. format.positive_format_index = locale_data.unique_strings.ensure(String::formatted("{{plusSign}}{}", zero_format));
  341. if (patterns.size() == 2) {
  342. auto negative_format = replace_patterns(move(patterns[1]));
  343. format.negative_format_index = locale_data.unique_strings.ensure(move(negative_format));
  344. } else {
  345. format.negative_format_index = locale_data.unique_strings.ensure(String::formatted("{{minusSign}}{}", zero_format));
  346. }
  347. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  348. }
  349. static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormatIndexType& format_index, NumberSystem* number_system_for_groupings = nullptr)
  350. {
  351. NumberFormat format {};
  352. parse_number_pattern(move(patterns), locale_data, type, format, number_system_for_groupings);
  353. format_index = locale_data.unique_formats.ensure(move(format));
  354. }
  355. static ErrorOr<void> parse_number_systems(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  356. {
  357. LexicalPath numbers_path(move(locale_numbers_path));
  358. numbers_path = numbers_path.append("numbers.json"sv);
  359. auto numbers = TRY(read_json_file(numbers_path.string()));
  360. auto const& main_object = numbers.as_object().get("main"sv);
  361. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  362. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  363. auto const& minimum_grouping_digits = locale_numbers_object.as_object().get("minimumGroupingDigits"sv);
  364. Vector<Optional<NumberSystem>> number_systems;
  365. number_systems.resize(locale_data.number_systems.size());
  366. auto ensure_number_system = [&](auto const& system) -> NumberSystem& {
  367. auto system_index = locale_data.number_systems.find_first_index(system).value();
  368. VERIFY(system_index < number_systems.size());
  369. auto& number_system = number_systems.at(system_index);
  370. if (!number_system.has_value())
  371. number_system = NumberSystem {};
  372. return number_system.value();
  373. };
  374. auto parse_number_format = [&](auto const& format_object) {
  375. Vector<NumberFormatIndexType> result;
  376. result.ensure_capacity(format_object.size());
  377. format_object.for_each_member([&](auto const& key, JsonValue const& value) {
  378. auto split_key = key.split_view('-');
  379. if (split_key.size() != 3)
  380. return;
  381. auto patterns = value.as_string().split(';');
  382. NumberFormat format {};
  383. if (auto type = split_key[0].template to_uint<u64>(); type.has_value()) {
  384. VERIFY(*type % 10 == 0);
  385. format.magnitude = static_cast<u8>(log10(*type));
  386. if (patterns[0] != "0"sv) {
  387. auto number_of_zeroes_in_pattern = patterns[0].count("0"sv);
  388. VERIFY(format.magnitude >= number_of_zeroes_in_pattern);
  389. format.exponent = format.magnitude + 1 - number_of_zeroes_in_pattern;
  390. }
  391. } else {
  392. VERIFY(split_key[0] == "unitPattern"sv);
  393. }
  394. format.plurality = Unicode::plural_category_from_string(split_key[2]);
  395. parse_number_pattern(move(patterns), locale_data, NumberFormatType::Compact, format);
  396. auto format_index = locale_data.unique_formats.ensure(move(format));
  397. result.append(format_index);
  398. });
  399. return locale_data.unique_format_lists.ensure(move(result));
  400. };
  401. auto numeric_symbol_from_string = [&](StringView numeric_symbol) -> Optional<Unicode::NumericSymbol> {
  402. if (numeric_symbol == "approximatelySign"sv)
  403. return Unicode::NumericSymbol::ApproximatelySign;
  404. if (numeric_symbol == "decimal"sv)
  405. return Unicode::NumericSymbol::Decimal;
  406. if (numeric_symbol == "exponential"sv)
  407. return Unicode::NumericSymbol::Exponential;
  408. if (numeric_symbol == "group"sv)
  409. return Unicode::NumericSymbol::Group;
  410. if (numeric_symbol == "infinity"sv)
  411. return Unicode::NumericSymbol::Infinity;
  412. if (numeric_symbol == "minusSign"sv)
  413. return Unicode::NumericSymbol::MinusSign;
  414. if (numeric_symbol == "nan"sv)
  415. return Unicode::NumericSymbol::NaN;
  416. if (numeric_symbol == "percentSign"sv)
  417. return Unicode::NumericSymbol::PercentSign;
  418. if (numeric_symbol == "plusSign"sv)
  419. return Unicode::NumericSymbol::PlusSign;
  420. if (numeric_symbol == "timeSeparator"sv)
  421. return Unicode::NumericSymbol::TimeSeparator;
  422. return {};
  423. };
  424. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  425. constexpr auto symbols_prefix = "symbols-numberSystem-"sv;
  426. constexpr auto decimal_formats_prefix = "decimalFormats-numberSystem-"sv;
  427. constexpr auto currency_formats_prefix = "currencyFormats-numberSystem-"sv;
  428. constexpr auto percent_formats_prefix = "percentFormats-numberSystem-"sv;
  429. constexpr auto scientific_formats_prefix = "scientificFormats-numberSystem-"sv;
  430. constexpr auto misc_patterns_prefix = "miscPatterns-numberSystem-"sv;
  431. if (key.starts_with(symbols_prefix)) {
  432. auto system = key.substring(symbols_prefix.length());
  433. auto& number_system = ensure_number_system(system);
  434. NumericSymbolList symbols;
  435. value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) {
  436. auto numeric_symbol = numeric_symbol_from_string(symbol);
  437. if (!numeric_symbol.has_value())
  438. return;
  439. if (to_underlying(*numeric_symbol) >= symbols.size())
  440. symbols.resize(to_underlying(*numeric_symbol) + 1);
  441. auto symbol_index = locale_data.unique_strings.ensure(localization.as_string());
  442. symbols[to_underlying(*numeric_symbol)] = symbol_index;
  443. });
  444. // The range separator does not appear in the symbols list, we have to extract it from
  445. // the range pattern.
  446. auto misc_patterns_key = String::formatted("{}{}", misc_patterns_prefix, system);
  447. auto misc_patterns = locale_numbers_object.as_object().get(misc_patterns_key);
  448. auto range_separator = misc_patterns.as_object().get("range"sv).as_string();
  449. auto begin_index = range_separator.find("{0}"sv).value() + "{0}"sv.length();
  450. auto end_index = range_separator.find("{1}"sv).value();
  451. range_separator = range_separator.substring(begin_index, end_index - begin_index);
  452. if (to_underlying(Unicode::NumericSymbol::RangeSeparator) >= symbols.size())
  453. symbols.resize(to_underlying(Unicode::NumericSymbol::RangeSeparator) + 1);
  454. auto symbol_index = locale_data.unique_strings.ensure(move(range_separator));
  455. symbols[to_underlying(Unicode::NumericSymbol::RangeSeparator)] = symbol_index;
  456. number_system.symbols = locale_data.unique_symbols.ensure(move(symbols));
  457. } else if (key.starts_with(decimal_formats_prefix)) {
  458. auto system = key.substring(decimal_formats_prefix.length());
  459. auto& number_system = ensure_number_system(system);
  460. auto format_object = value.as_object().get("standard"sv);
  461. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system);
  462. auto const& long_format = value.as_object().get("long"sv).as_object().get("decimalFormat"sv);
  463. number_system.decimal_long_formats = parse_number_format(long_format.as_object());
  464. auto const& short_format = value.as_object().get("short"sv).as_object().get("decimalFormat"sv);
  465. number_system.decimal_short_formats = parse_number_format(short_format.as_object());
  466. } else if (key.starts_with(currency_formats_prefix)) {
  467. auto system = key.substring(currency_formats_prefix.length());
  468. auto& number_system = ensure_number_system(system);
  469. auto format_object = value.as_object().get("standard"sv);
  470. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.currency_format);
  471. format_object = value.as_object().get("accounting"sv);
  472. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.accounting_format);
  473. number_system.currency_unit_formats = parse_number_format(value.as_object());
  474. if (value.as_object().has("short"sv)) {
  475. auto const& short_format = value.as_object().get("short"sv).as_object().get("standard"sv);
  476. number_system.currency_short_formats = parse_number_format(short_format.as_object());
  477. }
  478. } else if (key.starts_with(percent_formats_prefix)) {
  479. auto system = key.substring(percent_formats_prefix.length());
  480. auto& number_system = ensure_number_system(system);
  481. auto format_object = value.as_object().get("standard"sv);
  482. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.percent_format);
  483. } else if (key.starts_with(scientific_formats_prefix)) {
  484. auto system = key.substring(scientific_formats_prefix.length());
  485. auto& number_system = ensure_number_system(system);
  486. auto format_object = value.as_object().get("standard"sv);
  487. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.scientific_format);
  488. }
  489. });
  490. locale.number_systems.ensure_capacity(number_systems.size());
  491. for (auto& number_system : number_systems) {
  492. NumberSystemIndexType system_index = 0;
  493. if (number_system.has_value())
  494. system_index = locale_data.unique_systems.ensure(number_system.release_value());
  495. locale.number_systems.append(system_index);
  496. }
  497. locale.minimum_grouping_digits = minimum_grouping_digits.as_string().template to_uint<u8>().value();
  498. return {};
  499. }
  500. static ErrorOr<void> parse_units(String locale_units_path, UnicodeLocaleData& locale_data, Locale& locale)
  501. {
  502. LexicalPath units_path(move(locale_units_path));
  503. units_path = units_path.append("units.json"sv);
  504. auto locale_units = TRY(read_json_file(units_path.string()));
  505. auto const& main_object = locale_units.as_object().get("main"sv);
  506. auto const& locale_object = main_object.as_object().get(units_path.parent().basename());
  507. auto const& locale_units_object = locale_object.as_object().get("units"sv);
  508. auto const& long_object = locale_units_object.as_object().get("long"sv);
  509. auto const& short_object = locale_units_object.as_object().get("short"sv);
  510. auto const& narrow_object = locale_units_object.as_object().get("narrow"sv);
  511. HashMap<String, Unit> units;
  512. auto ensure_unit = [&](auto const& unit) -> Unit& {
  513. return units.ensure(unit, [&]() {
  514. auto unit_index = locale_data.unique_strings.ensure(unit);
  515. return Unit { .unit = unit_index };
  516. });
  517. };
  518. auto is_sanctioned_unit = [](StringView unit_name) {
  519. // LibUnicode generally tries to avoid being directly dependent on ECMA-402, but this rather significantly reduces the amount
  520. // of data generated here, and ECMA-402 is currently the only consumer of this data.
  521. constexpr auto sanctioned_units = JS::Intl::sanctioned_single_unit_identifiers();
  522. if (find(sanctioned_units.begin(), sanctioned_units.end(), unit_name) != sanctioned_units.end())
  523. return true;
  524. static constexpr auto extra_sanctioned_units = JS::Intl::extra_sanctioned_single_unit_identifiers();
  525. return find(extra_sanctioned_units.begin(), extra_sanctioned_units.end(), unit_name) != extra_sanctioned_units.end();
  526. };
  527. auto parse_units_object = [&](auto const& units_object, Unicode::Style style) {
  528. constexpr auto unit_pattern_prefix = "unitPattern-count-"sv;
  529. constexpr auto combined_unit_separator = "-per-"sv;
  530. units_object.for_each_member([&](auto const& key, JsonValue const& value) {
  531. auto end_of_category = key.find('-');
  532. if (!end_of_category.has_value())
  533. return;
  534. auto unit_name = key.substring(*end_of_category + 1);
  535. if (!is_sanctioned_unit(unit_name)) {
  536. auto indices = unit_name.find_all(combined_unit_separator);
  537. if (indices.size() != 1)
  538. return;
  539. auto numerator = unit_name.substring_view(0, indices[0]);
  540. auto denominator = unit_name.substring_view(indices[0] + combined_unit_separator.length());
  541. if (!is_sanctioned_unit(numerator) || !is_sanctioned_unit(denominator))
  542. return;
  543. }
  544. auto& unit = ensure_unit(unit_name);
  545. NumberFormatList formats;
  546. value.as_object().for_each_member([&](auto const& unit_key, JsonValue const& pattern_value) {
  547. if (!unit_key.starts_with(unit_pattern_prefix))
  548. return;
  549. NumberFormat format {};
  550. auto plurality = unit_key.substring_view(unit_pattern_prefix.length());
  551. format.plurality = Unicode::plural_category_from_string(plurality);
  552. auto zero_format = pattern_value.as_string().replace("{0}"sv, "{number}"sv, ReplaceMode::FirstOnly);
  553. zero_format = parse_identifiers(zero_format, "unitIdentifier"sv, locale_data, format);
  554. format.positive_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{plusSign}{number}"sv, ReplaceMode::FirstOnly));
  555. format.negative_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{minusSign}{number}"sv, ReplaceMode::FirstOnly));
  556. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  557. formats.append(locale_data.unique_formats.ensure(move(format)));
  558. });
  559. auto number_format_list_index = locale_data.unique_format_lists.ensure(move(formats));
  560. switch (style) {
  561. case Unicode::Style::Long:
  562. unit.long_formats = number_format_list_index;
  563. break;
  564. case Unicode::Style::Short:
  565. unit.short_formats = number_format_list_index;
  566. break;
  567. case Unicode::Style::Narrow:
  568. unit.narrow_formats = number_format_list_index;
  569. break;
  570. default:
  571. VERIFY_NOT_REACHED();
  572. }
  573. });
  574. };
  575. parse_units_object(long_object.as_object(), Unicode::Style::Long);
  576. parse_units_object(short_object.as_object(), Unicode::Style::Short);
  577. parse_units_object(narrow_object.as_object(), Unicode::Style::Narrow);
  578. for (auto& unit : units) {
  579. auto unit_index = locale_data.unique_units.ensure(move(unit.value));
  580. locale.units.set(unit.key, unit_index);
  581. }
  582. return {};
  583. }
  584. static ErrorOr<void> parse_all_locales(String core_path, String numbers_path, String units_path, UnicodeLocaleData& locale_data)
  585. {
  586. auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
  587. auto units_iterator = TRY(path_to_dir_iterator(move(units_path)));
  588. LexicalPath core_supplemental_path(move(core_path));
  589. core_supplemental_path = core_supplemental_path.append("supplemental"sv);
  590. VERIFY(Core::File::is_directory(core_supplemental_path.string()));
  591. TRY(parse_number_system_digits(core_supplemental_path.string(), locale_data));
  592. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  593. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
  594. StringBuilder builder;
  595. builder.append(locale_data.unique_strings.get(parsed_locale.language));
  596. if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
  597. builder.appendff("-{}", script);
  598. if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
  599. builder.appendff("-{}", region);
  600. return builder.build();
  601. };
  602. while (numbers_iterator.has_next()) {
  603. auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
  604. auto language = TRY(remove_variants_from_path(numbers_path));
  605. auto& locale = locale_data.locales.ensure(language);
  606. TRY(parse_number_systems(numbers_path, locale_data, locale));
  607. }
  608. while (units_iterator.has_next()) {
  609. auto units_path = TRY(next_path_from_dir_iterator(units_iterator));
  610. auto language = TRY(remove_variants_from_path(units_path));
  611. auto& locale = locale_data.locales.ensure(language);
  612. TRY(parse_units(units_path, locale_data, locale));
  613. }
  614. return {};
  615. }
  616. static String format_identifier(StringView, String identifier)
  617. {
  618. return identifier.to_titlecase();
  619. }
  620. static ErrorOr<void> generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data)
  621. {
  622. StringBuilder builder;
  623. SourceGenerator generator { builder };
  624. generator.append(R"~~~(
  625. #include <AK/Types.h>
  626. #pragma once
  627. namespace Unicode {
  628. )~~~");
  629. generate_enum(generator, format_identifier, "NumberSystem"sv, {}, locale_data.number_systems);
  630. generator.append(R"~~~(
  631. }
  632. )~~~");
  633. TRY(file.write(generator.as_string_view().bytes()));
  634. return {};
  635. }
  636. static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data)
  637. {
  638. StringBuilder builder;
  639. SourceGenerator generator { builder };
  640. generator.set("string_index_type"sv, s_string_index_type);
  641. generator.set("number_format_index_type"sv, s_number_format_index_type);
  642. generator.set("number_format_list_index_type"sv, s_number_format_list_index_type);
  643. generator.set("numeric_symbol_list_index_type"sv, s_numeric_symbol_list_index_type);
  644. generator.set("identifier_count", String::number(locale_data.max_identifier_count));
  645. generator.append(R"~~~(
  646. #include <AK/Array.h>
  647. #include <AK/BinarySearch.h>
  648. #include <AK/Optional.h>
  649. #include <AK/Span.h>
  650. #include <AK/StringView.h>
  651. #include <AK/Vector.h>
  652. #include <LibUnicode/Locale.h>
  653. #include <LibUnicode/NumberFormat.h>
  654. #include <LibUnicode/PluralRules.h>
  655. #include <LibUnicode/UnicodeLocale.h>
  656. #include <LibUnicode/UnicodeNumberFormat.h>
  657. namespace Unicode {
  658. )~~~");
  659. locale_data.unique_strings.generate(generator);
  660. generator.append(R"~~~(
  661. struct NumberFormatImpl {
  662. NumberFormat to_unicode_number_format() const {
  663. NumberFormat number_format {};
  664. number_format.magnitude = magnitude;
  665. number_format.exponent = exponent;
  666. number_format.plurality = static_cast<PluralCategory>(plurality);
  667. number_format.zero_format = s_string_list[zero_format];
  668. number_format.positive_format = s_string_list[positive_format];
  669. number_format.negative_format = s_string_list[negative_format];
  670. number_format.identifiers.ensure_capacity(identifiers.size());
  671. for (@string_index_type@ identifier : identifiers)
  672. number_format.identifiers.append(s_string_list[identifier]);
  673. return number_format;
  674. }
  675. u8 magnitude { 0 };
  676. u8 exponent { 0 };
  677. u8 plurality { 0 };
  678. @string_index_type@ zero_format { 0 };
  679. @string_index_type@ positive_format { 0 };
  680. @string_index_type@ negative_format { 0 };
  681. Array<@string_index_type@, @identifier_count@> identifiers {};
  682. };
  683. struct NumberSystemData {
  684. @numeric_symbol_list_index_type@ symbols { 0 };
  685. u8 primary_grouping_size { 0 };
  686. u8 secondary_grouping_size { 0 };
  687. @number_format_index_type@ decimal_format { 0 };
  688. @number_format_list_index_type@ decimal_long_formats { 0 };
  689. @number_format_list_index_type@ decimal_short_formats { 0 };
  690. @number_format_index_type@ currency_format { 0 };
  691. @number_format_index_type@ accounting_format { 0 };
  692. @number_format_list_index_type@ currency_unit_formats { 0 };
  693. @number_format_list_index_type@ currency_short_formats { 0 };
  694. @number_format_index_type@ percent_format { 0 };
  695. @number_format_index_type@ scientific_format { 0 };
  696. };
  697. struct Unit {
  698. @string_index_type@ unit { 0 };
  699. @number_format_list_index_type@ long_formats { 0 };
  700. @number_format_list_index_type@ short_formats { 0 };
  701. @number_format_list_index_type@ narrow_formats { 0 };
  702. };
  703. )~~~");
  704. locale_data.unique_formats.generate(generator, "NumberFormatImpl"sv, "s_number_formats"sv, 10);
  705. locale_data.unique_format_lists.generate(generator, s_number_format_index_type, "s_number_format_lists"sv);
  706. locale_data.unique_symbols.generate(generator, s_string_index_type, "s_numeric_symbol_lists"sv);
  707. locale_data.unique_systems.generate(generator, "NumberSystemData"sv, "s_number_systems"sv, 10);
  708. locale_data.unique_units.generate(generator, "Unit"sv, "s_units"sv, 10);
  709. auto locales = locale_data.locales.keys();
  710. quick_sort(locales);
  711. generator.set("size", String::number(locales.size()));
  712. generator.append(R"~~~(
  713. static constexpr Array<u8, @size@> s_minimum_grouping_digits { { )~~~");
  714. bool first = true;
  715. for (auto const& locale : locales) {
  716. generator.append(first ? " "sv : ", "sv);
  717. generator.append(String::number(locale_data.locales.find(locale)->value.minimum_grouping_digits));
  718. first = false;
  719. }
  720. generator.append(" } };\n");
  721. auto append_map = [&](String name, auto type, auto const& map) {
  722. generator.set("name", name);
  723. generator.set("type", type);
  724. generator.set("size", String::number(map.size()));
  725. generator.append(R"~~~(
  726. static constexpr Array<@type@, @size@> @name@ { {)~~~");
  727. bool first = true;
  728. for (auto const& item : map) {
  729. generator.append(first ? " "sv : ", "sv);
  730. if constexpr (requires { item.value; })
  731. generator.append(String::number(item.value));
  732. else
  733. generator.append(String::number(item));
  734. first = false;
  735. }
  736. generator.append(" } };");
  737. };
  738. generate_mapping(generator, locale_data.number_system_digits, "u32"sv, "s_number_systems_digits"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_map(name, "u32"sv, value); });
  739. generate_mapping(generator, locale_data.locales, s_number_system_index_type, "s_locale_number_systems"sv, "s_number_systems_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_map(name, s_number_system_index_type, value.number_systems); });
  740. generate_mapping(generator, locale_data.locales, s_unit_index_type, "s_locale_units"sv, "s_units_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_map(name, s_unit_index_type, value.units); });
  741. generator.append(R"~~~(
  742. static Optional<NumberSystem> keyword_to_number_system(KeywordNumbers keyword)
  743. {
  744. switch (keyword) {)~~~");
  745. for (auto const& number_system : locale_data.number_systems) {
  746. generator.set("name"sv, format_identifier({}, number_system));
  747. generator.append(R"~~~(
  748. case KeywordNumbers::@name@:
  749. return NumberSystem::@name@;)~~~");
  750. }
  751. generator.append(R"~~~(
  752. default:
  753. return {};
  754. }
  755. }
  756. Optional<Span<u32 const>> get_digits_for_number_system(StringView system)
  757. {
  758. auto number_system_keyword = keyword_nu_from_string(system);
  759. if (!number_system_keyword.has_value())
  760. return {};
  761. auto number_system_value = keyword_to_number_system(*number_system_keyword);
  762. if (!number_system_value.has_value())
  763. return {};
  764. auto number_system_index = to_underlying(*number_system_value);
  765. return s_number_systems_digits[number_system_index];
  766. }
  767. static NumberSystemData const* find_number_system(StringView locale, StringView system)
  768. {
  769. auto locale_value = locale_from_string(locale);
  770. if (!locale_value.has_value())
  771. return nullptr;
  772. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  773. auto const& number_systems = s_locale_number_systems.at(locale_index);
  774. auto lookup_number_system = [&](auto number_system) -> NumberSystemData const* {
  775. auto number_system_keyword = keyword_nu_from_string(number_system);
  776. if (!number_system_keyword.has_value())
  777. return nullptr;
  778. auto number_system_value = keyword_to_number_system(*number_system_keyword);
  779. if (!number_system_value.has_value())
  780. return nullptr;
  781. auto number_system_index = to_underlying(*number_system_value);
  782. number_system_index = number_systems.at(number_system_index);
  783. if (number_system_index == 0)
  784. return nullptr;
  785. return &s_number_systems.at(number_system_index);
  786. };
  787. if (auto const* number_system = lookup_number_system(system))
  788. return number_system;
  789. auto default_number_system = get_preferred_keyword_value_for_locale(locale, "nu"sv);
  790. if (!default_number_system.has_value())
  791. return nullptr;
  792. return lookup_number_system(*default_number_system);
  793. }
  794. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, NumericSymbol symbol)
  795. {
  796. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  797. auto symbols = s_numeric_symbol_lists.at(number_system->symbols);
  798. auto symbol_index = to_underlying(symbol);
  799. if (symbol_index >= symbols.size())
  800. return {};
  801. return s_string_list[symbols[symbol_index]];
  802. }
  803. return {};
  804. }
  805. Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system)
  806. {
  807. auto locale_value = locale_from_string(locale);
  808. if (!locale_value.has_value())
  809. return {};
  810. u8 minimum_grouping_digits = s_minimum_grouping_digits[to_underlying(*locale_value) - 1];
  811. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr)
  812. return NumberGroupings { minimum_grouping_digits, number_system->primary_grouping_size, number_system->secondary_grouping_size };
  813. return {};
  814. }
  815. Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type)
  816. {
  817. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  818. @number_format_index_type@ format_index = 0;
  819. switch (type) {
  820. case StandardNumberFormatType::Decimal:
  821. format_index = number_system->decimal_format;
  822. break;
  823. case StandardNumberFormatType::Currency:
  824. format_index = number_system->currency_format;
  825. break;
  826. case StandardNumberFormatType::Accounting:
  827. format_index = number_system->accounting_format;
  828. break;
  829. case StandardNumberFormatType::Percent:
  830. format_index = number_system->percent_format;
  831. break;
  832. case StandardNumberFormatType::Scientific:
  833. format_index = number_system->scientific_format;
  834. break;
  835. }
  836. return s_number_formats[format_index].to_unicode_number_format();
  837. }
  838. return {};
  839. }
  840. Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type)
  841. {
  842. Vector<NumberFormat> formats;
  843. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  844. @number_format_list_index_type@ number_format_list_index { 0 };
  845. switch (type) {
  846. case CompactNumberFormatType::DecimalLong:
  847. number_format_list_index = number_system->decimal_long_formats;
  848. break;
  849. case CompactNumberFormatType::DecimalShort:
  850. number_format_list_index = number_system->decimal_short_formats;
  851. break;
  852. case CompactNumberFormatType::CurrencyUnit:
  853. number_format_list_index = number_system->currency_unit_formats;
  854. break;
  855. case CompactNumberFormatType::CurrencyShort:
  856. number_format_list_index = number_system->currency_short_formats;
  857. break;
  858. }
  859. auto number_formats = s_number_format_lists.at(number_format_list_index);
  860. formats.ensure_capacity(number_formats.size());
  861. for (auto number_format : number_formats)
  862. formats.append(s_number_formats[number_format].to_unicode_number_format());
  863. }
  864. return formats;
  865. }
  866. static Unit const* find_units(StringView locale, StringView unit)
  867. {
  868. auto locale_value = locale_from_string(locale);
  869. if (!locale_value.has_value())
  870. return nullptr;
  871. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  872. auto const& locale_units = s_locale_units.at(locale_index);
  873. for (auto unit_index : locale_units) {
  874. auto const& units = s_units.at(unit_index);
  875. if (unit == s_string_list[units.unit])
  876. return &units;
  877. };
  878. return nullptr;
  879. }
  880. Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style)
  881. {
  882. Vector<NumberFormat> formats;
  883. if (auto const* units = find_units(locale, unit); units != nullptr) {
  884. @number_format_list_index_type@ number_format_list_index { 0 };
  885. switch (style) {
  886. case Style::Long:
  887. number_format_list_index = units->long_formats;
  888. break;
  889. case Style::Short:
  890. number_format_list_index = units->short_formats;
  891. break;
  892. case Style::Narrow:
  893. number_format_list_index = units->narrow_formats;
  894. break;
  895. default:
  896. VERIFY_NOT_REACHED();
  897. }
  898. auto number_formats = s_number_format_lists.at(number_format_list_index);
  899. formats.ensure_capacity(number_formats.size());
  900. for (auto number_format : number_formats)
  901. formats.append(s_number_formats[number_format].to_unicode_number_format());
  902. }
  903. return formats;
  904. }
  905. }
  906. )~~~");
  907. TRY(file.write(generator.as_string_view().bytes()));
  908. return {};
  909. }
  910. ErrorOr<int> serenity_main(Main::Arguments arguments)
  911. {
  912. StringView generated_header_path;
  913. StringView generated_implementation_path;
  914. StringView core_path;
  915. StringView numbers_path;
  916. StringView units_path;
  917. Core::ArgsParser args_parser;
  918. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  919. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  920. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  921. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  922. args_parser.add_option(units_path, "Path to cldr-units directory", "units-path", 'u', "units-path");
  923. args_parser.parse(arguments);
  924. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
  925. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
  926. UnicodeLocaleData locale_data;
  927. TRY(parse_all_locales(core_path, numbers_path, units_path, locale_data));
  928. TRY(generate_unicode_locale_header(*generated_header_file, locale_data));
  929. TRY(generate_unicode_locale_implementation(*generated_implementation_file, locale_data));
  930. return 0;
  931. }