GenerateUnicodeNumberFormat.cpp 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AllOf.h>
  8. #include <AK/Array.h>
  9. #include <AK/CharacterTypes.h>
  10. #include <AK/Find.h>
  11. #include <AK/Format.h>
  12. #include <AK/HashMap.h>
  13. #include <AK/JsonObject.h>
  14. #include <AK/JsonParser.h>
  15. #include <AK/JsonValue.h>
  16. #include <AK/LexicalPath.h>
  17. #include <AK/QuickSort.h>
  18. #include <AK/SourceGenerator.h>
  19. #include <AK/String.h>
  20. #include <AK/StringBuilder.h>
  21. #include <AK/Utf8View.h>
  22. #include <LibCore/ArgsParser.h>
  23. #include <LibCore/DirIterator.h>
  24. #include <LibCore/File.h>
  25. #include <LibUnicode/Locale.h>
  26. #include <math.h>
// Integer type of the string indices stored by the structures in this file (the values returned by
// UniqueStringStorage<StringIndexType>::ensure()).
using StringIndexType = u16;
// Source-text spelling of StringIndexType; it is handed to the SourceGenerator as "string_index_type"
// so that the generated implementation declares its index fields with the same type.
constexpr auto s_string_index_type = "u16"sv;
// Tells parse_number_pattern() how to finish a pattern: Compact patterns are additionally passed
// through parse_identifiers() (with the "compactIdentifier" replacement), Standard patterns are not.
enum class NumberFormatType {
    Standard,
    Compact,
};
  33. struct NumberFormat : public Unicode::NumberFormat {
  34. using Base = Unicode::NumberFormat;
  35. static Base::Plurality plurality_from_string(StringView plurality)
  36. {
  37. if (plurality == "other"sv)
  38. return Base::Plurality::Other;
  39. if (plurality == "1"sv)
  40. return Base::Plurality::Single;
  41. if (plurality == "zero"sv)
  42. return Base::Plurality::Zero;
  43. if (plurality == "one"sv)
  44. return Base::Plurality::One;
  45. if (plurality == "two"sv)
  46. return Base::Plurality::Two;
  47. if (plurality == "few"sv)
  48. return Base::Plurality::Few;
  49. if (plurality == "many"sv)
  50. return Base::Plurality::Many;
  51. VERIFY_NOT_REACHED();
  52. }
  53. StringIndexType zero_format_index { 0 };
  54. StringIndexType positive_format_index { 0 };
  55. StringIndexType negative_format_index { 0 };
  56. Vector<StringIndexType> identifier_indices {};
  57. };
// Everything parsed from numbers.json for one numbering system of one locale.
struct NumberSystem {
    // Unique-string index of the numbering system's name.
    StringIndexType system { 0 };
    // Symbol name -> unique-string index of its localized text.
    HashMap<String, StringIndexType> symbols {};
    // Grouping sizes derived from the separators of the standard decimal pattern
    // (see parse_number_pattern()).
    u8 primary_grouping_size { 0 };
    u8 secondary_grouping_size { 0 };
    NumberFormat decimal_format {};
    Vector<NumberFormat> decimal_long_formats {};
    Vector<NumberFormat> decimal_short_formats {};
    NumberFormat currency_format {};
    NumberFormat accounting_format {};
    Vector<NumberFormat> currency_unit_formats {};
    Vector<NumberFormat> currency_short_formats {};
    NumberFormat percent_format {};
    NumberFormat scientific_format {};
};
// Formats parsed from units.json for a single unit, one list per Unicode::Style.
struct Unit {
    // Unique-string index of the unit's name.
    StringIndexType unit { 0 };
    Vector<NumberFormat> long_formats {};
    Vector<NumberFormat> short_formats {};
    Vector<NumberFormat> narrow_formats {};
};
// Per-locale data: numbering systems keyed by system name and units keyed by unit name.
struct Locale {
    HashMap<String, NumberSystem> number_systems;
    HashMap<String, Unit> units {};
};
// Aggregate state shared by all parsing and generation steps in this file.
struct UnicodeLocaleData {
    // Deduplicated storage for every string referenced by index elsewhere in this file.
    UniqueStringStorage<StringIndexType> unique_strings;
    // Keyed by the language[-script][-region] string built in parse_all_locales().
    HashMap<String, Locale> locales;
    // Distinct symbol names seen across all numbering systems, in first-seen order.
    Vector<String> numeric_symbols;
    // Largest NumberFormat::identifier_indices size seen so far; used as the size of the
    // identifiers array in the generated NumberFormat struct.
    size_t max_identifier_count { 0 };
};
  89. static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
  90. {
  91. static Utf8View whitespace { "\u0020\u00a0\u200f"sv };
  92. while (true) {
  93. Utf8View utf8_pattern { pattern };
  94. Optional<size_t> start_index;
  95. Optional<size_t> end_index;
  96. bool inside_replacement = false;
  97. for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
  98. if (*it == '{') {
  99. if (start_index.has_value()) {
  100. end_index = utf8_pattern.byte_offset_of(it);
  101. break;
  102. }
  103. inside_replacement = true;
  104. } else if (*it == '}') {
  105. inside_replacement = false;
  106. } else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
  107. start_index = utf8_pattern.byte_offset_of(it);
  108. }
  109. }
  110. if (!start_index.has_value())
  111. return pattern;
  112. end_index = end_index.value_or(pattern.length());
  113. utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
  114. utf8_pattern = utf8_pattern.trim(whitespace);
  115. auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
  116. auto identifier_index = locale_data.unique_strings.ensure(move(identifier));
  117. size_t replacement_index = 0;
  118. if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) {
  119. replacement_index = *index;
  120. } else {
  121. replacement_index = format.identifier_indices.size();
  122. format.identifier_indices.append(identifier_index);
  123. locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size());
  124. }
  125. pattern = String::formatted("{}{{{}:{}}}{}",
  126. *start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
  127. replacement,
  128. replacement_index,
  129. pattern.substring_view(*start_index + utf8_pattern.byte_length()));
  130. }
  131. }
  132. static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
  133. {
  134. // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns
  135. // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns
  136. VERIFY((patterns.size() == 1) || (patterns.size() == 2));
  137. auto replace_patterns = [&](String pattern) {
  138. static HashMap<StringView, StringView> replacements = {
  139. { "{0}"sv, "{number}"sv },
  140. { "{1}"sv, "{currency}"sv },
  141. { "%"sv, "{percentSign}"sv },
  142. { "+"sv, "{plusSign}"sv },
  143. { "-"sv, "{minusSign}"sv },
  144. { "¤"sv, "{currency}"sv }, // U+00A4 Currency Sign
  145. { "E"sv, "{scientificSeparator}"sv },
  146. };
  147. for (auto const& replacement : replacements)
  148. pattern = pattern.replace(replacement.key, replacement.value, true);
  149. if (auto start_number_index = pattern.find_any_of("#0"sv, String::SearchDirection::Forward); start_number_index.has_value()) {
  150. auto end_number_index = *start_number_index + 1;
  151. for (; end_number_index < pattern.length(); ++end_number_index) {
  152. auto ch = pattern[end_number_index];
  153. if ((ch != '#') && (ch != '0') && (ch != ',') && (ch != '.'))
  154. break;
  155. }
  156. if (number_system_for_groupings) {
  157. auto number_pattern = pattern.substring_view(*start_number_index, end_number_index - *start_number_index);
  158. auto group_separators = number_pattern.find_all(","sv);
  159. VERIFY((group_separators.size() == 1) || (group_separators.size() == 2));
  160. auto decimal = number_pattern.find('.');
  161. VERIFY(decimal.has_value());
  162. if (group_separators.size() == 1) {
  163. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[0] - 1;
  164. number_system_for_groupings->secondary_grouping_size = number_system_for_groupings->primary_grouping_size;
  165. } else {
  166. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[1] - 1;
  167. number_system_for_groupings->secondary_grouping_size = group_separators[1] - group_separators[0] - 1;
  168. }
  169. }
  170. pattern = String::formatted("{}{{number}}{}",
  171. *start_number_index > 0 ? pattern.substring_view(0, *start_number_index) : ""sv,
  172. pattern.substring_view(end_number_index));
  173. // This is specifically handled here rather than in the replacements HashMap above so
  174. // that we do not errantly replace zeroes in number patterns.
  175. if (pattern.contains(*replacements.get("E"sv)))
  176. pattern = pattern.replace("0"sv, "{scientificExponent}"sv);
  177. }
  178. if (type == NumberFormatType::Compact)
  179. return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format);
  180. return pattern;
  181. };
  182. auto zero_format = replace_patterns(move(patterns[0]));
  183. format.positive_format_index = locale_data.unique_strings.ensure(String::formatted("{{plusSign}}{}", zero_format));
  184. if (patterns.size() == 2) {
  185. auto negative_format = replace_patterns(move(patterns[1]));
  186. format.negative_format_index = locale_data.unique_strings.ensure(move(negative_format));
  187. } else {
  188. format.negative_format_index = locale_data.unique_strings.ensure(String::formatted("{{minusSign}}{}", zero_format));
  189. }
  190. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  191. }
  192. static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  193. {
  194. LexicalPath numbers_path(move(locale_numbers_path));
  195. numbers_path = numbers_path.append("numbers.json"sv);
  196. VERIFY(Core::File::exists(numbers_path.string()));
  197. auto numbers_file_or_error = Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly);
  198. VERIFY(!numbers_file_or_error.is_error());
  199. auto numbers = JsonValue::from_string(numbers_file_or_error.value()->read_all()).release_value_but_fixme_should_propagate_errors();
  200. auto const& main_object = numbers.as_object().get("main"sv);
  201. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  202. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  203. auto ensure_number_system = [&](auto const& system) -> NumberSystem& {
  204. return locale.number_systems.ensure(system, [&]() {
  205. auto system_index = locale_data.unique_strings.ensure(system);
  206. return NumberSystem { .system = system_index };
  207. });
  208. };
  209. auto parse_number_format = [&](auto const& format_object) {
  210. Vector<NumberFormat> result;
  211. result.ensure_capacity(format_object.size());
  212. format_object.for_each_member([&](auto const& key, JsonValue const& value) {
  213. auto split_key = key.split_view('-');
  214. if (split_key.size() != 3)
  215. return;
  216. auto patterns = value.as_string().split(';');
  217. NumberFormat format {};
  218. if (auto type = split_key[0].template to_uint<u64>(); type.has_value()) {
  219. VERIFY(*type % 10 == 0);
  220. format.magnitude = static_cast<u8>(log10(*type));
  221. if (patterns[0] != "0"sv) {
  222. auto number_of_zeroes_in_pattern = patterns[0].count("0"sv);
  223. VERIFY(format.magnitude >= number_of_zeroes_in_pattern);
  224. format.exponent = format.magnitude + 1 - number_of_zeroes_in_pattern;
  225. }
  226. } else {
  227. VERIFY(split_key[0] == "unitPattern"sv);
  228. }
  229. format.plurality = NumberFormat::plurality_from_string(split_key[2]);
  230. parse_number_pattern(move(patterns), locale_data, NumberFormatType::Compact, format);
  231. result.append(move(format));
  232. });
  233. return result;
  234. };
  235. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  236. constexpr auto symbols_prefix = "symbols-numberSystem-"sv;
  237. constexpr auto decimal_formats_prefix = "decimalFormats-numberSystem-"sv;
  238. constexpr auto currency_formats_prefix = "currencyFormats-numberSystem-"sv;
  239. constexpr auto percent_formats_prefix = "percentFormats-numberSystem-"sv;
  240. constexpr auto scientific_formats_prefix = "scientificFormats-numberSystem-"sv;
  241. if (key.starts_with(symbols_prefix)) {
  242. auto system = key.substring(symbols_prefix.length());
  243. auto& number_system = ensure_number_system(system);
  244. value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) {
  245. auto symbol_index = locale_data.unique_strings.ensure(localization.as_string());
  246. number_system.symbols.set(symbol, symbol_index);
  247. if (!locale_data.numeric_symbols.contains_slow(symbol))
  248. locale_data.numeric_symbols.append(symbol);
  249. });
  250. } else if (key.starts_with(decimal_formats_prefix)) {
  251. auto system = key.substring(decimal_formats_prefix.length());
  252. auto& number_system = ensure_number_system(system);
  253. auto format_object = value.as_object().get("standard"sv);
  254. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system);
  255. auto const& long_format = value.as_object().get("long"sv).as_object().get("decimalFormat"sv);
  256. number_system.decimal_long_formats = parse_number_format(long_format.as_object());
  257. auto const& short_format = value.as_object().get("short"sv).as_object().get("decimalFormat"sv);
  258. number_system.decimal_short_formats = parse_number_format(short_format.as_object());
  259. } else if (key.starts_with(currency_formats_prefix)) {
  260. auto system = key.substring(currency_formats_prefix.length());
  261. auto& number_system = ensure_number_system(system);
  262. auto format_object = value.as_object().get("standard"sv);
  263. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.currency_format);
  264. format_object = value.as_object().get("accounting"sv);
  265. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.accounting_format);
  266. number_system.currency_unit_formats = parse_number_format(value.as_object());
  267. if (value.as_object().has("short"sv)) {
  268. auto const& short_format = value.as_object().get("short"sv).as_object().get("standard"sv);
  269. number_system.currency_short_formats = parse_number_format(short_format.as_object());
  270. }
  271. } else if (key.starts_with(percent_formats_prefix)) {
  272. auto system = key.substring(percent_formats_prefix.length());
  273. auto& number_system = ensure_number_system(system);
  274. auto format_object = value.as_object().get("standard"sv);
  275. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.percent_format);
  276. } else if (key.starts_with(scientific_formats_prefix)) {
  277. auto system = key.substring(scientific_formats_prefix.length());
  278. auto& number_system = ensure_number_system(system);
  279. auto format_object = value.as_object().get("standard"sv);
  280. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.scientific_format);
  281. }
  282. });
  283. }
  284. static void parse_units(String locale_units_path, UnicodeLocaleData& locale_data, Locale& locale)
  285. {
  286. LexicalPath units_path(move(locale_units_path));
  287. units_path = units_path.append("units.json"sv);
  288. VERIFY(Core::File::exists(units_path.string()));
  289. auto units_file_or_error = Core::File::open(units_path.string(), Core::OpenMode::ReadOnly);
  290. VERIFY(!units_file_or_error.is_error());
  291. auto units = JsonValue::from_string(units_file_or_error.value()->read_all()).release_value_but_fixme_should_propagate_errors();
  292. auto const& main_object = units.as_object().get("main"sv);
  293. auto const& locale_object = main_object.as_object().get(units_path.parent().basename());
  294. auto const& locale_units_object = locale_object.as_object().get("units"sv);
  295. auto const& long_object = locale_units_object.as_object().get("long"sv);
  296. auto const& short_object = locale_units_object.as_object().get("short"sv);
  297. auto const& narrow_object = locale_units_object.as_object().get("narrow"sv);
  298. auto ensure_unit = [&](auto const& unit) -> Unit& {
  299. return locale.units.ensure(unit, [&]() {
  300. auto unit_index = locale_data.unique_strings.ensure(unit);
  301. return Unit { .unit = unit_index };
  302. });
  303. };
  304. auto is_sanctioned_unit = [](StringView unit_name) {
  305. // This is a copy of the units sanctioned for use within ECMA-402. LibUnicode generally tries to
  306. // avoid being directly dependent on ECMA-402, but this rather significantly reduces the amount
  307. // of data generated here, and ECMA-402 is currently the only consumer of this data.
  308. // https://tc39.es/ecma402/#table-sanctioned-simple-unit-identifiers
  309. constexpr auto sanctioned_units = AK::Array { "acre"sv, "bit"sv, "byte"sv, "celsius"sv, "centimeter"sv, "day"sv, "degree"sv, "fahrenheit"sv, "fluid-ounce"sv, "foot"sv, "gallon"sv, "gigabit"sv, "gigabyte"sv, "gram"sv, "hectare"sv, "hour"sv, "inch"sv, "kilobit"sv, "kilobyte"sv, "kilogram"sv, "kilometer"sv, "liter"sv, "megabit"sv, "megabyte"sv, "meter"sv, "mile"sv, "mile-scandinavian"sv, "milliliter"sv, "millimeter"sv, "millisecond"sv, "minute"sv, "month"sv, "ounce"sv, "percent"sv, "petabyte"sv, "pound"sv, "second"sv, "stone"sv, "terabit"sv, "terabyte"sv, "week"sv, "yard"sv, "year"sv };
  310. return find(sanctioned_units.begin(), sanctioned_units.end(), unit_name) != sanctioned_units.end();
  311. };
  312. auto parse_units_object = [&](auto const& units_object, Unicode::Style style) {
  313. constexpr auto unit_pattern_prefix = "unitPattern-count-"sv;
  314. constexpr auto combined_unit_separator = "-per-"sv;
  315. units_object.for_each_member([&](auto const& key, JsonValue const& value) {
  316. auto end_of_category = key.find('-');
  317. if (!end_of_category.has_value())
  318. return;
  319. auto unit_name = key.substring(*end_of_category + 1);
  320. if (!is_sanctioned_unit(unit_name)) {
  321. auto indices = unit_name.find_all(combined_unit_separator);
  322. if (indices.size() != 1)
  323. return;
  324. auto numerator = unit_name.substring_view(0, indices[0]);
  325. auto denominator = unit_name.substring_view(indices[0] + combined_unit_separator.length());
  326. if (!is_sanctioned_unit(numerator) || !is_sanctioned_unit(denominator))
  327. return;
  328. }
  329. value.as_object().for_each_member([&](auto const& unit_key, JsonValue const& pattern_value) {
  330. if (!unit_key.starts_with(unit_pattern_prefix))
  331. return;
  332. auto& unit = ensure_unit(unit_name);
  333. NumberFormat format {};
  334. auto plurality = unit_key.substring_view(unit_pattern_prefix.length());
  335. format.plurality = NumberFormat::plurality_from_string(plurality);
  336. auto zero_format = pattern_value.as_string().replace("{0}"sv, "{number}"sv);
  337. zero_format = parse_identifiers(zero_format, "unitIdentifier"sv, locale_data, format);
  338. format.positive_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{plusSign}{number}"sv));
  339. format.negative_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{minusSign}{number}"sv));
  340. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  341. switch (style) {
  342. case Unicode::Style::Long:
  343. unit.long_formats.append(move(format));
  344. break;
  345. case Unicode::Style::Short:
  346. unit.short_formats.append(move(format));
  347. break;
  348. case Unicode::Style::Narrow:
  349. unit.narrow_formats.append(move(format));
  350. break;
  351. default:
  352. VERIFY_NOT_REACHED();
  353. }
  354. });
  355. });
  356. };
  357. parse_units_object(long_object.as_object(), Unicode::Style::Long);
  358. parse_units_object(short_object.as_object(), Unicode::Style::Short);
  359. parse_units_object(narrow_object.as_object(), Unicode::Style::Narrow);
  360. }
  361. static void parse_all_locales(String core_path, String numbers_path, String units_path, UnicodeLocaleData& locale_data)
  362. {
  363. auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
  364. auto units_iterator = path_to_dir_iterator(move(units_path));
  365. auto remove_variants_from_path = [&](String path) -> Optional<String> {
  366. auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path));
  367. if (!parsed_locale.has_value())
  368. return {};
  369. StringBuilder builder;
  370. builder.append(locale_data.unique_strings.get(parsed_locale->language));
  371. if (auto script = locale_data.unique_strings.get(parsed_locale->script); !script.is_empty())
  372. builder.appendff("-{}", script);
  373. if (auto region = locale_data.unique_strings.get(parsed_locale->region); !region.is_empty())
  374. builder.appendff("-{}", region);
  375. return builder.build();
  376. };
  377. while (numbers_iterator.has_next()) {
  378. auto numbers_path = numbers_iterator.next_full_path();
  379. VERIFY(Core::File::is_directory(numbers_path));
  380. auto language = remove_variants_from_path(numbers_path);
  381. if (!language.has_value())
  382. continue;
  383. auto& locale = locale_data.locales.ensure(*language);
  384. parse_number_systems(numbers_path, locale_data, locale);
  385. }
  386. while (units_iterator.has_next()) {
  387. auto units_path = units_iterator.next_full_path();
  388. VERIFY(Core::File::is_directory(units_path));
  389. auto language = remove_variants_from_path(units_path);
  390. if (!language.has_value())
  391. continue;
  392. auto& locale = locale_data.locales.ensure(*language);
  393. parse_units(units_path, locale_data, locale);
  394. }
  395. }
  396. static String format_identifier(StringView owner, String identifier)
  397. {
  398. identifier = identifier.replace("-"sv, "_"sv, true);
  399. if (all_of(identifier, is_ascii_digit))
  400. return String::formatted("{}_{}", owner[0], identifier);
  401. if (is_ascii_lower_alpha(identifier[0]))
  402. return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
  403. return identifier;
  404. }
  405. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  406. {
  407. StringBuilder builder;
  408. SourceGenerator generator { builder };
  409. generator.append(R"~~~(
  410. #pragma once
  411. #include <AK/Optional.h>
  412. #include <AK/StringView.h>
  413. #include <AK/Types.h>
  414. #include <AK/Vector.h>
  415. #include <LibUnicode/Forward.h>
  416. namespace Unicode {
  417. )~~~");
  418. generate_enum(generator, format_identifier, "NumericSymbol"sv, {}, locale_data.numeric_symbols);
  419. generator.append(R"~~~(
  420. namespace Detail {
  421. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView numeric_symbol);
  422. Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
  423. Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
  424. Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
  425. Vector<Unicode::NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
  426. Optional<NumericSymbol> numeric_symbol_from_string(StringView numeric_symbol);
  427. }
  428. }
  429. )~~~");
  430. file.write(generator.as_string_view());
  431. }
  432. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  433. {
  434. StringBuilder builder;
  435. SourceGenerator generator { builder };
  436. generator.set("string_index_type"sv, s_string_index_type);
  437. generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size()));
  438. generator.set("identifier_count", String::number(locale_data.max_identifier_count));
  439. generator.append(R"~~~(
  440. #include <AK/Array.h>
  441. #include <AK/BinarySearch.h>
  442. #include <AK/Span.h>
  443. #include <LibUnicode/Locale.h>
  444. #include <LibUnicode/UnicodeNumberFormat.h>
  445. namespace Unicode::Detail {
  446. )~~~");
  447. locale_data.unique_strings.generate(generator);
  448. generator.append(R"~~~(
  449. struct NumberFormat {
  450. Unicode::NumberFormat to_unicode_number_format() const {
  451. Unicode::NumberFormat number_format {};
  452. number_format.magnitude = magnitude;
  453. number_format.exponent = exponent;
  454. number_format.plurality = static_cast<Unicode::NumberFormat::Plurality>(plurality);
  455. number_format.zero_format = s_string_list[zero_format];
  456. number_format.positive_format = s_string_list[positive_format];
  457. number_format.negative_format = s_string_list[negative_format];
  458. number_format.identifiers.ensure_capacity(identifiers.size());
  459. for (@string_index_type@ identifier : identifiers)
  460. number_format.identifiers.append(s_string_list[identifier]);
  461. return number_format;
  462. }
  463. u8 magnitude { 0 };
  464. u8 exponent { 0 };
  465. u8 plurality { 0 };
  466. @string_index_type@ zero_format { 0 };
  467. @string_index_type@ positive_format { 0 };
  468. @string_index_type@ negative_format { 0 };
  469. Array<@string_index_type@, @identifier_count@> identifiers {};
  470. };
  471. struct NumberSystem {
  472. @string_index_type@ system { 0 };
  473. Array<@string_index_type@, @numeric_symbols_size@> symbols {};
  474. u8 primary_grouping_size { 0 };
  475. u8 secondary_grouping_size { 0 };
  476. NumberFormat decimal_format {};
  477. Span<NumberFormat const> decimal_long_formats {};
  478. Span<NumberFormat const> decimal_short_formats {};
  479. NumberFormat currency_format {};
  480. NumberFormat accounting_format {};
  481. Span<NumberFormat const> currency_unit_formats {};
  482. Span<NumberFormat const> currency_short_formats {};
  483. NumberFormat percent_format {};
  484. NumberFormat scientific_format {};
  485. };
  486. struct Unit {
  487. @string_index_type@ unit { 0 };
  488. Span<NumberFormat const> long_formats {};
  489. Span<NumberFormat const> short_formats {};
  490. Span<NumberFormat const> narrow_formats {};
  491. };
  492. )~~~");
  493. auto append_number_format = [&](auto const& number_format) {
  494. generator.set("magnitude"sv, String::number(number_format.magnitude));
  495. generator.set("exponent"sv, String::number(number_format.exponent));
  496. generator.set("plurality"sv, String::number(static_cast<u8>(number_format.plurality)));
  497. generator.set("zero_format"sv, String::number(number_format.zero_format_index));
  498. generator.set("positive_format"sv, String::number(number_format.positive_format_index));
  499. generator.set("negative_format"sv, String::number(number_format.negative_format_index));
  500. generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, { ");
  501. bool first = true;
  502. for (auto identifier_index : number_format.identifier_indices) {
  503. if (!first)
  504. generator.append(", ");
  505. generator.append(String::number(identifier_index));
  506. first = false;
  507. }
  508. generator.append(" } },");
  509. };
  510. auto append_number_formats = [&](String name, auto const& number_formats) {
  511. generator.set("name"sv, move(name));
  512. generator.set("size"sv, String::number(number_formats.size()));
  513. generator.append(R"~~~(
  514. static constexpr Array<NumberFormat, @size@> @name@ { {
  515. )~~~");
  516. constexpr size_t max_values_per_row = 10;
  517. size_t values_in_current_row = 0;
  518. for (auto const& number_format : number_formats) {
  519. if (values_in_current_row++ > 0)
  520. generator.append(" ");
  521. append_number_format(number_format);
  522. if (values_in_current_row == max_values_per_row) {
  523. values_in_current_row = 0;
  524. generator.append("\n ");
  525. }
  526. }
  527. generator.append(R"~~~(
  528. } };
  529. )~~~");
  530. };
  // Emits the generated data for one collection of number systems.
  //
  // For every entry of `number_systems`, the four compact format lists (decimal long/short,
  // currency unit/short) are first handed to append_number_formats under a generated identifier.
  // Afterwards a `static constexpr Array<NumberSystem, N>` called `name` is emitted whose entries
  // refer back to those identifiers through `.span()`.
  //
  // `name` is the identifier of the emitted array and the prefix of every per-format identifier.
  // `number_systems` is iterated as key/value entries and must provide size().
  531. auto append_number_systems = [&](String name, auto const& number_systems) {
  // Builds the "<name>_<system>_<format>" identifier used for one format list.
  532. auto format_name = [&](StringView system, StringView format) {
  533. return String::formatted("{}_{}_{}", name, system, format);
  534. };
  // First pass: emit the format lists, because the NumberSystem entries emitted below refer to
  // them by name and they therefore have to appear earlier in the generated file.
  535. for (auto const& number_system : number_systems) {
  536. append_number_formats(format_name(number_system.key, "dl"sv), number_system.value.decimal_long_formats);
  537. append_number_formats(format_name(number_system.key, "ds"sv), number_system.value.decimal_short_formats);
  538. append_number_formats(format_name(number_system.key, "cu"sv), number_system.value.currency_unit_formats);
  539. append_number_formats(format_name(number_system.key, "cs"sv), number_system.value.currency_short_formats);
  540. }
  541. generator.set("name", name);
  542. generator.set("size", String::number(number_systems.size()));
  // Open the NumberSystem array; one initializer per number system follows.
  543. generator.append(R"~~~(
  544. static constexpr Array<NumberSystem, @size@> @name@ { {)~~~");
  // Second pass: emit one NumberSystem initializer per entry. The identifiers set here are the
  // same ones produced by format_name in the first pass.
  545. for (auto const& number_system : number_systems) {
  546. generator.set("system"sv, String::number(number_system.value.system));
  547. generator.set("primary_grouping_size"sv, String::number(number_system.value.primary_grouping_size));
  548. generator.set("secondary_grouping_size"sv, String::number(number_system.value.secondary_grouping_size));
  549. generator.set("decimal_long_formats"sv, format_name(number_system.key, "dl"sv));
  550. generator.set("decimal_short_formats"sv, format_name(number_system.key, "ds"sv));
  551. generator.set("currency_unit_formats"sv, format_name(number_system.key, "cu"sv));
  552. generator.set("currency_short_formats"sv, format_name(number_system.key, "cs"sv));
  553. generator.append(R"~~~(
  554. { @system@, {)~~~");
  // One value per entry of locale_data.numeric_symbols, in that container's iteration order.
  // A symbol this number system has no entry for is written as 0.
  555. for (auto const& symbol : locale_data.numeric_symbols) {
  556. auto index = number_system.value.symbols.get(symbol).value_or(0);
  557. generator.set("index", String::number(index));
  558. generator.append(" @index@,");
  559. }
  // The remaining members are written in a fixed order: grouping sizes, decimal format,
  // decimal long/short spans, currency and accounting formats, currency unit/short spans,
  // percent format, scientific format.
  // NOTE(review): this order presumably has to match the member order of the generated
  // NumberSystem struct, which is declared outside this block - confirm when changing either.
  560. generator.append(" }, @primary_grouping_size@, @secondary_grouping_size@, ");
  561. append_number_format(number_system.value.decimal_format);
  562. generator.append(" @decimal_long_formats@.span(), @decimal_short_formats@.span(), ");
  563. append_number_format(number_system.value.currency_format);
  564. generator.append(" ");
  565. append_number_format(number_system.value.accounting_format);
  566. generator.append(" @currency_unit_formats@.span(), @currency_short_formats@.span(), ");
  567. append_number_format(number_system.value.percent_format);
  568. generator.append(" ");
  569. append_number_format(number_system.value.scientific_format);
  570. generator.append(" },");
  571. }
  // Close the NumberSystem array.
  572. generator.append(R"~~~(
  573. } };
  574. )~~~");
  575. };
  // Emits the generated data for one collection of units.
  //
  // For every entry of `units`, the long, short and narrow format lists are first handed to
  // append_number_formats under a generated identifier. Afterwards a
  // `static constexpr Array<Unit, N>` called `name` is emitted whose entries refer back to those
  // identifiers through `.span()`.
  //
  // `name` is the identifier of the emitted array and the prefix of every per-format identifier.
  // `units` is iterated as key/value entries and must provide size().
  576. auto append_units = [&](String name, auto const& units) {
  // Builds the "<name>_<unit>_<format>" identifier used for one format list. The result is used
  // as the name of a generated array, so "-" in the unit key is rewritten to "_".
  // NOTE(review): the trailing `true` presumably requests replacing every occurrence - confirm
  // against String::replace.
  577. auto format_name = [&](String unit, StringView format) {
  578. unit = unit.replace("-"sv, "_"sv, true);
  579. return String::formatted("{}_{}_{}", name, unit, format);
  580. };
  // First pass: emit the format lists, because the Unit entries emitted below refer to them by
  // name and they therefore have to appear earlier in the generated file.
  581. for (auto const& unit : units) {
  582. append_number_formats(format_name(unit.key, "l"sv), unit.value.long_formats);
  583. append_number_formats(format_name(unit.key, "s"sv), unit.value.short_formats);
  584. append_number_formats(format_name(unit.key, "n"sv), unit.value.narrow_formats);
  585. }
  586. generator.set("name", name);
  587. generator.set("size", String::number(units.size()));
  // Open the Unit array; one initializer per unit follows.
  588. generator.append(R"~~~(
  589. static constexpr Array<Unit, @size@> @name@ { {)~~~");
  // Second pass: emit one Unit initializer per entry, using the same identifiers that
  // format_name produced in the first pass.
  590. for (auto const& unit : units) {
  591. generator.set("unit"sv, String::number(unit.value.unit));
  592. generator.set("long_formats"sv, format_name(unit.key, "l"sv));
  593. generator.set("short_formats"sv, format_name(unit.key, "s"sv));
  594. generator.set("narrow_formats"sv, format_name(unit.key, "n"sv));
  595. generator.append(R"~~~(
  596. { @unit@, @long_formats@.span(), @short_formats@.span(), @narrow_formats@.span() },)~~~");
  597. }
  // Close the Unit array.
  598. generator.append(R"~~~(
  599. } };
  600. )~~~");
  601. };
  602. generate_mapping(generator, locale_data.locales, "NumberSystem"sv, "s_number_systems"sv, "s_number_systems_{}", [&](auto const& name, auto const& value) { append_number_systems(name, value.number_systems); });
  603. generate_mapping(generator, locale_data.locales, "Unit"sv, "s_units"sv, "s_units_{}", [&](auto const& name, auto const& value) { append_units(name, value.units); });
  604. auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values) {
  605. HashValueMap<String> hashes;
  606. hashes.ensure_capacity(values.size());
  607. for (auto const& value : values)
  608. hashes.set(value.hash(), format_identifier(enum_title, value));
  609. generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
  610. };
  611. append_from_string("NumericSymbol"sv, "numeric_symbol"sv, locale_data.numeric_symbols);
  612. generator.append(R"~~~(
  613. static NumberSystem const* find_number_system(StringView locale, StringView system)
  614. {
  615. auto locale_value = locale_from_string(locale);
  616. if (!locale_value.has_value())
  617. return nullptr;
  618. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  619. auto const& number_systems = s_number_systems.at(locale_index);
  620. for (auto const& number_system : number_systems) {
  621. if (system == s_string_list[number_system.system])
  622. return &number_system;
  623. };
  624. return nullptr;
  625. }
  626. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol)
  627. {
  628. auto symbol_value = numeric_symbol_from_string(symbol);
  629. if (!symbol_value.has_value())
  630. return {};
  631. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  632. auto symbol_index = to_underlying(*symbol_value);
  633. return s_string_list[number_system->symbols[symbol_index]];
  634. }
  635. return {};
  636. }
  637. Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system)
  638. {
  639. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr)
  640. return NumberGroupings { number_system->primary_grouping_size, number_system->secondary_grouping_size };
  641. return {};
  642. }
  643. Optional<Unicode::NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type)
  644. {
  645. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  646. switch (type) {
  647. case StandardNumberFormatType::Decimal:
  648. return number_system->decimal_format.to_unicode_number_format();
  649. case StandardNumberFormatType::Currency:
  650. return number_system->currency_format.to_unicode_number_format();
  651. case StandardNumberFormatType::Accounting:
  652. return number_system->accounting_format.to_unicode_number_format();
  653. case StandardNumberFormatType::Percent:
  654. return number_system->percent_format.to_unicode_number_format();
  655. case StandardNumberFormatType::Scientific:
  656. return number_system->scientific_format.to_unicode_number_format();
  657. }
  658. }
  659. return {};
  660. }
  661. Vector<Unicode::NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type)
  662. {
  663. Vector<Unicode::NumberFormat> formats;
  664. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  665. Span<NumberFormat const> number_formats;
  666. switch (type) {
  667. case CompactNumberFormatType::DecimalLong:
  668. number_formats = number_system->decimal_long_formats;
  669. break;
  670. case CompactNumberFormatType::DecimalShort:
  671. number_formats = number_system->decimal_short_formats;
  672. break;
  673. case CompactNumberFormatType::CurrencyUnit:
  674. number_formats = number_system->currency_unit_formats;
  675. break;
  676. case CompactNumberFormatType::CurrencyShort:
  677. number_formats = number_system->currency_short_formats;
  678. break;
  679. }
  680. formats.ensure_capacity(number_formats.size());
  681. for (auto const& number_format : number_formats)
  682. formats.append(number_format.to_unicode_number_format());
  683. }
  684. return formats;
  685. }
  686. static Unit const* find_units(StringView locale, StringView unit)
  687. {
  688. auto locale_value = locale_from_string(locale);
  689. if (!locale_value.has_value())
  690. return nullptr;
  691. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  692. auto const& locale_units = s_units.at(locale_index);
  693. for (auto const& units : locale_units) {
  694. if (unit == s_string_list[units.unit])
  695. return &units;
  696. };
  697. return nullptr;
  698. }
  699. Vector<Unicode::NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style)
  700. {
  701. Vector<Unicode::NumberFormat> formats;
  702. if (auto const* units = find_units(locale, unit); units != nullptr) {
  703. Span<NumberFormat const> number_formats;
  704. switch (style) {
  705. case Style::Long:
  706. number_formats = units->long_formats;
  707. break;
  708. case Style::Short:
  709. number_formats = units->short_formats;
  710. break;
  711. case Style::Narrow:
  712. number_formats = units->narrow_formats;
  713. break;
  714. default:
  715. VERIFY_NOT_REACHED();
  716. }
  717. formats.ensure_capacity(number_formats.size());
  718. for (auto const& number_format : number_formats)
  719. formats.append(number_format.to_unicode_number_format());
  720. }
  721. return formats;
  722. }
  723. }
  724. )~~~");
  725. file.write(generator.as_string_view());
  726. }
  727. int main(int argc, char** argv)
  728. {
  729. char const* generated_header_path = nullptr;
  730. char const* generated_implementation_path = nullptr;
  731. char const* core_path = nullptr;
  732. char const* numbers_path = nullptr;
  733. char const* units_path = nullptr;
  734. Core::ArgsParser args_parser;
  735. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  736. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  737. args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
  738. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  739. args_parser.add_option(units_path, "Path to cldr-units directory", "units-path", 'u', "units-path");
  740. args_parser.parse(argc, argv);
  741. auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
  742. if (path.is_empty()) {
  743. warnln("{} is required", flags);
  744. args_parser.print_usage(stderr, argv[0]);
  745. exit(1);
  746. }
  747. auto file_or_error = Core::File::open(path, mode);
  748. if (file_or_error.is_error()) {
  749. warnln("Failed to open {}: {}", path, file_or_error.release_error());
  750. exit(1);
  751. }
  752. return file_or_error.release_value();
  753. };
  754. auto generated_header_file = open_file(generated_header_path, "-h/--generated-header-path", Core::OpenMode::ReadWrite);
  755. auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite);
  756. UnicodeLocaleData locale_data;
  757. parse_all_locales(core_path, numbers_path, units_path, locale_data);
  758. generate_unicode_locale_header(generated_header_file, locale_data);
  759. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  760. return 0;
  761. }