GenerateUnicodeNumberFormat.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/AllOf.h>
  8. #include <AK/Array.h>
  9. #include <AK/CharacterTypes.h>
  10. #include <AK/Find.h>
  11. #include <AK/Format.h>
  12. #include <AK/HashMap.h>
  13. #include <AK/JsonObject.h>
  14. #include <AK/JsonParser.h>
  15. #include <AK/JsonValue.h>
  16. #include <AK/LexicalPath.h>
  17. #include <AK/QuickSort.h>
  18. #include <AK/SourceGenerator.h>
  19. #include <AK/String.h>
  20. #include <AK/StringBuilder.h>
  21. #include <AK/Utf8View.h>
  22. #include <LibCore/ArgsParser.h>
  23. #include <LibCore/DirIterator.h>
  24. #include <LibCore/File.h>
  25. #include <LibUnicode/Locale.h>
  26. #include <LibUnicode/NumberFormat.h>
  27. #include <math.h>
  28. using StringIndexType = u16;
  29. constexpr auto s_string_index_type = "u16"sv;
  // Tells parse_number_pattern() how to post-process a pattern.
  enum class NumberFormatType {
      // The pattern is stored once its placeholders have been rewritten.
      Standard,

      // The rewritten pattern is additionally run through parse_identifiers()
      // with the "compactIdentifier" replacement.
      Compact,
  };
  34. struct NumberFormat : public Unicode::NumberFormat {
  35. using Base = Unicode::NumberFormat;
  36. static Base::Plurality plurality_from_string(StringView plurality)
  37. {
  38. if (plurality == "other"sv)
  39. return Base::Plurality::Other;
  40. if (plurality == "1"sv)
  41. return Base::Plurality::Single;
  42. if (plurality == "zero"sv)
  43. return Base::Plurality::Zero;
  44. if (plurality == "one"sv)
  45. return Base::Plurality::One;
  46. if (plurality == "two"sv)
  47. return Base::Plurality::Two;
  48. if (plurality == "few"sv)
  49. return Base::Plurality::Few;
  50. if (plurality == "many"sv)
  51. return Base::Plurality::Many;
  52. VERIFY_NOT_REACHED();
  53. }
  54. StringIndexType zero_format_index { 0 };
  55. StringIndexType positive_format_index { 0 };
  56. StringIndexType negative_format_index { 0 };
  57. Vector<StringIndexType> identifier_indices {};
  58. };
  59. struct NumberSystem {
  60. StringIndexType system { 0 };
  61. HashMap<String, StringIndexType> symbols {};
  62. u8 primary_grouping_size { 0 };
  63. u8 secondary_grouping_size { 0 };
  64. NumberFormat decimal_format {};
  65. Vector<NumberFormat> decimal_long_formats {};
  66. Vector<NumberFormat> decimal_short_formats {};
  67. NumberFormat currency_format {};
  68. NumberFormat accounting_format {};
  69. Vector<NumberFormat> currency_unit_formats {};
  70. Vector<NumberFormat> currency_short_formats {};
  71. NumberFormat percent_format {};
  72. NumberFormat scientific_format {};
  73. };
  74. struct Unit {
  75. StringIndexType unit { 0 };
  76. Vector<NumberFormat> long_formats {};
  77. Vector<NumberFormat> short_formats {};
  78. Vector<NumberFormat> narrow_formats {};
  79. };
  80. struct Locale {
  81. HashMap<String, NumberSystem> number_systems;
  82. HashMap<String, Unit> units {};
  83. };
  84. struct UnicodeLocaleData {
  85. UniqueStringStorage<StringIndexType> unique_strings;
  86. HashMap<String, Locale> locales;
  87. Vector<String> numeric_symbols;
  88. size_t max_identifier_count { 0 };
  89. };
  90. static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
  91. {
  92. static Utf8View whitespace { "\u0020\u00a0\u200f"sv };
  93. while (true) {
  94. Utf8View utf8_pattern { pattern };
  95. Optional<size_t> start_index;
  96. Optional<size_t> end_index;
  97. bool inside_replacement = false;
  98. for (auto it = utf8_pattern.begin(); it != utf8_pattern.end(); ++it) {
  99. if (*it == '{') {
  100. if (start_index.has_value()) {
  101. end_index = utf8_pattern.byte_offset_of(it);
  102. break;
  103. }
  104. inside_replacement = true;
  105. } else if (*it == '}') {
  106. inside_replacement = false;
  107. } else if (!inside_replacement && !start_index.has_value() && !whitespace.contains(*it)) {
  108. start_index = utf8_pattern.byte_offset_of(it);
  109. }
  110. }
  111. if (!start_index.has_value())
  112. return pattern;
  113. end_index = end_index.value_or(pattern.length());
  114. utf8_pattern = utf8_pattern.substring_view(*start_index, *end_index - *start_index);
  115. utf8_pattern = utf8_pattern.trim(whitespace);
  116. auto identifier = utf8_pattern.as_string().replace("'.'"sv, "."sv);
  117. auto identifier_index = locale_data.unique_strings.ensure(move(identifier));
  118. size_t replacement_index = 0;
  119. if (auto index = format.identifier_indices.find_first_index(identifier_index); index.has_value()) {
  120. replacement_index = *index;
  121. } else {
  122. replacement_index = format.identifier_indices.size();
  123. format.identifier_indices.append(identifier_index);
  124. locale_data.max_identifier_count = max(locale_data.max_identifier_count, format.identifier_indices.size());
  125. }
  126. pattern = String::formatted("{}{{{}:{}}}{}",
  127. *start_index > 0 ? pattern.substring_view(0, *start_index) : ""sv,
  128. replacement,
  129. replacement_index,
  130. pattern.substring_view(*start_index + utf8_pattern.byte_length()));
  131. }
  132. }
  133. static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormat& format, NumberSystem* number_system_for_groupings = nullptr)
  134. {
  135. // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns
  136. // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns
  137. VERIFY((patterns.size() == 1) || (patterns.size() == 2));
  138. auto replace_patterns = [&](String pattern) {
  139. static HashMap<StringView, StringView> replacements = {
  140. { "{0}"sv, "{number}"sv },
  141. { "{1}"sv, "{currency}"sv },
  142. { "%"sv, "{percentSign}"sv },
  143. { "+"sv, "{plusSign}"sv },
  144. { "-"sv, "{minusSign}"sv },
  145. { "¤"sv, "{currency}"sv }, // U+00A4 Currency Sign
  146. { "E"sv, "{scientificSeparator}"sv },
  147. };
  148. for (auto const& replacement : replacements)
  149. pattern = pattern.replace(replacement.key, replacement.value, true);
  150. if (auto start_number_index = pattern.find_any_of("#0"sv, String::SearchDirection::Forward); start_number_index.has_value()) {
  151. auto end_number_index = *start_number_index + 1;
  152. for (; end_number_index < pattern.length(); ++end_number_index) {
  153. auto ch = pattern[end_number_index];
  154. if ((ch != '#') && (ch != '0') && (ch != ',') && (ch != '.'))
  155. break;
  156. }
  157. if (number_system_for_groupings) {
  158. auto number_pattern = pattern.substring_view(*start_number_index, end_number_index - *start_number_index);
  159. auto group_separators = number_pattern.find_all(","sv);
  160. VERIFY((group_separators.size() == 1) || (group_separators.size() == 2));
  161. auto decimal = number_pattern.find('.');
  162. VERIFY(decimal.has_value());
  163. if (group_separators.size() == 1) {
  164. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[0] - 1;
  165. number_system_for_groupings->secondary_grouping_size = number_system_for_groupings->primary_grouping_size;
  166. } else {
  167. number_system_for_groupings->primary_grouping_size = *decimal - group_separators[1] - 1;
  168. number_system_for_groupings->secondary_grouping_size = group_separators[1] - group_separators[0] - 1;
  169. }
  170. }
  171. pattern = String::formatted("{}{{number}}{}",
  172. *start_number_index > 0 ? pattern.substring_view(0, *start_number_index) : ""sv,
  173. pattern.substring_view(end_number_index));
  174. // This is specifically handled here rather than in the replacements HashMap above so
  175. // that we do not errantly replace zeroes in number patterns.
  176. if (pattern.contains(*replacements.get("E"sv)))
  177. pattern = pattern.replace("0"sv, "{scientificExponent}"sv);
  178. }
  179. if (type == NumberFormatType::Compact)
  180. return parse_identifiers(move(pattern), "compactIdentifier"sv, locale_data, format);
  181. return pattern;
  182. };
  183. auto zero_format = replace_patterns(move(patterns[0]));
  184. format.positive_format_index = locale_data.unique_strings.ensure(String::formatted("{{plusSign}}{}", zero_format));
  185. if (patterns.size() == 2) {
  186. auto negative_format = replace_patterns(move(patterns[1]));
  187. format.negative_format_index = locale_data.unique_strings.ensure(move(negative_format));
  188. } else {
  189. format.negative_format_index = locale_data.unique_strings.ensure(String::formatted("{{minusSign}}{}", zero_format));
  190. }
  191. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  192. }
  193. static ErrorOr<void> parse_number_systems(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
  194. {
  195. LexicalPath numbers_path(move(locale_numbers_path));
  196. numbers_path = numbers_path.append("numbers.json"sv);
  197. auto numbers_file = TRY(Core::File::open(numbers_path.string(), Core::OpenMode::ReadOnly));
  198. auto numbers = TRY(JsonValue::from_string(numbers_file->read_all()));
  199. auto const& main_object = numbers.as_object().get("main"sv);
  200. auto const& locale_object = main_object.as_object().get(numbers_path.parent().basename());
  201. auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv);
  202. auto ensure_number_system = [&](auto const& system) -> NumberSystem& {
  203. return locale.number_systems.ensure(system, [&]() {
  204. auto system_index = locale_data.unique_strings.ensure(system);
  205. return NumberSystem { .system = system_index };
  206. });
  207. };
  208. auto parse_number_format = [&](auto const& format_object) {
  209. Vector<NumberFormat> result;
  210. result.ensure_capacity(format_object.size());
  211. format_object.for_each_member([&](auto const& key, JsonValue const& value) {
  212. auto split_key = key.split_view('-');
  213. if (split_key.size() != 3)
  214. return;
  215. auto patterns = value.as_string().split(';');
  216. NumberFormat format {};
  217. if (auto type = split_key[0].template to_uint<u64>(); type.has_value()) {
  218. VERIFY(*type % 10 == 0);
  219. format.magnitude = static_cast<u8>(log10(*type));
  220. if (patterns[0] != "0"sv) {
  221. auto number_of_zeroes_in_pattern = patterns[0].count("0"sv);
  222. VERIFY(format.magnitude >= number_of_zeroes_in_pattern);
  223. format.exponent = format.magnitude + 1 - number_of_zeroes_in_pattern;
  224. }
  225. } else {
  226. VERIFY(split_key[0] == "unitPattern"sv);
  227. }
  228. format.plurality = NumberFormat::plurality_from_string(split_key[2]);
  229. parse_number_pattern(move(patterns), locale_data, NumberFormatType::Compact, format);
  230. result.append(move(format));
  231. });
  232. return result;
  233. };
  234. locale_numbers_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
  235. constexpr auto symbols_prefix = "symbols-numberSystem-"sv;
  236. constexpr auto decimal_formats_prefix = "decimalFormats-numberSystem-"sv;
  237. constexpr auto currency_formats_prefix = "currencyFormats-numberSystem-"sv;
  238. constexpr auto percent_formats_prefix = "percentFormats-numberSystem-"sv;
  239. constexpr auto scientific_formats_prefix = "scientificFormats-numberSystem-"sv;
  240. if (key.starts_with(symbols_prefix)) {
  241. auto system = key.substring(symbols_prefix.length());
  242. auto& number_system = ensure_number_system(system);
  243. value.as_object().for_each_member([&](auto const& symbol, JsonValue const& localization) {
  244. auto symbol_index = locale_data.unique_strings.ensure(localization.as_string());
  245. number_system.symbols.set(symbol, symbol_index);
  246. if (!locale_data.numeric_symbols.contains_slow(symbol))
  247. locale_data.numeric_symbols.append(symbol);
  248. });
  249. } else if (key.starts_with(decimal_formats_prefix)) {
  250. auto system = key.substring(decimal_formats_prefix.length());
  251. auto& number_system = ensure_number_system(system);
  252. auto format_object = value.as_object().get("standard"sv);
  253. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.decimal_format, &number_system);
  254. auto const& long_format = value.as_object().get("long"sv).as_object().get("decimalFormat"sv);
  255. number_system.decimal_long_formats = parse_number_format(long_format.as_object());
  256. auto const& short_format = value.as_object().get("short"sv).as_object().get("decimalFormat"sv);
  257. number_system.decimal_short_formats = parse_number_format(short_format.as_object());
  258. } else if (key.starts_with(currency_formats_prefix)) {
  259. auto system = key.substring(currency_formats_prefix.length());
  260. auto& number_system = ensure_number_system(system);
  261. auto format_object = value.as_object().get("standard"sv);
  262. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.currency_format);
  263. format_object = value.as_object().get("accounting"sv);
  264. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.accounting_format);
  265. number_system.currency_unit_formats = parse_number_format(value.as_object());
  266. if (value.as_object().has("short"sv)) {
  267. auto const& short_format = value.as_object().get("short"sv).as_object().get("standard"sv);
  268. number_system.currency_short_formats = parse_number_format(short_format.as_object());
  269. }
  270. } else if (key.starts_with(percent_formats_prefix)) {
  271. auto system = key.substring(percent_formats_prefix.length());
  272. auto& number_system = ensure_number_system(system);
  273. auto format_object = value.as_object().get("standard"sv);
  274. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.percent_format);
  275. } else if (key.starts_with(scientific_formats_prefix)) {
  276. auto system = key.substring(scientific_formats_prefix.length());
  277. auto& number_system = ensure_number_system(system);
  278. auto format_object = value.as_object().get("standard"sv);
  279. parse_number_pattern(format_object.as_string().split(';'), locale_data, NumberFormatType::Standard, number_system.scientific_format);
  280. }
  281. });
  282. return {};
  283. }
  284. static ErrorOr<void> parse_units(String locale_units_path, UnicodeLocaleData& locale_data, Locale& locale)
  285. {
  286. LexicalPath units_path(move(locale_units_path));
  287. units_path = units_path.append("units.json"sv);
  288. auto units_file = TRY(Core::File::open(units_path.string(), Core::OpenMode::ReadOnly));
  289. auto units = TRY(JsonValue::from_string(units_file->read_all()));
  290. auto const& main_object = units.as_object().get("main"sv);
  291. auto const& locale_object = main_object.as_object().get(units_path.parent().basename());
  292. auto const& locale_units_object = locale_object.as_object().get("units"sv);
  293. auto const& long_object = locale_units_object.as_object().get("long"sv);
  294. auto const& short_object = locale_units_object.as_object().get("short"sv);
  295. auto const& narrow_object = locale_units_object.as_object().get("narrow"sv);
  296. auto ensure_unit = [&](auto const& unit) -> Unit& {
  297. return locale.units.ensure(unit, [&]() {
  298. auto unit_index = locale_data.unique_strings.ensure(unit);
  299. return Unit { .unit = unit_index };
  300. });
  301. };
  302. auto is_sanctioned_unit = [](StringView unit_name) {
  303. // This is a copy of the units sanctioned for use within ECMA-402. LibUnicode generally tries to
  304. // avoid being directly dependent on ECMA-402, but this rather significantly reduces the amount
  305. // of data generated here, and ECMA-402 is currently the only consumer of this data.
  306. // https://tc39.es/ecma402/#table-sanctioned-simple-unit-identifiers
  307. constexpr auto sanctioned_units = AK::Array { "acre"sv, "bit"sv, "byte"sv, "celsius"sv, "centimeter"sv, "day"sv, "degree"sv, "fahrenheit"sv, "fluid-ounce"sv, "foot"sv, "gallon"sv, "gigabit"sv, "gigabyte"sv, "gram"sv, "hectare"sv, "hour"sv, "inch"sv, "kilobit"sv, "kilobyte"sv, "kilogram"sv, "kilometer"sv, "liter"sv, "megabit"sv, "megabyte"sv, "meter"sv, "mile"sv, "mile-scandinavian"sv, "milliliter"sv, "millimeter"sv, "millisecond"sv, "minute"sv, "month"sv, "ounce"sv, "percent"sv, "petabyte"sv, "pound"sv, "second"sv, "stone"sv, "terabit"sv, "terabyte"sv, "week"sv, "yard"sv, "year"sv };
  308. return find(sanctioned_units.begin(), sanctioned_units.end(), unit_name) != sanctioned_units.end();
  309. };
  310. auto parse_units_object = [&](auto const& units_object, Unicode::Style style) {
  311. constexpr auto unit_pattern_prefix = "unitPattern-count-"sv;
  312. constexpr auto combined_unit_separator = "-per-"sv;
  313. units_object.for_each_member([&](auto const& key, JsonValue const& value) {
  314. auto end_of_category = key.find('-');
  315. if (!end_of_category.has_value())
  316. return;
  317. auto unit_name = key.substring(*end_of_category + 1);
  318. if (!is_sanctioned_unit(unit_name)) {
  319. auto indices = unit_name.find_all(combined_unit_separator);
  320. if (indices.size() != 1)
  321. return;
  322. auto numerator = unit_name.substring_view(0, indices[0]);
  323. auto denominator = unit_name.substring_view(indices[0] + combined_unit_separator.length());
  324. if (!is_sanctioned_unit(numerator) || !is_sanctioned_unit(denominator))
  325. return;
  326. }
  327. value.as_object().for_each_member([&](auto const& unit_key, JsonValue const& pattern_value) {
  328. if (!unit_key.starts_with(unit_pattern_prefix))
  329. return;
  330. auto& unit = ensure_unit(unit_name);
  331. NumberFormat format {};
  332. auto plurality = unit_key.substring_view(unit_pattern_prefix.length());
  333. format.plurality = NumberFormat::plurality_from_string(plurality);
  334. auto zero_format = pattern_value.as_string().replace("{0}"sv, "{number}"sv);
  335. zero_format = parse_identifiers(zero_format, "unitIdentifier"sv, locale_data, format);
  336. format.positive_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{plusSign}{number}"sv));
  337. format.negative_format_index = locale_data.unique_strings.ensure(zero_format.replace("{number}"sv, "{minusSign}{number}"sv));
  338. format.zero_format_index = locale_data.unique_strings.ensure(move(zero_format));
  339. switch (style) {
  340. case Unicode::Style::Long:
  341. unit.long_formats.append(move(format));
  342. break;
  343. case Unicode::Style::Short:
  344. unit.short_formats.append(move(format));
  345. break;
  346. case Unicode::Style::Narrow:
  347. unit.narrow_formats.append(move(format));
  348. break;
  349. default:
  350. VERIFY_NOT_REACHED();
  351. }
  352. });
  353. });
  354. };
  355. parse_units_object(long_object.as_object(), Unicode::Style::Long);
  356. parse_units_object(short_object.as_object(), Unicode::Style::Short);
  357. parse_units_object(narrow_object.as_object(), Unicode::Style::Narrow);
  358. return {};
  359. }
  360. static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, UnicodeLocaleData& locale_data)
  361. {
  362. auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
  363. auto units_iterator = TRY(path_to_dir_iterator(move(units_path)));
  364. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  365. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
  366. StringBuilder builder;
  367. builder.append(locale_data.unique_strings.get(parsed_locale.language));
  368. if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
  369. builder.appendff("-{}", script);
  370. if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
  371. builder.appendff("-{}", region);
  372. return builder.build();
  373. };
  374. while (numbers_iterator.has_next()) {
  375. auto numbers_path = TRY(next_path_from_dir_iterator(numbers_iterator));
  376. auto language = TRY(remove_variants_from_path(numbers_path));
  377. auto& locale = locale_data.locales.ensure(language);
  378. TRY(parse_number_systems(numbers_path, locale_data, locale));
  379. }
  380. while (units_iterator.has_next()) {
  381. auto units_path = TRY(next_path_from_dir_iterator(units_iterator));
  382. auto language = TRY(remove_variants_from_path(units_path));
  383. auto& locale = locale_data.locales.ensure(language);
  384. TRY(parse_units(units_path, locale_data, locale));
  385. }
  386. return {};
  387. }
  388. static String format_identifier(StringView owner, String identifier)
  389. {
  390. identifier = identifier.replace("-"sv, "_"sv, true);
  391. if (all_of(identifier, is_ascii_digit))
  392. return String::formatted("{}_{}", owner[0], identifier);
  393. if (is_ascii_lower_alpha(identifier[0]))
  394. return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
  395. return identifier;
  396. }
  397. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
  398. {
  399. StringBuilder builder;
  400. SourceGenerator generator { builder };
  401. generator.append(R"~~~(
  402. #pragma once
  403. #include <AK/Optional.h>
  404. #include <AK/StringView.h>
  405. #include <AK/Types.h>
  406. #include <AK/Vector.h>
  407. #include <LibUnicode/Forward.h>
  408. namespace Unicode {
  409. )~~~");
  410. generate_enum(generator, format_identifier, "NumericSymbol"sv, {}, locale_data.numeric_symbols);
  411. generator.append(R"~~~(
  412. namespace Detail {
  413. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView numeric_symbol);
  414. Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
  415. Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
  416. Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
  417. Vector<Unicode::NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
  418. Optional<NumericSymbol> numeric_symbol_from_string(StringView numeric_symbol);
  419. }
  420. }
  421. )~~~");
  422. VERIFY(file.write(generator.as_string_view()));
  423. }
  424. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  425. {
  426. StringBuilder builder;
  427. SourceGenerator generator { builder };
  428. generator.set("string_index_type"sv, s_string_index_type);
  429. generator.set("numeric_symbols_size", String::number(locale_data.numeric_symbols.size()));
  430. generator.set("identifier_count", String::number(locale_data.max_identifier_count));
  431. generator.append(R"~~~(
  432. #include <AK/Array.h>
  433. #include <AK/BinarySearch.h>
  434. #include <AK/Span.h>
  435. #include <LibUnicode/Locale.h>
  436. #include <LibUnicode/NumberFormat.h>
  437. #include <LibUnicode/UnicodeNumberFormat.h>
  438. namespace Unicode::Detail {
  439. )~~~");
  440. locale_data.unique_strings.generate(generator);
  441. generator.append(R"~~~(
  442. struct NumberFormat {
  443. Unicode::NumberFormat to_unicode_number_format() const {
  444. Unicode::NumberFormat number_format {};
  445. number_format.magnitude = magnitude;
  446. number_format.exponent = exponent;
  447. number_format.plurality = static_cast<Unicode::NumberFormat::Plurality>(plurality);
  448. number_format.zero_format = s_string_list[zero_format];
  449. number_format.positive_format = s_string_list[positive_format];
  450. number_format.negative_format = s_string_list[negative_format];
  451. number_format.identifiers.ensure_capacity(identifiers.size());
  452. for (@string_index_type@ identifier : identifiers)
  453. number_format.identifiers.append(s_string_list[identifier]);
  454. return number_format;
  455. }
  456. u8 magnitude { 0 };
  457. u8 exponent { 0 };
  458. u8 plurality { 0 };
  459. @string_index_type@ zero_format { 0 };
  460. @string_index_type@ positive_format { 0 };
  461. @string_index_type@ negative_format { 0 };
  462. Array<@string_index_type@, @identifier_count@> identifiers {};
  463. };
  464. struct NumberSystem {
  465. @string_index_type@ system { 0 };
  466. Array<@string_index_type@, @numeric_symbols_size@> symbols {};
  467. u8 primary_grouping_size { 0 };
  468. u8 secondary_grouping_size { 0 };
  469. NumberFormat decimal_format {};
  470. Span<NumberFormat const> decimal_long_formats {};
  471. Span<NumberFormat const> decimal_short_formats {};
  472. NumberFormat currency_format {};
  473. NumberFormat accounting_format {};
  474. Span<NumberFormat const> currency_unit_formats {};
  475. Span<NumberFormat const> currency_short_formats {};
  476. NumberFormat percent_format {};
  477. NumberFormat scientific_format {};
  478. };
  479. struct Unit {
  480. @string_index_type@ unit { 0 };
  481. Span<NumberFormat const> long_formats {};
  482. Span<NumberFormat const> short_formats {};
  483. Span<NumberFormat const> narrow_formats {};
  484. };
  485. )~~~");
  486. auto append_number_format = [&](auto const& number_format) {
  487. generator.set("magnitude"sv, String::number(number_format.magnitude));
  488. generator.set("exponent"sv, String::number(number_format.exponent));
  489. generator.set("plurality"sv, String::number(static_cast<u8>(number_format.plurality)));
  490. generator.set("zero_format"sv, String::number(number_format.zero_format_index));
  491. generator.set("positive_format"sv, String::number(number_format.positive_format_index));
  492. generator.set("negative_format"sv, String::number(number_format.negative_format_index));
  493. generator.append("{ @magnitude@, @exponent@, @plurality@, @zero_format@, @positive_format@, @negative_format@, { ");
  494. bool first = true;
  495. for (auto identifier_index : number_format.identifier_indices) {
  496. if (!first)
  497. generator.append(", ");
  498. generator.append(String::number(identifier_index));
  499. first = false;
  500. }
  501. generator.append(" } },");
  502. };
  503. auto append_number_formats = [&](String name, auto const& number_formats) {
  504. generator.set("name"sv, move(name));
  505. generator.set("size"sv, String::number(number_formats.size()));
  506. generator.append(R"~~~(
  507. static constexpr Array<NumberFormat, @size@> @name@ { {
  508. )~~~");
  509. constexpr size_t max_values_per_row = 10;
  510. size_t values_in_current_row = 0;
  511. for (auto const& number_format : number_formats) {
  512. if (values_in_current_row++ > 0)
  513. generator.append(" ");
  514. append_number_format(number_format);
  515. if (values_in_current_row == max_values_per_row) {
  516. values_in_current_row = 0;
  517. generator.append("\n ");
  518. }
  519. }
  520. generator.append(R"~~~(
  521. } };
  522. )~~~");
  523. };
  524. auto append_number_systems = [&](String name, auto const& number_systems) {
  525. auto format_name = [&](StringView system, StringView format) {
  526. return String::formatted("{}_{}_{}", name, system, format);
  527. };
  528. for (auto const& number_system : number_systems) {
  529. append_number_formats(format_name(number_system.key, "dl"sv), number_system.value.decimal_long_formats);
  530. append_number_formats(format_name(number_system.key, "ds"sv), number_system.value.decimal_short_formats);
  531. append_number_formats(format_name(number_system.key, "cu"sv), number_system.value.currency_unit_formats);
  532. append_number_formats(format_name(number_system.key, "cs"sv), number_system.value.currency_short_formats);
  533. }
  534. generator.set("name", name);
  535. generator.set("size", String::number(number_systems.size()));
  536. generator.append(R"~~~(
  537. static constexpr Array<NumberSystem, @size@> @name@ { {)~~~");
  538. for (auto const& number_system : number_systems) {
  539. generator.set("system"sv, String::number(number_system.value.system));
  540. generator.set("primary_grouping_size"sv, String::number(number_system.value.primary_grouping_size));
  541. generator.set("secondary_grouping_size"sv, String::number(number_system.value.secondary_grouping_size));
  542. generator.set("decimal_long_formats"sv, format_name(number_system.key, "dl"sv));
  543. generator.set("decimal_short_formats"sv, format_name(number_system.key, "ds"sv));
  544. generator.set("currency_unit_formats"sv, format_name(number_system.key, "cu"sv));
  545. generator.set("currency_short_formats"sv, format_name(number_system.key, "cs"sv));
  546. generator.append(R"~~~(
  547. { @system@, {)~~~");
  548. for (auto const& symbol : locale_data.numeric_symbols) {
  549. auto index = number_system.value.symbols.get(symbol).value_or(0);
  550. generator.set("index", String::number(index));
  551. generator.append(" @index@,");
  552. }
  553. generator.append(" }, @primary_grouping_size@, @secondary_grouping_size@, ");
  554. append_number_format(number_system.value.decimal_format);
  555. generator.append(" @decimal_long_formats@.span(), @decimal_short_formats@.span(), ");
  556. append_number_format(number_system.value.currency_format);
  557. generator.append(" ");
  558. append_number_format(number_system.value.accounting_format);
  559. generator.append(" @currency_unit_formats@.span(), @currency_short_formats@.span(), ");
  560. append_number_format(number_system.value.percent_format);
  561. generator.append(" ");
  562. append_number_format(number_system.value.scientific_format);
  563. generator.append(" },");
  564. }
  565. generator.append(R"~~~(
  566. } };
  567. )~~~");
  568. };
  569. auto append_units = [&](String name, auto const& units) {
  570. auto format_name = [&](String unit, StringView format) {
  571. unit = unit.replace("-"sv, "_"sv, true);
  572. return String::formatted("{}_{}_{}", name, unit, format);
  573. };
  574. for (auto const& unit : units) {
  575. append_number_formats(format_name(unit.key, "l"sv), unit.value.long_formats);
  576. append_number_formats(format_name(unit.key, "s"sv), unit.value.short_formats);
  577. append_number_formats(format_name(unit.key, "n"sv), unit.value.narrow_formats);
  578. }
  579. generator.set("name", name);
  580. generator.set("size", String::number(units.size()));
  581. generator.append(R"~~~(
  582. static constexpr Array<Unit, @size@> @name@ { {)~~~");
  583. for (auto const& unit : units) {
  584. generator.set("unit"sv, String::number(unit.value.unit));
  585. generator.set("long_formats"sv, format_name(unit.key, "l"sv));
  586. generator.set("short_formats"sv, format_name(unit.key, "s"sv));
  587. generator.set("narrow_formats"sv, format_name(unit.key, "n"sv));
  588. generator.append(R"~~~(
  589. { @unit@, @long_formats@.span(), @short_formats@.span(), @narrow_formats@.span() },)~~~");
  590. }
  591. generator.append(R"~~~(
  592. } };
  593. )~~~");
  594. };
  595. generate_mapping(generator, locale_data.locales, "NumberSystem"sv, "s_number_systems"sv, "s_number_systems_{}", [&](auto const& name, auto const& value) { append_number_systems(name, value.number_systems); });
  596. generate_mapping(generator, locale_data.locales, "Unit"sv, "s_units"sv, "s_units_{}", [&](auto const& name, auto const& value) { append_units(name, value.units); });
  597. auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values) {
  598. HashValueMap<String> hashes;
  599. hashes.ensure_capacity(values.size());
  600. for (auto const& value : values)
  601. hashes.set(value.hash(), format_identifier(enum_title, value));
  602. generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
  603. };
  604. append_from_string("NumericSymbol"sv, "numeric_symbol"sv, locale_data.numeric_symbols);
  // Emit the lookup functions of the generated implementation file. The text below holds no
  // @placeholder@ keys. It defines, in order:
  //   - find_number_system(locale, system): linear search of the locale's NumberSystem table.
  //   - get_number_system_symbol(locale, system, symbol)
  //   - get_number_system_groupings(locale, system)
  //   - get_standard_number_system_format(locale, system, type)
  //   - get_compact_number_system_formats(locale, system, type)
  //   - find_units(locale, unit): linear search of the locale's Unit table.
  //   - get_unit_formats(locale, unit, style)
  // Both find_* helpers return nullptr for an unknown locale or when no entry matches; the
  // public getters then return an empty Optional / empty Vector.
  // NOTE(review): the lone closing brace at the end of the text presumably closes a namespace
  // opened by text emitted earlier in this function - confirm against the start of the function.
  605. generator.append(R"~~~(
  606. static NumberSystem const* find_number_system(StringView locale, StringView system)
  607. {
  608. auto locale_value = locale_from_string(locale);
  609. if (!locale_value.has_value())
  610. return nullptr;
  611. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  612. auto const& number_systems = s_number_systems.at(locale_index);
  613. for (auto const& number_system : number_systems) {
  614. if (system == s_string_list[number_system.system])
  615. return &number_system;
  616. };
  617. return nullptr;
  618. }
  619. Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol)
  620. {
  621. auto symbol_value = numeric_symbol_from_string(symbol);
  622. if (!symbol_value.has_value())
  623. return {};
  624. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  625. auto symbol_index = to_underlying(*symbol_value);
  626. return s_string_list[number_system->symbols[symbol_index]];
  627. }
  628. return {};
  629. }
  630. Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system)
  631. {
  632. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr)
  633. return NumberGroupings { number_system->primary_grouping_size, number_system->secondary_grouping_size };
  634. return {};
  635. }
  636. Optional<Unicode::NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type)
  637. {
  638. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  639. switch (type) {
  640. case StandardNumberFormatType::Decimal:
  641. return number_system->decimal_format.to_unicode_number_format();
  642. case StandardNumberFormatType::Currency:
  643. return number_system->currency_format.to_unicode_number_format();
  644. case StandardNumberFormatType::Accounting:
  645. return number_system->accounting_format.to_unicode_number_format();
  646. case StandardNumberFormatType::Percent:
  647. return number_system->percent_format.to_unicode_number_format();
  648. case StandardNumberFormatType::Scientific:
  649. return number_system->scientific_format.to_unicode_number_format();
  650. }
  651. }
  652. return {};
  653. }
  654. Vector<Unicode::NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type)
  655. {
  656. Vector<Unicode::NumberFormat> formats;
  657. if (auto const* number_system = find_number_system(locale, system); number_system != nullptr) {
  658. Span<NumberFormat const> number_formats;
  659. switch (type) {
  660. case CompactNumberFormatType::DecimalLong:
  661. number_formats = number_system->decimal_long_formats;
  662. break;
  663. case CompactNumberFormatType::DecimalShort:
  664. number_formats = number_system->decimal_short_formats;
  665. break;
  666. case CompactNumberFormatType::CurrencyUnit:
  667. number_formats = number_system->currency_unit_formats;
  668. break;
  669. case CompactNumberFormatType::CurrencyShort:
  670. number_formats = number_system->currency_short_formats;
  671. break;
  672. }
  673. formats.ensure_capacity(number_formats.size());
  674. for (auto const& number_format : number_formats)
  675. formats.append(number_format.to_unicode_number_format());
  676. }
  677. return formats;
  678. }
  679. static Unit const* find_units(StringView locale, StringView unit)
  680. {
  681. auto locale_value = locale_from_string(locale);
  682. if (!locale_value.has_value())
  683. return nullptr;
  684. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  685. auto const& locale_units = s_units.at(locale_index);
  686. for (auto const& units : locale_units) {
  687. if (unit == s_string_list[units.unit])
  688. return &units;
  689. };
  690. return nullptr;
  691. }
  692. Vector<Unicode::NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style)
  693. {
  694. Vector<Unicode::NumberFormat> formats;
  695. if (auto const* units = find_units(locale, unit); units != nullptr) {
  696. Span<NumberFormat const> number_formats;
  697. switch (style) {
  698. case Style::Long:
  699. number_formats = units->long_formats;
  700. break;
  701. case Style::Short:
  702. number_formats = units->short_formats;
  703. break;
  704. case Style::Narrow:
  705. number_formats = units->narrow_formats;
  706. break;
  707. default:
  708. VERIFY_NOT_REACHED();
  709. }
  710. formats.ensure_capacity(number_formats.size());
  711. for (auto const& number_format : number_formats)
  712. formats.append(number_format.to_unicode_number_format());
  713. }
  714. return formats;
  715. }
  716. }
  717. )~~~");
  // Write everything accumulated in the generator to the output file in a single call.
  // NOTE(review): VERIFY presumably aborts the generator when write() reports failure - confirm.
  718. VERIFY(file.write(generator.as_string_view()));
  719. }
  720. ErrorOr<int> serenity_main(Main::Arguments arguments)
  721. {
  722. StringView generated_header_path = nullptr;
  723. StringView generated_implementation_path = nullptr;
  724. StringView numbers_path = nullptr;
  725. StringView units_path = nullptr;
  726. Core::ArgsParser args_parser;
  727. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  728. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  729. args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
  730. args_parser.add_option(units_path, "Path to cldr-units directory", "units-path", 'u', "units-path");
  731. args_parser.parse(arguments);
  732. auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
  733. if (path.is_empty()) {
  734. args_parser.print_usage(stderr, arguments.argv[0]);
  735. return Error::from_string_literal("Must provide all command line options"sv);
  736. }
  737. return Core::File::open(path, Core::OpenMode::ReadWrite);
  738. };
  739. auto generated_header_file = TRY(open_file(generated_header_path));
  740. auto generated_implementation_file = TRY(open_file(generated_implementation_path));
  741. UnicodeLocaleData locale_data;
  742. TRY(parse_all_locales(numbers_path, units_path, locale_data));
  743. generate_unicode_locale_header(generated_header_file, locale_data);
  744. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  745. return 0;
  746. }