GenerateRelativeTimeFormatData.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. /*
  2. * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
  7. #include <AK/DeprecatedString.h>
  8. #include <AK/Format.h>
  9. #include <AK/HashMap.h>
  10. #include <AK/JsonObject.h>
  11. #include <AK/JsonParser.h>
  12. #include <AK/JsonValue.h>
  13. #include <AK/LexicalPath.h>
  14. #include <AK/SourceGenerator.h>
  15. #include <AK/StringBuilder.h>
  16. #include <LibCore/ArgsParser.h>
  17. #include <LibCore/DirIterator.h>
  18. #include <LibLocale/Locale.h>
  19. #include <LibLocale/RelativeTimeFormat.h>
  20. struct RelativeTimeFormat {
  21. unsigned hash() const
  22. {
  23. auto hash = time_unit.hash();
  24. hash = pair_int_hash(hash, style.hash());
  25. hash = pair_int_hash(hash, plurality.hash());
  26. hash = pair_int_hash(hash, tense_or_number);
  27. hash = pair_int_hash(hash, pattern);
  28. return hash;
  29. }
  30. bool operator==(RelativeTimeFormat const& other) const
  31. {
  32. return (time_unit == other.time_unit)
  33. && (plurality == other.plurality)
  34. && (style == other.style)
  35. && (tense_or_number == other.tense_or_number)
  36. && (pattern == other.pattern);
  37. }
  38. DeprecatedString time_unit;
  39. DeprecatedString style;
  40. DeprecatedString plurality;
  41. size_t tense_or_number { 0 };
  42. size_t pattern { 0 };
  43. };
  44. template<>
  45. struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
  46. ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
  47. {
  48. return Formatter<FormatString>::format(builder,
  49. "{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv,
  50. format.time_unit,
  51. format.style,
  52. format.plurality,
  53. format.tense_or_number,
  54. format.pattern);
  55. }
  56. };
  57. template<>
  58. struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
  59. static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
  60. };
  61. struct LocaleData {
  62. Vector<size_t> time_units;
  63. };
  64. struct CLDR {
  65. UniqueStringStorage unique_strings;
  66. UniqueStorage<RelativeTimeFormat> unique_formats;
  67. HashMap<DeprecatedString, LocaleData> locales;
  68. };
  69. static ErrorOr<void> parse_date_fields(DeprecatedString locale_dates_path, CLDR& cldr, LocaleData& locale)
  70. {
  71. LexicalPath date_fields_path(move(locale_dates_path));
  72. date_fields_path = date_fields_path.append("dateFields.json"sv);
  73. auto date_fields = TRY(read_json_file(date_fields_path.string()));
  74. auto const& main_object = date_fields.as_object().get_object("main"sv).value();
  75. auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value();
  76. auto const& dates_object = locale_object.get_object("dates"sv).value();
  77. auto const& fields_object = dates_object.get_object("fields"sv).value();
  78. auto is_sanctioned_unit = [](auto unit) {
  79. // This is a copy of the time units sanctioned for use within ECMA-402.
  80. // https://tc39.es/ecma402/#sec-singularrelativetimeunit
  81. return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
  82. };
  83. auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
  84. RelativeTimeFormat format {};
  85. format.time_unit = unit.to_titlecase_string();
  86. format.style = style.to_titlecase_string();
  87. format.plurality = plurality.to_titlecase_string();
  88. format.tense_or_number = cldr.unique_strings.ensure(tense_or_number);
  89. format.pattern = cldr.unique_strings.ensure(pattern.as_string());
  90. locale.time_units.append(cldr.unique_formats.ensure(move(format)));
  91. };
  92. fields_object.for_each_member([&](auto const& unit_and_style, auto const& patterns) {
  93. auto segments = unit_and_style.split_view('-');
  94. auto unit = segments[0];
  95. auto style = (segments.size() > 1) ? segments[1] : "long"sv;
  96. if (!is_sanctioned_unit(unit))
  97. return;
  98. patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
  99. constexpr auto number_key = "relative-type-"sv;
  100. constexpr auto tense_key = "relativeTime-type-"sv;
  101. constexpr auto plurality_key = "relativeTimePattern-count-"sv;
  102. if (type.starts_with(number_key)) {
  103. auto number = type.substring_view(number_key.length());
  104. parse_pattern(unit, style, "Other"sv, number, pattern_value);
  105. } else if (type.starts_with(tense_key)) {
  106. pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
  107. VERIFY(key.starts_with(plurality_key));
  108. auto plurality = key.substring_view(plurality_key.length());
  109. auto tense = type.substring_view(tense_key.length());
  110. parse_pattern(unit, style, plurality, tense, pattern);
  111. });
  112. }
  113. });
  114. });
  115. return {};
  116. }
  117. static ErrorOr<void> parse_all_locales(DeprecatedString dates_path, CLDR& cldr)
  118. {
  119. auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
  120. auto remove_variants_from_path = [&](DeprecatedString path) -> ErrorOr<DeprecatedString> {
  121. auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path)));
  122. StringBuilder builder;
  123. builder.append(cldr.unique_strings.get(parsed_locale.language));
  124. if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty())
  125. builder.appendff("-{}", script);
  126. if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty())
  127. builder.appendff("-{}", region);
  128. return builder.to_deprecated_string();
  129. };
  130. while (dates_iterator.has_next()) {
  131. auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
  132. auto language = TRY(remove_variants_from_path(dates_path));
  133. auto& locale = cldr.locales.ensure(language);
  134. TRY(parse_date_fields(move(dates_path), cldr, locale));
  135. }
  136. return {};
  137. }
  138. static ErrorOr<void> generate_unicode_locale_header(Core::BufferedFile& file, CLDR&)
  139. {
  140. StringBuilder builder;
  141. SourceGenerator generator { builder };
  142. generator.append(R"~~~(
  143. #pragma once
  144. #include <LibLocale/Forward.h>
  145. namespace Locale {
  146. )~~~");
  147. generator.append(R"~~~(
  148. }
  149. )~~~");
  150. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  151. return {};
  152. }
  153. static ErrorOr<void> generate_unicode_locale_implementation(Core::BufferedFile& file, CLDR& cldr)
  154. {
  155. StringBuilder builder;
  156. SourceGenerator generator { builder };
  157. generator.set("string_index_type"sv, cldr.unique_strings.type_that_fits());
  158. generator.set("relative_time_format_index_type"sv, cldr.unique_formats.type_that_fits());
  159. generator.append(R"~~~(
  160. #include <AK/Array.h>
  161. #include <AK/StringView.h>
  162. #include <AK/Vector.h>
  163. #include <LibLocale/Locale.h>
  164. #include <LibLocale/PluralRules.h>
  165. #include <LibLocale/RelativeTimeFormat.h>
  166. #include <LibLocale/RelativeTimeFormatData.h>
  167. namespace Locale {
  168. )~~~");
  169. cldr.unique_strings.generate(generator);
  170. generator.append(R"~~~(
  171. struct RelativeTimeFormatImpl {
  172. RelativeTimeFormat to_relative_time_format() const
  173. {
  174. RelativeTimeFormat relative_time_format {};
  175. relative_time_format.plurality = plurality;
  176. relative_time_format.pattern = decode_string(pattern);
  177. return relative_time_format;
  178. }
  179. TimeUnit time_unit;
  180. Style style;
  181. PluralCategory plurality;
  182. @string_index_type@ tense_or_number { 0 };
  183. @string_index_type@ pattern { 0 };
  184. };
  185. )~~~");
  186. cldr.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
  187. auto append_list = [&](DeprecatedString name, auto const& list) {
  188. generator.set("name", name);
  189. generator.set("size", DeprecatedString::number(list.size()));
  190. generator.append(R"~~~(
  191. static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
  192. bool first = true;
  193. for (auto index : list) {
  194. generator.append(first ? " "sv : ", "sv);
  195. generator.append(DeprecatedString::number(index));
  196. first = false;
  197. }
  198. generator.append(" } };");
  199. };
  200. generate_mapping(generator, cldr.locales, cldr.unique_formats.type_that_fits(), "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
  201. generator.append(R"~~~(
  202. ErrorOr<Vector<RelativeTimeFormat>> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
  203. {
  204. Vector<RelativeTimeFormat> formats;
  205. auto locale_value = locale_from_string(locale);
  206. if (!locale_value.has_value())
  207. return formats;
  208. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  209. auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
  210. for (auto const& locale_format_index : locale_formats) {
  211. auto const& locale_format = s_relative_time_formats.at(locale_format_index);
  212. if (locale_format.time_unit != time_unit)
  213. continue;
  214. if (locale_format.style != style)
  215. continue;
  216. if (decode_string(locale_format.tense_or_number) != tense_or_number)
  217. continue;
  218. TRY(formats.try_append(locale_format.to_relative_time_format()));
  219. }
  220. return formats;
  221. }
  222. }
  223. )~~~");
  224. TRY(file.write_until_depleted(generator.as_string_view().bytes()));
  225. return {};
  226. }
  227. ErrorOr<int> serenity_main(Main::Arguments arguments)
  228. {
  229. StringView generated_header_path;
  230. StringView generated_implementation_path;
  231. StringView dates_path;
  232. Core::ArgsParser args_parser;
  233. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  234. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  235. args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
  236. args_parser.parse(arguments);
  237. auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
  238. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
  239. CLDR cldr;
  240. TRY(parse_all_locales(dates_path, cldr));
  241. TRY(generate_unicode_locale_header(*generated_header_file, cldr));
  242. TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));
  243. return 0;
  244. }