GenerateRelativeTimeFormatData.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
  7. #include <AK/Format.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/JsonObject.h>
  10. #include <AK/JsonParser.h>
  11. #include <AK/JsonValue.h>
  12. #include <AK/LexicalPath.h>
  13. #include <AK/SourceGenerator.h>
  14. #include <AK/String.h>
  15. #include <AK/StringBuilder.h>
  16. #include <LibCore/ArgsParser.h>
  17. #include <LibCore/DirIterator.h>
  18. #include <LibCore/Stream.h>
  19. #include <LibLocale/Locale.h>
  20. #include <LibLocale/RelativeTimeFormat.h>
  21. struct RelativeTimeFormat {
  22. unsigned hash() const
  23. {
  24. auto hash = time_unit.hash();
  25. hash = pair_int_hash(hash, style.hash());
  26. hash = pair_int_hash(hash, plurality.hash());
  27. hash = pair_int_hash(hash, tense_or_number);
  28. hash = pair_int_hash(hash, pattern);
  29. return hash;
  30. }
  31. bool operator==(RelativeTimeFormat const& other) const
  32. {
  33. return (time_unit == other.time_unit)
  34. && (plurality == other.plurality)
  35. && (style == other.style)
  36. && (tense_or_number == other.tense_or_number)
  37. && (pattern == other.pattern);
  38. }
  39. String time_unit;
  40. String style;
  41. String plurality;
  42. size_t tense_or_number { 0 };
  43. size_t pattern { 0 };
  44. };
  45. template<>
  46. struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
  47. ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
  48. {
  49. return Formatter<FormatString>::format(builder,
  50. "{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv,
  51. format.time_unit,
  52. format.style,
  53. format.plurality,
  54. format.tense_or_number,
  55. format.pattern);
  56. }
  57. };
  58. template<>
  59. struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
  60. static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
  61. };
  62. struct LocaleData {
  63. Vector<size_t> time_units;
  64. };
  65. struct CLDR {
  66. UniqueStringStorage unique_strings;
  67. UniqueStorage<RelativeTimeFormat> unique_formats;
  68. HashMap<String, LocaleData> locales;
  69. };
  70. static ErrorOr<void> parse_date_fields(String locale_dates_path, CLDR& cldr, LocaleData& locale)
  71. {
  72. LexicalPath date_fields_path(move(locale_dates_path));
  73. date_fields_path = date_fields_path.append("dateFields.json"sv);
  74. auto date_fields = TRY(read_json_file(date_fields_path.string()));
  75. auto const& main_object = date_fields.as_object().get("main"sv);
  76. auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename());
  77. auto const& dates_object = locale_object.as_object().get("dates"sv);
  78. auto const& fields_object = dates_object.as_object().get("fields"sv);
  79. auto is_sanctioned_unit = [](auto unit) {
  80. // This is a copy of the time units sanctioned for use within ECMA-402.
  81. // https://tc39.es/ecma402/#sec-singularrelativetimeunit
  82. return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
  83. };
  84. auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
  85. RelativeTimeFormat format {};
  86. format.time_unit = unit.to_titlecase_string();
  87. format.style = style.to_titlecase_string();
  88. format.plurality = plurality.to_titlecase_string();
  89. format.tense_or_number = cldr.unique_strings.ensure(tense_or_number);
  90. format.pattern = cldr.unique_strings.ensure(pattern.as_string());
  91. locale.time_units.append(cldr.unique_formats.ensure(move(format)));
  92. };
  93. fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) {
  94. auto segments = unit_and_style.split_view('-');
  95. auto unit = segments[0];
  96. auto style = (segments.size() > 1) ? segments[1] : "long"sv;
  97. if (!is_sanctioned_unit(unit))
  98. return;
  99. patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
  100. constexpr auto number_key = "relative-type-"sv;
  101. constexpr auto tense_key = "relativeTime-type-"sv;
  102. constexpr auto plurality_key = "relativeTimePattern-count-"sv;
  103. if (type.starts_with(number_key)) {
  104. auto number = type.substring_view(number_key.length());
  105. parse_pattern(unit, style, "Other"sv, number, pattern_value);
  106. } else if (type.starts_with(tense_key)) {
  107. pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
  108. VERIFY(key.starts_with(plurality_key));
  109. auto plurality = key.substring_view(plurality_key.length());
  110. auto tense = type.substring_view(tense_key.length());
  111. parse_pattern(unit, style, plurality, tense, pattern);
  112. });
  113. }
  114. });
  115. });
  116. return {};
  117. }
  118. static ErrorOr<void> parse_all_locales(String dates_path, CLDR& cldr)
  119. {
  120. auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
  121. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  122. auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path)));
  123. StringBuilder builder;
  124. builder.append(cldr.unique_strings.get(parsed_locale.language));
  125. if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty())
  126. builder.appendff("-{}", script);
  127. if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty())
  128. builder.appendff("-{}", region);
  129. return builder.build();
  130. };
  131. while (dates_iterator.has_next()) {
  132. auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
  133. auto language = TRY(remove_variants_from_path(dates_path));
  134. auto& locale = cldr.locales.ensure(language);
  135. TRY(parse_date_fields(move(dates_path), cldr, locale));
  136. }
  137. return {};
  138. }
  139. static ErrorOr<void> generate_unicode_locale_header(Core::Stream::BufferedFile& file, CLDR&)
  140. {
  141. StringBuilder builder;
  142. SourceGenerator generator { builder };
  143. generator.append(R"~~~(
  144. #pragma once
  145. #include <LibLocale/Forward.h>
  146. namespace Locale {
  147. )~~~");
  148. generator.append(R"~~~(
  149. }
  150. )~~~");
  151. TRY(file.write(generator.as_string_view().bytes()));
  152. return {};
  153. }
  154. static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, CLDR& cldr)
  155. {
  156. StringBuilder builder;
  157. SourceGenerator generator { builder };
  158. generator.set("string_index_type"sv, cldr.unique_strings.type_that_fits());
  159. generator.set("relative_time_format_index_type"sv, cldr.unique_formats.type_that_fits());
  160. generator.append(R"~~~(
  161. #include <AK/Array.h>
  162. #include <AK/StringView.h>
  163. #include <AK/Vector.h>
  164. #include <LibLocale/Locale.h>
  165. #include <LibLocale/PluralRules.h>
  166. #include <LibLocale/RelativeTimeFormat.h>
  167. #include <LibLocale/RelativeTimeFormatData.h>
  168. namespace Locale {
  169. )~~~");
  170. cldr.unique_strings.generate(generator);
  171. generator.append(R"~~~(
  172. struct RelativeTimeFormatImpl {
  173. RelativeTimeFormat to_relative_time_format() const
  174. {
  175. RelativeTimeFormat relative_time_format {};
  176. relative_time_format.plurality = plurality;
  177. relative_time_format.pattern = decode_string(pattern);
  178. return relative_time_format;
  179. }
  180. TimeUnit time_unit;
  181. Style style;
  182. PluralCategory plurality;
  183. @string_index_type@ tense_or_number { 0 };
  184. @string_index_type@ pattern { 0 };
  185. };
  186. )~~~");
  187. cldr.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
  188. auto append_list = [&](String name, auto const& list) {
  189. generator.set("name", name);
  190. generator.set("size", String::number(list.size()));
  191. generator.append(R"~~~(
  192. static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
  193. bool first = true;
  194. for (auto index : list) {
  195. generator.append(first ? " "sv : ", "sv);
  196. generator.append(String::number(index));
  197. first = false;
  198. }
  199. generator.append(" } };");
  200. };
  201. generate_mapping(generator, cldr.locales, cldr.unique_formats.type_that_fits(), "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
  202. generator.append(R"~~~(
  203. Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
  204. {
  205. Vector<RelativeTimeFormat> formats;
  206. auto locale_value = locale_from_string(locale);
  207. if (!locale_value.has_value())
  208. return formats;
  209. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  210. auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
  211. for (auto const& locale_format_index : locale_formats) {
  212. auto const& locale_format = s_relative_time_formats.at(locale_format_index);
  213. if (locale_format.time_unit != time_unit)
  214. continue;
  215. if (locale_format.style != style)
  216. continue;
  217. if (decode_string(locale_format.tense_or_number) != tense_or_number)
  218. continue;
  219. formats.append(locale_format.to_relative_time_format());
  220. }
  221. return formats;
  222. }
  223. }
  224. )~~~");
  225. TRY(file.write(generator.as_string_view().bytes()));
  226. return {};
  227. }
  228. ErrorOr<int> serenity_main(Main::Arguments arguments)
  229. {
  230. StringView generated_header_path;
  231. StringView generated_implementation_path;
  232. StringView dates_path;
  233. Core::ArgsParser args_parser;
  234. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  235. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  236. args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
  237. args_parser.parse(arguments);
  238. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
  239. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
  240. CLDR cldr;
  241. TRY(parse_all_locales(dates_path, cldr));
  242. TRY(generate_unicode_locale_header(*generated_header_file, cldr));
  243. TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));
  244. return 0;
  245. }