GenerateUnicodeRelativeTimeFormat.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/Format.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/JsonObject.h>
  10. #include <AK/JsonParser.h>
  11. #include <AK/JsonValue.h>
  12. #include <AK/LexicalPath.h>
  13. #include <AK/SourceGenerator.h>
  14. #include <AK/String.h>
  15. #include <AK/StringBuilder.h>
  16. #include <LibCore/ArgsParser.h>
  17. #include <LibCore/DirIterator.h>
  18. #include <LibCore/Stream.h>
  19. #include <LibUnicode/Locale.h>
  20. #include <LibUnicode/RelativeTimeFormat.h>
  21. using StringIndexType = u16;
  22. constexpr auto s_string_index_type = "u16"sv;
  23. using RelativeTimeFormatIndexType = u16;
  24. constexpr auto s_relative_time_format_index_type = "u16"sv;
  25. struct RelativeTimeFormat {
  26. unsigned hash() const
  27. {
  28. auto hash = time_unit.hash();
  29. hash = pair_int_hash(hash, style.hash());
  30. hash = pair_int_hash(hash, plurality.hash());
  31. hash = pair_int_hash(hash, tense_or_number);
  32. hash = pair_int_hash(hash, pattern);
  33. return hash;
  34. }
  35. bool operator==(RelativeTimeFormat const& other) const
  36. {
  37. return (time_unit == other.time_unit)
  38. && (plurality == other.plurality)
  39. && (style == other.style)
  40. && (tense_or_number == other.tense_or_number)
  41. && (pattern == other.pattern);
  42. }
  43. String time_unit;
  44. String style;
  45. String plurality;
  46. StringIndexType tense_or_number { 0 };
  47. StringIndexType pattern { 0 };
  48. };
  49. template<>
  50. struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
  51. ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
  52. {
  53. return Formatter<FormatString>::format(builder,
  54. "{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv,
  55. format.time_unit,
  56. format.style,
  57. format.plurality,
  58. format.tense_or_number,
  59. format.pattern);
  60. }
  61. };
  62. template<>
  63. struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
  64. static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
  65. };
  66. struct Locale {
  67. Vector<RelativeTimeFormatIndexType> time_units;
  68. };
  69. struct UnicodeLocaleData {
  70. UniqueStringStorage<StringIndexType> unique_strings;
  71. UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats;
  72. HashMap<String, Locale> locales;
  73. };
  74. static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale)
  75. {
  76. LexicalPath date_fields_path(move(locale_dates_path));
  77. date_fields_path = date_fields_path.append("dateFields.json"sv);
  78. auto date_fields = TRY(read_json_file(date_fields_path.string()));
  79. auto const& main_object = date_fields.as_object().get("main"sv);
  80. auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename());
  81. auto const& dates_object = locale_object.as_object().get("dates"sv);
  82. auto const& fields_object = dates_object.as_object().get("fields"sv);
  83. auto is_sanctioned_unit = [](auto unit) {
  84. // This is a copy of the time units sanctioned for use within ECMA-402.
  85. // https://tc39.es/ecma402/#sec-singularrelativetimeunit
  86. return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
  87. };
  88. auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
  89. RelativeTimeFormat format {};
  90. format.time_unit = unit.to_titlecase_string();
  91. format.style = style.to_titlecase_string();
  92. format.plurality = plurality.to_titlecase_string();
  93. format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number);
  94. format.pattern = locale_data.unique_strings.ensure(pattern.as_string());
  95. locale.time_units.append(locale_data.unique_formats.ensure(move(format)));
  96. };
  97. fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) {
  98. auto segments = unit_and_style.split_view('-');
  99. auto unit = segments[0];
  100. auto style = (segments.size() > 1) ? segments[1] : "long"sv;
  101. if (!is_sanctioned_unit(unit))
  102. return;
  103. patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
  104. constexpr auto number_key = "relative-type-"sv;
  105. constexpr auto tense_key = "relativeTime-type-"sv;
  106. constexpr auto plurality_key = "relativeTimePattern-count-"sv;
  107. if (type.starts_with(number_key)) {
  108. auto number = type.substring_view(number_key.length());
  109. parse_pattern(unit, style, "Other"sv, number, pattern_value);
  110. } else if (type.starts_with(tense_key)) {
  111. pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
  112. VERIFY(key.starts_with(plurality_key));
  113. auto plurality = key.substring_view(plurality_key.length());
  114. auto tense = type.substring_view(tense_key.length());
  115. parse_pattern(unit, style, plurality, tense, pattern);
  116. });
  117. }
  118. });
  119. });
  120. return {};
  121. }
  122. static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
  123. {
  124. auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
  125. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  126. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
  127. StringBuilder builder;
  128. builder.append(locale_data.unique_strings.get(parsed_locale.language));
  129. if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
  130. builder.appendff("-{}", script);
  131. if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
  132. builder.appendff("-{}", region);
  133. return builder.build();
  134. };
  135. while (dates_iterator.has_next()) {
  136. auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
  137. auto language = TRY(remove_variants_from_path(dates_path));
  138. auto& locale = locale_data.locales.ensure(language);
  139. TRY(parse_date_fields(move(dates_path), locale_data, locale));
  140. }
  141. return {};
  142. }
  143. static ErrorOr<void> generate_unicode_locale_header(Core::Stream::BufferedFile& file, UnicodeLocaleData&)
  144. {
  145. StringBuilder builder;
  146. SourceGenerator generator { builder };
  147. generator.append(R"~~~(
  148. #pragma once
  149. #include <LibUnicode/Forward.h>
  150. namespace Unicode {
  151. )~~~");
  152. generator.append(R"~~~(
  153. }
  154. )~~~");
  155. TRY(file.write(generator.as_string_view().bytes()));
  156. return {};
  157. }
  158. static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::BufferedFile& file, UnicodeLocaleData& locale_data)
  159. {
  160. StringBuilder builder;
  161. SourceGenerator generator { builder };
  162. generator.set("string_index_type"sv, s_string_index_type);
  163. generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);
  164. generator.append(R"~~~(
  165. #include <AK/Array.h>
  166. #include <AK/StringView.h>
  167. #include <AK/Vector.h>
  168. #include <LibUnicode/Locale.h>
  169. #include <LibUnicode/PluralRules.h>
  170. #include <LibUnicode/RelativeTimeFormat.h>
  171. #include <LibUnicode/UnicodeRelativeTimeFormat.h>
  172. namespace Unicode {
  173. )~~~");
  174. locale_data.unique_strings.generate(generator);
  175. generator.append(R"~~~(
  176. struct RelativeTimeFormatImpl {
  177. RelativeTimeFormat to_relative_time_format() const
  178. {
  179. RelativeTimeFormat relative_time_format {};
  180. relative_time_format.plurality = plurality;
  181. relative_time_format.pattern = decode_string(pattern);
  182. return relative_time_format;
  183. }
  184. TimeUnit time_unit;
  185. Style style;
  186. PluralCategory plurality;
  187. @string_index_type@ tense_or_number { 0 };
  188. @string_index_type@ pattern { 0 };
  189. };
  190. )~~~");
  191. locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
  192. auto append_list = [&](String name, auto const& list) {
  193. generator.set("name", name);
  194. generator.set("size", String::number(list.size()));
  195. generator.append(R"~~~(
  196. static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
  197. bool first = true;
  198. for (auto index : list) {
  199. generator.append(first ? " "sv : ", "sv);
  200. generator.append(String::number(index));
  201. first = false;
  202. }
  203. generator.append(" } };");
  204. };
  205. generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
  206. generator.append(R"~~~(
  207. Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
  208. {
  209. Vector<RelativeTimeFormat> formats;
  210. auto locale_value = locale_from_string(locale);
  211. if (!locale_value.has_value())
  212. return formats;
  213. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  214. auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
  215. for (auto const& locale_format_index : locale_formats) {
  216. auto const& locale_format = s_relative_time_formats.at(locale_format_index);
  217. if (locale_format.time_unit != time_unit)
  218. continue;
  219. if (locale_format.style != style)
  220. continue;
  221. if (decode_string(locale_format.tense_or_number) != tense_or_number)
  222. continue;
  223. formats.append(locale_format.to_relative_time_format());
  224. }
  225. return formats;
  226. }
  227. }
  228. )~~~");
  229. TRY(file.write(generator.as_string_view().bytes()));
  230. return {};
  231. }
  232. ErrorOr<int> serenity_main(Main::Arguments arguments)
  233. {
  234. StringView generated_header_path;
  235. StringView generated_implementation_path;
  236. StringView dates_path;
  237. Core::ArgsParser args_parser;
  238. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  239. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  240. args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
  241. args_parser.parse(arguments);
  242. auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write));
  243. auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write));
  244. UnicodeLocaleData locale_data;
  245. TRY(parse_all_locales(dates_path, locale_data));
  246. TRY(generate_unicode_locale_header(*generated_header_file, locale_data));
  247. TRY(generate_unicode_locale_implementation(*generated_implementation_file, locale_data));
  248. return 0;
  249. }