GenerateUnicodeRelativeTimeFormat.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "GeneratorUtil.h"
  7. #include <AK/Format.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/JsonObject.h>
  10. #include <AK/JsonParser.h>
  11. #include <AK/JsonValue.h>
  12. #include <AK/LexicalPath.h>
  13. #include <AK/SourceGenerator.h>
  14. #include <AK/String.h>
  15. #include <AK/StringBuilder.h>
  16. #include <LibCore/ArgsParser.h>
  17. #include <LibCore/DirIterator.h>
  18. #include <LibCore/File.h>
  19. #include <LibUnicode/Locale.h>
  20. #include <LibUnicode/RelativeTimeFormat.h>
  21. using StringIndexType = u16;
  22. constexpr auto s_string_index_type = "u16"sv;
  23. using RelativeTimeFormatIndexType = u16;
  24. constexpr auto s_relative_time_format_index_type = "u16"sv;
  25. struct RelativeTimeFormat {
  26. unsigned hash() const
  27. {
  28. auto hash = time_unit.hash();
  29. hash = pair_int_hash(hash, style.hash());
  30. hash = pair_int_hash(hash, plurality.hash());
  31. hash = pair_int_hash(hash, tense_or_number);
  32. hash = pair_int_hash(hash, pattern);
  33. return hash;
  34. }
  35. bool operator==(RelativeTimeFormat const& other) const
  36. {
  37. return (time_unit == other.time_unit)
  38. && (plurality == other.plurality)
  39. && (style == other.style)
  40. && (tense_or_number == other.tense_or_number)
  41. && (pattern == other.pattern);
  42. }
  43. String time_unit;
  44. String style;
  45. String plurality;
  46. StringIndexType tense_or_number { 0 };
  47. StringIndexType pattern { 0 };
  48. };
  49. template<>
  50. struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
  51. ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
  52. {
  53. return Formatter<FormatString>::format(builder,
  54. "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}",
  55. format.time_unit,
  56. format.style,
  57. format.plurality,
  58. format.tense_or_number,
  59. format.pattern);
  60. }
  61. };
  62. template<>
  63. struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
  64. static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
  65. };
  66. struct Locale {
  67. Vector<RelativeTimeFormatIndexType> time_units;
  68. };
  69. struct UnicodeLocaleData {
  70. UniqueStringStorage<StringIndexType> unique_strings;
  71. UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats;
  72. HashMap<String, Locale> locales;
  73. };
  74. static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale)
  75. {
  76. LexicalPath date_fields_path(move(locale_dates_path));
  77. date_fields_path = date_fields_path.append("dateFields.json"sv);
  78. auto date_fields_file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly));
  79. auto date_fields = TRY(JsonValue::from_string(date_fields_file->read_all()));
  80. auto const& main_object = date_fields.as_object().get("main"sv);
  81. auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename());
  82. auto const& dates_object = locale_object.as_object().get("dates"sv);
  83. auto const& fields_object = dates_object.as_object().get("fields"sv);
  84. auto is_sanctioned_unit = [](auto unit) {
  85. // This is a copy of the time units sanctioned for use within ECMA-402.
  86. // https://tc39.es/ecma402/#sec-singularrelativetimeunit
  87. return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
  88. };
  89. auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
  90. RelativeTimeFormat format {};
  91. format.time_unit = unit.to_titlecase_string();
  92. format.style = style.to_titlecase_string();
  93. format.plurality = plurality.to_titlecase_string();
  94. format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number);
  95. format.pattern = locale_data.unique_strings.ensure(pattern.as_string());
  96. locale.time_units.append(locale_data.unique_formats.ensure(move(format)));
  97. };
  98. fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) {
  99. auto segments = unit_and_style.split_view('-');
  100. auto unit = segments[0];
  101. auto style = (segments.size() > 1) ? segments[1] : "long"sv;
  102. if (!is_sanctioned_unit(unit))
  103. return;
  104. patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
  105. constexpr auto number_key = "relative-type-"sv;
  106. constexpr auto tense_key = "relativeTime-type-"sv;
  107. constexpr auto plurality_key = "relativeTimePattern-count-"sv;
  108. if (type.starts_with(number_key)) {
  109. auto number = type.substring_view(number_key.length());
  110. parse_pattern(unit, style, "Other"sv, number, pattern_value);
  111. } else if (type.starts_with(tense_key)) {
  112. pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
  113. VERIFY(key.starts_with(plurality_key));
  114. auto plurality = key.substring_view(plurality_key.length());
  115. auto tense = type.substring_view(tense_key.length());
  116. parse_pattern(unit, style, plurality, tense, pattern);
  117. });
  118. }
  119. });
  120. });
  121. return {};
  122. }
  123. static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
  124. {
  125. auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
  126. auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
  127. auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
  128. StringBuilder builder;
  129. builder.append(locale_data.unique_strings.get(parsed_locale.language));
  130. if (auto script = locale_data.unique_strings.get(parsed_locale.script); !script.is_empty())
  131. builder.appendff("-{}", script);
  132. if (auto region = locale_data.unique_strings.get(parsed_locale.region); !region.is_empty())
  133. builder.appendff("-{}", region);
  134. return builder.build();
  135. };
  136. while (dates_iterator.has_next()) {
  137. auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
  138. auto language = TRY(remove_variants_from_path(dates_path));
  139. auto& locale = locale_data.locales.ensure(language);
  140. TRY(parse_date_fields(move(dates_path), locale_data, locale));
  141. }
  142. return {};
  143. }
  144. static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData&)
  145. {
  146. StringBuilder builder;
  147. SourceGenerator generator { builder };
  148. generator.append(R"~~~(
  149. #pragma once
  150. #include <LibUnicode/Forward.h>
  151. namespace Unicode {
  152. )~~~");
  153. generator.append(R"~~~(
  154. }
  155. )~~~");
  156. VERIFY(file.write(generator.as_string_view()));
  157. }
  158. static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
  159. {
  160. StringBuilder builder;
  161. SourceGenerator generator { builder };
  162. generator.set("string_index_type"sv, s_string_index_type);
  163. generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);
  164. generator.append(R"~~~(
  165. #include <AK/Array.h>
  166. #include <AK/StringView.h>
  167. #include <AK/Vector.h>
  168. #include <LibUnicode/Locale.h>
  169. #include <LibUnicode/RelativeTimeFormat.h>
  170. #include <LibUnicode/UnicodeRelativeTimeFormat.h>
  171. namespace Unicode {
  172. )~~~");
  173. locale_data.unique_strings.generate(generator);
  174. generator.append(R"~~~(
  175. struct RelativeTimeFormatImpl {
  176. RelativeTimeFormat to_relative_time_format() const
  177. {
  178. RelativeTimeFormat relative_time_format {};
  179. relative_time_format.plurality = plurality;
  180. relative_time_format.pattern = s_string_list[pattern];
  181. return relative_time_format;
  182. }
  183. TimeUnit time_unit;
  184. Style style;
  185. RelativeTimeFormat::Plurality plurality;
  186. @string_index_type@ tense_or_number { 0 };
  187. @string_index_type@ pattern { 0 };
  188. };
  189. )~~~");
  190. locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
  191. auto append_list = [&](String name, auto const& list) {
  192. generator.set("name", name);
  193. generator.set("size", String::number(list.size()));
  194. generator.append(R"~~~(
  195. static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
  196. bool first = true;
  197. for (auto index : list) {
  198. generator.append(first ? " " : ", ");
  199. generator.append(String::number(index));
  200. first = false;
  201. }
  202. generator.append(" } };");
  203. };
  204. generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
  205. generator.append(R"~~~(
  206. Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
  207. {
  208. Vector<RelativeTimeFormat> formats;
  209. auto locale_value = locale_from_string(locale);
  210. if (!locale_value.has_value())
  211. return formats;
  212. auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
  213. auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
  214. for (auto const& locale_format_index : locale_formats) {
  215. auto const& locale_format = s_relative_time_formats.at(locale_format_index);
  216. if (locale_format.time_unit != time_unit)
  217. continue;
  218. if (locale_format.style != style)
  219. continue;
  220. if (s_string_list[locale_format.tense_or_number] != tense_or_number)
  221. continue;
  222. formats.append(locale_format.to_relative_time_format());
  223. }
  224. return formats;
  225. }
  226. }
  227. )~~~");
  228. VERIFY(file.write(generator.as_string_view()));
  229. }
  230. ErrorOr<int> serenity_main(Main::Arguments arguments)
  231. {
  232. StringView generated_header_path;
  233. StringView generated_implementation_path;
  234. StringView dates_path;
  235. Core::ArgsParser args_parser;
  236. args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
  237. args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  238. args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
  239. args_parser.parse(arguments);
  240. auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
  241. if (path.is_empty()) {
  242. args_parser.print_usage(stderr, arguments.argv[0]);
  243. return Error::from_string_literal("Must provide all command line options"sv);
  244. }
  245. return Core::File::open(path, Core::OpenMode::ReadWrite);
  246. };
  247. auto generated_header_file = TRY(open_file(generated_header_path));
  248. auto generated_implementation_file = TRY(open_file(generated_implementation_path));
  249. UnicodeLocaleData locale_data;
  250. TRY(parse_all_locales(dates_path, locale_data));
  251. generate_unicode_locale_header(generated_header_file, locale_data);
  252. generate_unicode_locale_implementation(generated_implementation_file, locale_data);
  253. return 0;
  254. }