GenerateTimeZoneData.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. /*
  2. * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
  7. #include <AK/Format.h>
  8. #include <AK/HashMap.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <AK/String.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/Vector.h>
  13. #include <LibCore/ArgsParser.h>
  14. #include <LibCore/File.h>
  15. namespace {
  16. struct DateTime {
  17. u16 year { 0 };
  18. Optional<u8> month;
  19. Optional<u8> day;
  20. Optional<u8> last_weekday;
  21. Optional<u8> after_weekday;
  22. Optional<u8> before_weekday;
  23. Optional<u8> hour;
  24. Optional<u8> minute;
  25. Optional<u8> second;
  26. };
  27. struct TimeZoneOffset {
  28. i64 offset { 0 };
  29. Optional<DateTime> until;
  30. };
  31. struct DaylightSavingsOffset {
  32. i64 offset { 0 };
  33. u16 year_from { 0 };
  34. u16 year_to { 0 };
  35. DateTime in_effect;
  36. };
  37. struct TimeZoneData {
  38. HashMap<String, Vector<TimeZoneOffset>> time_zones;
  39. Vector<String> time_zone_names;
  40. Vector<Alias> time_zone_aliases;
  41. HashMap<String, Vector<DaylightSavingsOffset>> dst_offsets;
  42. Vector<String> dst_offset_names;
  43. };
  44. }
  45. template<>
  46. struct AK::Formatter<DateTime> : Formatter<FormatString> {
  47. ErrorOr<void> format(FormatBuilder& builder, DateTime const& date_time)
  48. {
  49. return Formatter<FormatString>::format(builder,
  50. "{{ {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
  51. date_time.year,
  52. date_time.month.value_or(1),
  53. date_time.day.value_or(1),
  54. date_time.last_weekday.value_or(0),
  55. date_time.after_weekday.value_or(0),
  56. date_time.before_weekday.value_or(0),
  57. date_time.hour.value_or(0),
  58. date_time.minute.value_or(0),
  59. date_time.second.value_or(0));
  60. }
  61. };
  62. template<>
  63. struct AK::Formatter<TimeZoneOffset> : Formatter<FormatString> {
  64. ErrorOr<void> format(FormatBuilder& builder, TimeZoneOffset const& time_zone_offset)
  65. {
  66. return Formatter<FormatString>::format(builder,
  67. "{{ {}, {}, {} }}",
  68. time_zone_offset.offset,
  69. time_zone_offset.until.value_or({}),
  70. time_zone_offset.until.has_value());
  71. }
  72. };
  73. template<>
  74. struct AK::Formatter<DaylightSavingsOffset> : Formatter<FormatString> {
  75. ErrorOr<void> format(FormatBuilder& builder, DaylightSavingsOffset const& dst_offset)
  76. {
  77. return Formatter<FormatString>::format(builder,
  78. "{{ {}, {}, {}, {} }}",
  79. dst_offset.offset,
  80. dst_offset.year_from,
  81. dst_offset.year_to,
  82. dst_offset.in_effect);
  83. }
  84. };
  85. static Optional<DateTime> parse_date_time(Span<StringView const> segments)
  86. {
  87. constexpr auto months = Array { "Jan"sv, "Feb"sv, "Mar"sv, "Apr"sv, "May"sv, "Jun"sv, "Jul"sv, "Aug"sv, "Sep"sv, "Oct"sv, "Nov"sv, "Dec"sv };
  88. constexpr auto weekdays = Array { "Sun"sv, "Mon"sv, "Tue"sv, "Wed"sv, "Thu"sv, "Fri"sv, "Sat"sv };
  89. auto comment_index = find_index(segments.begin(), segments.end(), "#"sv);
  90. if (comment_index != segments.size())
  91. segments = segments.slice(0, comment_index);
  92. if (segments.is_empty())
  93. return {};
  94. DateTime date_time {};
  95. date_time.year = segments[0].to_uint().value();
  96. if (segments.size() > 1)
  97. date_time.month = find_index(months.begin(), months.end(), segments[1]) + 1;
  98. if (segments.size() > 2) {
  99. if (segments[2].starts_with("last"sv)) {
  100. auto weekday = segments[2].substring_view("last"sv.length());
  101. date_time.last_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
  102. } else if (auto index = segments[2].find(">="sv); index.has_value()) {
  103. auto weekday = segments[2].substring_view(0, *index);
  104. date_time.after_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
  105. auto day = segments[2].substring_view(*index + ">="sv.length());
  106. date_time.day = day.to_uint().value();
  107. } else if (auto index = segments[2].find("<="sv); index.has_value()) {
  108. auto weekday = segments[2].substring_view(0, *index);
  109. date_time.before_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
  110. auto day = segments[2].substring_view(*index + "<="sv.length());
  111. date_time.day = day.to_uint().value();
  112. } else {
  113. date_time.day = segments[2].to_uint().value();
  114. }
  115. }
  116. if (segments.size() > 3) {
  117. // FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
  118. auto time_segments = segments[3].split_view(':');
  119. date_time.hour = time_segments[0].to_int().value();
  120. date_time.minute = time_segments.size() > 1 ? time_segments[1].substring_view(0, 2).to_uint().value() : 0;
  121. date_time.second = time_segments.size() > 2 ? time_segments[2].substring_view(0, 2).to_uint().value() : 0;
  122. }
  123. return date_time;
  124. }
  125. static i64 parse_time_offset(StringView segment)
  126. {
  127. auto segments = segment.split_view(':');
  128. i64 hours = segments[0].to_int().value();
  129. i64 minutes = segments.size() > 1 ? segments[1].to_uint().value() : 0;
  130. i64 seconds = segments.size() > 2 ? segments[2].to_uint().value() : 0;
  131. i64 sign = ((hours < 0) || (segments[0] == "-0"sv)) ? -1 : 1;
  132. return (hours * 3600) + sign * ((minutes * 60) + seconds);
  133. }
  134. static Vector<TimeZoneOffset>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
  135. {
  136. auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
  137. // "Zone" NAME STDOFF RULES FORMAT [UNTIL]
  138. VERIFY(segments[0] == "Zone"sv);
  139. auto name = segments[1];
  140. TimeZoneOffset time_zone {};
  141. time_zone.offset = parse_time_offset(segments[2]);
  142. if (segments.size() > 5)
  143. time_zone.until = parse_date_time(segments.span().slice(5));
  144. auto& time_zones = time_zone_data.time_zones.ensure(name);
  145. time_zones.append(move(time_zone));
  146. if (!time_zone_data.time_zone_names.contains_slow(name))
  147. time_zone_data.time_zone_names.append(name);
  148. return time_zones;
  149. }
  150. static void parse_zone_continuation(StringView zone_line, Vector<TimeZoneOffset>& time_zones)
  151. {
  152. auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
  153. // STDOFF RULES FORMAT [UNTIL]
  154. TimeZoneOffset time_zone {};
  155. time_zone.offset = parse_time_offset(segments[0]);
  156. if (segments.size() > 3)
  157. time_zone.until = parse_date_time(segments.span().slice(3));
  158. time_zones.append(move(time_zone));
  159. }
  160. static void parse_link(StringView link_line, TimeZoneData& time_zone_data)
  161. {
  162. auto segments = link_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
  163. // Link TARGET LINK-NAME
  164. VERIFY(segments[0] == "Link"sv);
  165. auto target = segments[1];
  166. auto alias = segments[2];
  167. time_zone_data.time_zone_aliases.append({ target, alias });
  168. }
  169. static void parse_rule(StringView rule_line, TimeZoneData& time_zone_data)
  170. {
  171. auto segments = rule_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
  172. // Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
  173. VERIFY(segments[0] == "Rule"sv);
  174. auto name = segments[1];
  175. DaylightSavingsOffset dst_offset {};
  176. dst_offset.offset = parse_time_offset(segments[8]);
  177. dst_offset.year_from = segments[2].to_uint().value();
  178. if (segments[3] == "only")
  179. dst_offset.year_to = dst_offset.year_from;
  180. else if (segments[3] == "max"sv)
  181. dst_offset.year_to = NumericLimits<u16>::max();
  182. else
  183. dst_offset.year_to = segments[3].to_uint().value();
  184. auto in_effect = Array { "0"sv, segments[5], segments[6], segments[7] };
  185. dst_offset.in_effect = parse_date_time(in_effect).release_value();
  186. auto& dst_offsets = time_zone_data.dst_offsets.ensure(name);
  187. dst_offsets.append(move(dst_offset));
  188. if (!time_zone_data.dst_offset_names.contains_slow(name))
  189. time_zone_data.dst_offset_names.append(name);
  190. }
  191. static ErrorOr<void> parse_time_zones(StringView time_zone_path, TimeZoneData& time_zone_data)
  192. {
  193. // For reference, the man page for `zic` has the best documentation of the TZDB file format.
  194. auto file = TRY(Core::File::open(time_zone_path, Core::OpenMode::ReadOnly));
  195. Vector<TimeZoneOffset>* last_parsed_zone = nullptr;
  196. while (file->can_read_line()) {
  197. auto line = file->read_line();
  198. if (line.is_empty() || line.trim_whitespace(TrimMode::Left).starts_with('#'))
  199. continue;
  200. if (line.starts_with("Zone"sv)) {
  201. last_parsed_zone = &parse_zone(line, time_zone_data);
  202. } else if (line.starts_with('\t')) {
  203. VERIFY(last_parsed_zone != nullptr);
  204. parse_zone_continuation(line, *last_parsed_zone);
  205. } else {
  206. last_parsed_zone = nullptr;
  207. if (line.starts_with("Link"sv))
  208. parse_link(line, time_zone_data);
  209. else if (line.starts_with("Rule"sv))
  210. parse_rule(line, time_zone_data);
  211. }
  212. }
  213. return {};
  214. }
  215. static String format_identifier(StringView owner, String identifier)
  216. {
  217. constexpr auto gmt_time_zones = Array { "Etc/GMT"sv, "GMT"sv };
  218. for (auto gmt_time_zone : gmt_time_zones) {
  219. if (identifier.starts_with(gmt_time_zone)) {
  220. auto offset = identifier.substring_view(gmt_time_zone.length());
  221. if (offset.starts_with('+'))
  222. identifier = String::formatted("{}_Ahead_{}", gmt_time_zone, offset.substring_view(1));
  223. else if (offset.starts_with('-'))
  224. identifier = String::formatted("{}_Behind_{}", gmt_time_zone, offset.substring_view(1));
  225. }
  226. }
  227. identifier = identifier.replace("-"sv, "_"sv, true);
  228. identifier = identifier.replace("/"sv, "_"sv, true);
  229. if (all_of(identifier, is_ascii_digit))
  230. return String::formatted("{}_{}", owner[0], identifier);
  231. if (is_ascii_lower_alpha(identifier[0]))
  232. return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
  233. return identifier;
  234. }
  235. static void generate_time_zone_data_header(Core::File& file, TimeZoneData& time_zone_data)
  236. {
  237. StringBuilder builder;
  238. SourceGenerator generator { builder };
  239. generator.append(R"~~~(
  240. #pragma once
  241. #include <AK/Types.h>
  242. namespace TimeZone {
  243. )~~~");
  244. generate_enum(generator, format_identifier, "TimeZone"sv, {}, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
  245. generate_enum(generator, format_identifier, "DaylightSavingsRule"sv, {}, time_zone_data.dst_offset_names);
  246. generator.append(R"~~~(
  247. }
  248. )~~~");
  249. VERIFY(file.write(generator.as_string_view()));
  250. }
  251. static void generate_time_zone_data_implementation(Core::File& file, TimeZoneData& time_zone_data)
  252. {
  253. StringBuilder builder;
  254. SourceGenerator generator { builder };
  255. generator.append(R"~~~(
  256. #include <AK/Array.h>
  257. #include <AK/BinarySearch.h>
  258. #include <AK/Optional.h>
  259. #include <AK/Span.h>
  260. #include <AK/StringView.h>
  261. #include <AK/Time.h>
  262. #include <LibTimeZone/TimeZone.h>
  263. #include <LibTimeZone/TimeZoneData.h>
  264. namespace TimeZone {
  265. struct DateTime {
  266. AK::Time time_since_epoch() const
  267. {
  268. // FIXME: This implementation does not take last_weekday, after_weekday, or before_weekday into account.
  269. return AK::Time::from_timestamp(year, month, day, hour, minute, second, 0);
  270. }
  271. u16 year { 0 };
  272. u8 month { 1 };
  273. u8 day { 1 };
  274. u8 last_weekday { 0 };
  275. u8 after_weekday { 0 };
  276. u8 before_weekday { 0 };
  277. u8 hour { 0 };
  278. u8 minute { 0 };
  279. u8 second { 0 };
  280. };
  281. struct TimeZoneOffset {
  282. i64 offset { 0 };
  283. DateTime until {};
  284. bool has_until { false };
  285. };
  286. struct DaylightSavingsOffset {
  287. i64 offset { 0 };
  288. u16 year_from { 0 };
  289. u16 year_to { 0 };
  290. DateTime in_effect {};
  291. };
  292. )~~~");
  293. auto append_offsets = [&](auto const& name, auto type, auto const& offsets) {
  294. generator.set("name", name);
  295. generator.set("type", type);
  296. generator.set("size", String::number(offsets.size()));
  297. generator.append(R"~~~(
  298. static constexpr Array<@type@, @size@> @name@ { {
  299. )~~~");
  300. for (auto const& offset : offsets)
  301. generator.append(String::formatted(" {},\n", offset));
  302. generator.append("} };\n");
  303. };
  304. generate_mapping(generator, time_zone_data.time_zone_names, "TimeZoneOffset"sv, "s_time_zone_offsets"sv, "s_time_zone_offsets_{}", format_identifier,
  305. [&](auto const& name, auto const& value) {
  306. auto const& time_zone_offsets = time_zone_data.time_zones.find(value)->value;
  307. append_offsets(name, "TimeZoneOffset"sv, time_zone_offsets);
  308. });
  309. generate_mapping(generator, time_zone_data.dst_offset_names, "DaylightSavingsOffset"sv, "s_dst_offsets"sv, "s_dst_offsets_{}", format_identifier,
  310. [&](auto const& name, auto const& value) {
  311. auto const& dst_offsets = time_zone_data.dst_offsets.find(value)->value;
  312. append_offsets(name, "DaylightSavingsOffset"sv, dst_offsets);
  313. });
  314. auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) {
  315. HashValueMap<String> hashes;
  316. hashes.ensure_capacity(values.size());
  317. auto hash = [](auto const& value) {
  318. return CaseInsensitiveStringViewTraits::hash(value);
  319. };
  320. for (auto const& value : values)
  321. hashes.set(hash(value), format_identifier(enum_title, value));
  322. for (auto const& alias : aliases)
  323. hashes.set(hash(alias.alias), format_identifier(enum_title, alias.alias));
  324. ValueFromStringOptions options {};
  325. options.sensitivity = CaseSensitivity::CaseInsensitive;
  326. generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes), options);
  327. generate_value_to_string(generator, "{}_to_string"sv, enum_title, enum_snake, format_identifier, values);
  328. };
  329. append_string_conversions("TimeZone"sv, "time_zone"sv, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
  330. append_string_conversions("DaylightSavingsRule"sv, "daylight_savings_rule"sv, time_zone_data.dst_offset_names);
  331. generator.append(R"~~~(
  332. Optional<i64> get_time_zone_offset(TimeZone time_zone, AK::Time time)
  333. {
  334. // FIXME: This implementation completely ignores DST.
  335. auto const& time_zone_offsets = s_time_zone_offsets[to_underlying(time_zone)];
  336. size_t index = 0;
  337. for (; index < time_zone_offsets.size(); ++index) {
  338. auto const& time_zone_offset = time_zone_offsets[index];
  339. if (!time_zone_offset.has_until || (time_zone_offset.until.time_since_epoch() > time))
  340. break;
  341. }
  342. VERIFY(index < time_zone_offsets.size());
  343. return time_zone_offsets[index].offset;
  344. }
  345. }
  346. )~~~");
  347. VERIFY(file.write(generator.as_string_view()));
  348. }
  349. ErrorOr<int> serenity_main(Main::Arguments arguments)
  350. {
  351. StringView generated_header_path;
  352. StringView generated_implementation_path;
  353. Vector<StringView> time_zone_paths;
  354. Core::ArgsParser args_parser;
  355. args_parser.add_option(generated_header_path, "Path to the time zone data header file to generate", "generated-header-path", 'h', "generated-header-path");
  356. args_parser.add_option(generated_implementation_path, "Path to the time zone data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
  357. args_parser.add_positional_argument(time_zone_paths, "Paths to the time zone database files", "time-zone-paths");
  358. args_parser.parse(arguments);
  359. auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
  360. if (path.is_empty()) {
  361. args_parser.print_usage(stderr, arguments.argv[0]);
  362. return Error::from_string_literal("Must provide all command line options"sv);
  363. }
  364. return Core::File::open(path, Core::OpenMode::ReadWrite);
  365. };
  366. auto generated_header_file = TRY(open_file(generated_header_path));
  367. auto generated_implementation_file = TRY(open_file(generated_implementation_path));
  368. TimeZoneData time_zone_data {};
  369. for (auto time_zone_path : time_zone_paths)
  370. TRY(parse_time_zones(time_zone_path, time_zone_data));
  371. generate_time_zone_data_header(generated_header_file, time_zone_data);
  372. generate_time_zone_data_implementation(generated_implementation_file, time_zone_data);
  373. return 0;
  374. }