ladybird/Meta/Lagom/Tools/CodeGenerators/LibTimeZone/GenerateTimeZoneData.cpp
Timothy Flynn fc0c88344b LibTimeZone: Slightly refactor the generated DST rule finding method
This just splits up the method to find the active DST rule for specified
time and time zone. This is to allow re-using the now split-off function
in upcoming commits.
2022-01-25 18:39:36 +00:00

639 lines
22 KiB
C++

/*
* Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/SourceGenerator.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Vector.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
namespace {
using StringIndexType = u8;
constexpr auto s_string_index_type = "u8"sv;
struct DateTime {
u16 year { 0 };
Optional<u8> month;
Optional<u8> day;
Optional<u8> last_weekday;
Optional<u8> after_weekday;
Optional<u8> before_weekday;
Optional<u8> hour;
Optional<u8> minute;
Optional<u8> second;
};
struct TimeZoneOffset {
i64 offset { 0 };
Optional<DateTime> until;
Optional<String> dst_rule;
Optional<i32> dst_rule_index;
i64 dst_offset { 0 };
StringIndexType standard_format { 0 };
StringIndexType daylight_format { 0 };
};
struct DaylightSavingsOffset {
i64 offset { 0 };
u16 year_from { 0 };
u16 year_to { 0 };
DateTime in_effect;
StringIndexType format { 0 };
};
struct TimeZoneData {
UniqueStringStorage<StringIndexType> unique_strings;
HashMap<String, Vector<TimeZoneOffset>> time_zones;
Vector<String> time_zone_names;
Vector<Alias> time_zone_aliases;
HashMap<String, Vector<DaylightSavingsOffset>> dst_offsets;
Vector<String> dst_offset_names;
};
}
template<>
struct AK::Formatter<DateTime> : Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, DateTime const& date_time)
{
return Formatter<FormatString>::format(builder,
"{{ {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
date_time.year,
date_time.month.value_or(1),
date_time.day.value_or(1),
date_time.last_weekday.value_or(0),
date_time.after_weekday.value_or(0),
date_time.before_weekday.value_or(0),
date_time.hour.value_or(0),
date_time.minute.value_or(0),
date_time.second.value_or(0));
}
};
template<>
struct AK::Formatter<TimeZoneOffset> : Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, TimeZoneOffset const& time_zone_offset)
{
return Formatter<FormatString>::format(builder,
"{{ {}, {}, {}, {}, {}, {}, {} }}",
time_zone_offset.offset,
time_zone_offset.until.value_or({}),
time_zone_offset.until.has_value(),
time_zone_offset.dst_rule_index.value_or(-1),
time_zone_offset.dst_offset,
time_zone_offset.standard_format,
time_zone_offset.daylight_format);
}
};
template<>
struct AK::Formatter<DaylightSavingsOffset> : Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, DaylightSavingsOffset const& dst_offset)
{
return Formatter<FormatString>::format(builder,
"{{ {}, {}, {}, {}, {} }}",
dst_offset.offset,
dst_offset.year_from,
dst_offset.year_to,
dst_offset.in_effect,
dst_offset.format);
}
};
static Optional<DateTime> parse_date_time(Span<StringView const> segments)
{
constexpr auto months = Array { "Jan"sv, "Feb"sv, "Mar"sv, "Apr"sv, "May"sv, "Jun"sv, "Jul"sv, "Aug"sv, "Sep"sv, "Oct"sv, "Nov"sv, "Dec"sv };
constexpr auto weekdays = Array { "Sun"sv, "Mon"sv, "Tue"sv, "Wed"sv, "Thu"sv, "Fri"sv, "Sat"sv };
auto comment_index = find_index(segments.begin(), segments.end(), "#"sv);
if (comment_index != segments.size())
segments = segments.slice(0, comment_index);
if (segments.is_empty())
return {};
DateTime date_time {};
date_time.year = segments[0].to_uint().value();
if (segments.size() > 1)
date_time.month = find_index(months.begin(), months.end(), segments[1]) + 1;
if (segments.size() > 2) {
if (segments[2].starts_with("last"sv)) {
auto weekday = segments[2].substring_view("last"sv.length());
date_time.last_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
} else if (auto index = segments[2].find(">="sv); index.has_value()) {
auto weekday = segments[2].substring_view(0, *index);
date_time.after_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
auto day = segments[2].substring_view(*index + ">="sv.length());
date_time.day = day.to_uint().value();
} else if (auto index = segments[2].find("<="sv); index.has_value()) {
auto weekday = segments[2].substring_view(0, *index);
date_time.before_weekday = find_index(weekdays.begin(), weekdays.end(), weekday);
auto day = segments[2].substring_view(*index + "<="sv.length());
date_time.day = day.to_uint().value();
} else {
date_time.day = segments[2].to_uint().value();
}
}
if (segments.size() > 3) {
// FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
auto time_segments = segments[3].split_view(':');
date_time.hour = time_segments[0].to_int().value();
date_time.minute = time_segments.size() > 1 ? time_segments[1].substring_view(0, 2).to_uint().value() : 0;
date_time.second = time_segments.size() > 2 ? time_segments[2].substring_view(0, 2).to_uint().value() : 0;
}
return date_time;
}
static i64 parse_time_offset(StringView segment)
{
auto segments = segment.split_view(':');
i64 hours = segments[0].to_int().value();
i64 minutes = segments.size() > 1 ? segments[1].to_uint().value() : 0;
i64 seconds = segments.size() > 2 ? segments[2].to_uint().value() : 0;
i64 sign = ((hours < 0) || (segments[0] == "-0"sv)) ? -1 : 1;
return (hours * 3600) + sign * ((minutes * 60) + seconds);
}
static void parse_dst_rule(StringView segment, TimeZoneOffset& time_zone)
{
if (segment.contains(':'))
time_zone.dst_offset = parse_time_offset(segment);
else if (segment != "-"sv)
time_zone.dst_rule = segment;
}
static void parse_format(StringView format, TimeZoneData& time_zone_data, TimeZoneOffset& time_zone)
{
auto formats = format.replace("%s"sv, "{}"sv).split('/');
VERIFY(formats.size() <= 2);
time_zone.standard_format = time_zone_data.unique_strings.ensure(formats[0]);
if (formats.size() == 2)
time_zone.daylight_format = time_zone_data.unique_strings.ensure(formats[1]);
else
time_zone.daylight_format = time_zone.standard_format;
}
static Vector<TimeZoneOffset>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
{
auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
// "Zone" NAME STDOFF RULES FORMAT [UNTIL]
VERIFY(segments[0] == "Zone"sv);
auto name = segments[1];
TimeZoneOffset time_zone {};
time_zone.offset = parse_time_offset(segments[2]);
parse_dst_rule(segments[3], time_zone);
parse_format(segments[4], time_zone_data, time_zone);
if (segments.size() > 5)
time_zone.until = parse_date_time(segments.span().slice(5));
auto& time_zones = time_zone_data.time_zones.ensure(name);
time_zones.append(move(time_zone));
if (!time_zone_data.time_zone_names.contains_slow(name))
time_zone_data.time_zone_names.append(name);
return time_zones;
}
static void parse_zone_continuation(StringView zone_line, TimeZoneData& time_zone_data, Vector<TimeZoneOffset>& time_zones)
{
auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
// STDOFF RULES FORMAT [UNTIL]
TimeZoneOffset time_zone {};
time_zone.offset = parse_time_offset(segments[0]);
parse_dst_rule(segments[1], time_zone);
parse_format(segments[2], time_zone_data, time_zone);
if (segments.size() > 3)
time_zone.until = parse_date_time(segments.span().slice(3));
time_zones.append(move(time_zone));
}
static void parse_link(StringView link_line, TimeZoneData& time_zone_data)
{
auto segments = link_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
// Link TARGET LINK-NAME
VERIFY(segments[0] == "Link"sv);
auto target = segments[1];
auto alias = segments[2];
time_zone_data.time_zone_aliases.append({ target, alias });
}
static void parse_rule(StringView rule_line, TimeZoneData& time_zone_data)
{
auto segments = rule_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
// Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
VERIFY(segments[0] == "Rule"sv);
auto name = segments[1];
DaylightSavingsOffset dst_offset {};
dst_offset.offset = parse_time_offset(segments[8]);
dst_offset.year_from = segments[2].to_uint().value();
if (segments[3] == "only")
dst_offset.year_to = dst_offset.year_from;
else if (segments[3] == "max"sv)
dst_offset.year_to = NumericLimits<u16>::max();
else
dst_offset.year_to = segments[3].to_uint().value();
auto in_effect = Array { "0"sv, segments[5], segments[6], segments[7] };
dst_offset.in_effect = parse_date_time(in_effect).release_value();
if (segments[9] != "-"sv)
dst_offset.format = time_zone_data.unique_strings.ensure(segments[9]);
auto& dst_offsets = time_zone_data.dst_offsets.ensure(name);
dst_offsets.append(move(dst_offset));
if (!time_zone_data.dst_offset_names.contains_slow(name))
time_zone_data.dst_offset_names.append(name);
}
static ErrorOr<void> parse_time_zones(StringView time_zone_path, TimeZoneData& time_zone_data)
{
// For reference, the man page for `zic` has the best documentation of the TZDB file format.
auto file = TRY(Core::File::open(time_zone_path, Core::OpenMode::ReadOnly));
Vector<TimeZoneOffset>* last_parsed_zone = nullptr;
while (file->can_read_line()) {
auto line = file->read_line();
if (line.is_empty() || line.trim_whitespace(TrimMode::Left).starts_with('#'))
continue;
if (line.starts_with("Zone"sv)) {
last_parsed_zone = &parse_zone(line, time_zone_data);
} else if (line.starts_with('\t')) {
VERIFY(last_parsed_zone != nullptr);
parse_zone_continuation(line, time_zone_data, *last_parsed_zone);
} else {
last_parsed_zone = nullptr;
if (line.starts_with("Link"sv))
parse_link(line, time_zone_data);
else if (line.starts_with("Rule"sv))
parse_rule(line, time_zone_data);
}
}
return {};
}
static void set_dst_rule_indices(TimeZoneData& time_zone_data)
{
for (auto& time_zone : time_zone_data.time_zones) {
for (auto& time_zone_offset : time_zone.value) {
if (!time_zone_offset.dst_rule.has_value())
continue;
auto dst_rule_index = time_zone_data.dst_offset_names.find_first_index(*time_zone_offset.dst_rule);
time_zone_offset.dst_rule_index = static_cast<i32>(dst_rule_index.value());
}
}
}
static String format_identifier(StringView owner, String identifier)
{
constexpr auto gmt_time_zones = Array { "Etc/GMT"sv, "GMT"sv };
for (auto gmt_time_zone : gmt_time_zones) {
if (identifier.starts_with(gmt_time_zone)) {
auto offset = identifier.substring_view(gmt_time_zone.length());
if (offset.starts_with('+'))
identifier = String::formatted("{}_Ahead_{}", gmt_time_zone, offset.substring_view(1));
else if (offset.starts_with('-'))
identifier = String::formatted("{}_Behind_{}", gmt_time_zone, offset.substring_view(1));
}
}
identifier = identifier.replace("-"sv, "_"sv, true);
identifier = identifier.replace("/"sv, "_"sv, true);
if (all_of(identifier, is_ascii_digit))
return String::formatted("{}_{}", owner[0], identifier);
if (is_ascii_lower_alpha(identifier[0]))
return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1));
return identifier;
}
static void generate_time_zone_data_header(Core::File& file, TimeZoneData& time_zone_data)
{
StringBuilder builder;
SourceGenerator generator { builder };
generator.append(R"~~~(
#pragma once
#include <AK/Types.h>
namespace TimeZone {
)~~~");
generate_enum(generator, format_identifier, "TimeZone"sv, {}, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
generate_enum(generator, format_identifier, "DaylightSavingsRule"sv, {}, time_zone_data.dst_offset_names);
generator.append(R"~~~(
}
)~~~");
VERIFY(file.write(generator.as_string_view()));
}
static void generate_time_zone_data_implementation(Core::File& file, TimeZoneData& time_zone_data)
{
StringBuilder builder;
SourceGenerator generator { builder };
generator.set("string_index_type"sv, s_string_index_type);
set_dst_rule_indices(time_zone_data);
generator.append(R"~~~(
#include <AK/Array.h>
#include <AK/BinarySearch.h>
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <LibTimeZone/TimeZone.h>
#include <LibTimeZone/TimeZoneData.h>
namespace TimeZone {
struct DateTime {
AK::Time time_since_epoch() const
{
// FIXME: This implementation does not take last_weekday, after_weekday, or before_weekday into account.
return AK::Time::from_timestamp(year, month, day, hour, minute, second, 0);
}
u16 year { 0 };
u8 month { 1 };
u8 day { 1 };
u8 last_weekday { 0 };
u8 after_weekday { 0 };
u8 before_weekday { 0 };
u8 hour { 0 };
u8 minute { 0 };
u8 second { 0 };
};
struct TimeZoneOffset {
i64 offset { 0 };
DateTime until {};
bool has_until { false };
i32 dst_rule { -1 };
i64 dst_offset { 0 };
@string_index_type@ standard_format { 0 };
@string_index_type@ daylight_format { 0 };
};
struct DaylightSavingsOffset {
AK::Time time_in_effect(AK::Time time) const
{
auto in_effect = this->in_effect;
in_effect.year = seconds_since_epoch_to_year(time.to_seconds());
return in_effect.time_since_epoch();
}
i64 offset { 0 };
u16 year_from { 0 };
u16 year_to { 0 };
DateTime in_effect {};
@string_index_type@ format { 0 };
};
)~~~");
time_zone_data.unique_strings.generate(generator);
auto append_offsets = [&](auto const& name, auto type, auto const& offsets) {
generator.set("name", name);
generator.set("type", type);
generator.set("size", String::number(offsets.size()));
generator.append(R"~~~(
static constexpr Array<@type@, @size@> @name@ { {
)~~~");
for (auto const& offset : offsets)
generator.append(String::formatted(" {},\n", offset));
generator.append("} };\n");
};
generate_mapping(generator, time_zone_data.time_zone_names, "TimeZoneOffset"sv, "s_time_zone_offsets"sv, "s_time_zone_offsets_{}", format_identifier,
[&](auto const& name, auto const& value) {
auto const& time_zone_offsets = time_zone_data.time_zones.find(value)->value;
append_offsets(name, "TimeZoneOffset"sv, time_zone_offsets);
});
generate_mapping(generator, time_zone_data.dst_offset_names, "DaylightSavingsOffset"sv, "s_dst_offsets"sv, "s_dst_offsets_{}", format_identifier,
[&](auto const& name, auto const& value) {
auto const& dst_offsets = time_zone_data.dst_offsets.find(value)->value;
append_offsets(name, "DaylightSavingsOffset"sv, dst_offsets);
});
auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) {
HashValueMap<String> hashes;
hashes.ensure_capacity(values.size());
auto hash = [](auto const& value) {
return CaseInsensitiveStringViewTraits::hash(value);
};
for (auto const& value : values)
hashes.set(hash(value), format_identifier(enum_title, value));
for (auto const& alias : aliases)
hashes.set(hash(alias.alias), format_identifier(enum_title, alias.alias));
ValueFromStringOptions options {};
options.sensitivity = CaseSensitivity::CaseInsensitive;
generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes), options);
generate_value_to_string(generator, "{}_to_string"sv, enum_title, enum_snake, format_identifier, values);
};
append_string_conversions("TimeZone"sv, "time_zone"sv, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
append_string_conversions("DaylightSavingsRule"sv, "daylight_savings_rule"sv, time_zone_data.dst_offset_names);
generator.append(R"~~~(
static Array<DaylightSavingsOffset const*, 2> find_dst_offsets(TimeZoneOffset const& time_zone_offset, AK::Time time)
{
auto const& dst_rules = s_dst_offsets[time_zone_offset.dst_rule];
DaylightSavingsOffset const* standard_offset = nullptr;
DaylightSavingsOffset const* daylight_offset = nullptr;
auto preferred_rule = [&](auto* current_offset, auto& new_offset) {
if (!current_offset)
return &new_offset;
auto new_time_in_effect = new_offset.time_in_effect(time);
return (time >= new_time_in_effect) ? &new_offset : current_offset;
};
for (size_t index = 0; (index < dst_rules.size()) && (!standard_offset || !daylight_offset); ++index) {
auto const& dst_rule = dst_rules[index];
auto year_from = AK::Time::from_timestamp(dst_rule.year_from, 1, 1, 0, 0, 0, 0);
auto year_to = AK::Time::from_timestamp(dst_rule.year_to + 1, 1, 1, 0, 0, 0, 0);
if ((time < year_from) || (time >= year_to))
continue;
if (dst_rule.offset == 0)
standard_offset = preferred_rule(standard_offset, dst_rule);
else
daylight_offset = preferred_rule(daylight_offset, dst_rule);
}
// In modern times, there will always be a standard rule in the TZDB, but that isn't true in
// all time zones in or before the early 1900s. For example, the "US" rules begin in 1918.
if (!standard_offset) {
static DaylightSavingsOffset const empty_offset {};
return { &empty_offset, &empty_offset };
}
return { standard_offset, daylight_offset ? daylight_offset : standard_offset };
}
static Offset get_active_dst_offset(TimeZoneOffset const& time_zone_offset, AK::Time time)
{
auto offsets = find_dst_offsets(time_zone_offset, time);
if (offsets[0] == offsets[1])
return { offsets[0]->offset, InDST::No };
auto standard_time_in_effect = offsets[0]->time_in_effect(time);
auto daylight_time_in_effect = offsets[1]->time_in_effect(time);
if ((time < daylight_time_in_effect) || (time >= standard_time_in_effect))
return { offsets[0]->offset, InDST::No };
return { offsets[1]->offset, InDST::Yes };
}
static TimeZoneOffset const& find_time_zone_offset(TimeZone time_zone, AK::Time time)
{
auto const& time_zone_offsets = s_time_zone_offsets[to_underlying(time_zone)];
size_t index = 0;
for (; index < time_zone_offsets.size(); ++index) {
auto const& time_zone_offset = time_zone_offsets[index];
if (!time_zone_offset.has_until || (time_zone_offset.until.time_since_epoch() > time))
break;
}
VERIFY(index < time_zone_offsets.size());
return time_zone_offsets[index];
}
Optional<Offset> get_time_zone_offset(TimeZone time_zone, AK::Time time)
{
auto const& time_zone_offset = find_time_zone_offset(time_zone, time);
Offset dst_offset {};
if (time_zone_offset.dst_rule != -1) {
dst_offset = get_active_dst_offset(time_zone_offset, time);
} else {
auto in_dst = time_zone_offset.dst_offset == 0 ? InDST::No : InDST::Yes;
dst_offset = { time_zone_offset.dst_offset, in_dst };
}
dst_offset.seconds += time_zone_offset.offset;
return dst_offset;
}
Span<StringView const> all_time_zones()
{
static constexpr auto all_time_zones = Array {
)~~~");
for (auto const& time_zone : time_zone_data.time_zone_names) {
generator.set("time_zone", time_zone);
generator.append("\"@time_zone@\"sv, ");
}
generator.append(R"~~~(
};
return all_time_zones;
}
}
)~~~");
VERIFY(file.write(generator.as_string_view()));
}
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
StringView generated_header_path;
StringView generated_implementation_path;
Vector<StringView> time_zone_paths;
Core::ArgsParser args_parser;
args_parser.add_option(generated_header_path, "Path to the time zone data header file to generate", "generated-header-path", 'h', "generated-header-path");
args_parser.add_option(generated_implementation_path, "Path to the time zone data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
args_parser.add_positional_argument(time_zone_paths, "Paths to the time zone database files", "time-zone-paths");
args_parser.parse(arguments);
auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
if (path.is_empty()) {
args_parser.print_usage(stderr, arguments.argv[0]);
return Error::from_string_literal("Must provide all command line options"sv);
}
return Core::File::open(path, Core::OpenMode::ReadWrite);
};
auto generated_header_file = TRY(open_file(generated_header_path));
auto generated_implementation_file = TRY(open_file(generated_implementation_path));
TimeZoneData time_zone_data {};
for (auto time_zone_path : time_zone_paths)
TRY(parse_time_zones(time_zone_path, time_zone_data));
generate_time_zone_data_header(generated_header_file, time_zone_data);
generate_time_zone_data_implementation(generated_implementation_file, time_zone_data);
return 0;
}