Просмотр исходного кода

LibTimeZone: Begin generating GMT offset rules for each time zone

This is a rather naive implementation, but serves as a first pass at
determining the GMT offset for a time zone at a particular point in
time. This implementation ignores DST (because we are not parsing any
RULE entries yet), and ignores any offset patterns of the form "Mon>4"
or "lastSun".
Timothy Flynn 3 лет назад
Родитель
Коммит
09c0324880

+ 142 - 33
Meta/Lagom/Tools/CodeGenerators/LibTimeZone/GenerateTimeZoneData.cpp

@@ -5,6 +5,7 @@
  */
 
 #include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
+#include <AK/Format.h>
 #include <AK/HashMap.h>
 #include <AK/SourceGenerator.h>
 #include <AK/String.h>
@@ -15,44 +16,60 @@
 
 namespace {
 
-struct Time {
-    i8 hour { 0 };
-    u8 minute { 0 };
-    u8 second { 0 };
-};
-
 struct DateTime {
     u16 year { 0 };
     Optional<u8> month;
+    Optional<u8> day;
+
     Optional<u8> last_weekday;
     Optional<u8> after_weekday;
-    Optional<u8> day;
-    Optional<Time> time;
+
+    Optional<u8> hour;
+    Optional<u8> minute;
+    Optional<u8> second;
 };
 
-struct TimeZone {
-    Time offset;
+struct TimeZoneOffset {
+    i64 offset { 0 };
     Optional<DateTime> until;
 };
 
 struct TimeZoneData {
-    HashMap<String, Vector<TimeZone>> time_zones;
+    HashMap<String, Vector<TimeZoneOffset>> time_zones;
     Vector<String> time_zone_names;
     Vector<Alias> time_zone_aliases;
 };
 
-static Time parse_time(StringView segment)
-{
-    // FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
-    auto segments = segment.split_view(':');
+}
 
-    Time time {};
-    time.hour = segments[0].to_int().value();
-    time.minute = segments.size() > 1 ? segments[1].substring_view(0, 2).to_uint().value() : 0;
-    time.second = segments.size() > 2 ? segments[2].substring_view(0, 2).to_uint().value() : 0;
+template<> // Lets String::formatted() print the generator's DateTime as a C++ brace initializer.
+struct AK::Formatter<DateTime> : Formatter<FormatString> {
+    ErrorOr<void> format(FormatBuilder& builder, DateTime const& date_time)
+    {
+        return Formatter<FormatString>::format(builder,
+            "{{ {}, {}, {}, {}, {}, {}, {}, {} }}", // "{{" and "}}" emit literal braces around the eight fields.
+            date_time.year,
+            date_time.month.value_or(1), // Unset month/day fall back to 1, matching the generated struct's defaults.
+            date_time.day.value_or(1),
+            date_time.last_weekday.value_or(0), // All remaining unset fields fall back to 0.
+            date_time.after_weekday.value_or(0),
+            date_time.hour.value_or(0),
+            date_time.minute.value_or(0),
+            date_time.second.value_or(0));
+    }
+};
 
-    return time;
-}
+template<> // Prints a TimeZoneOffset as "{ offset, until, has_until }", the member order of the generated TimeZoneOffset struct.
+struct AK::Formatter<TimeZoneOffset> : Formatter<FormatString> {
+    ErrorOr<void> format(FormatBuilder& builder, TimeZoneOffset const& time_zone_offset)
+    {
+        return Formatter<FormatString>::format(builder,
+            "{{ {}, {}, {} }}",
+            time_zone_offset.offset,
+            time_zone_offset.until.value_or({}), // A default-constructed DateTime stands in when there is no until date.
+            time_zone_offset.until.has_value()); // Becomes the generated has_until flag.
+    }
+};
 
 static Optional<DateTime> parse_date_time(Span<StringView const> segments)
 {
@@ -69,7 +86,7 @@ static Optional<DateTime> parse_date_time(Span<StringView const> segments)
     date_time.year = segments[0].to_uint().value();
 
     if (segments.size() > 1)
-        date_time.month = find_index(months.begin(), months.end(), segments[1]);
+        date_time.month = find_index(months.begin(), months.end(), segments[1]) + 1;
 
     if (segments.size() > 2) {
         if (segments[2].starts_with("last"sv)) {
@@ -86,13 +103,31 @@ static Optional<DateTime> parse_date_time(Span<StringView const> segments)
         }
     }
 
-    if (segments.size() > 3)
-        date_time.time = parse_time(segments[3]);
+    if (segments.size() > 3) {
+        // FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
+        auto time_segments = segments[3].split_view(':');
+
+        date_time.hour = time_segments[0].to_int().value();
+        date_time.minute = time_segments.size() > 1 ? time_segments[1].substring_view(0, 2).to_uint().value() : 0;
+        date_time.second = time_segments.size() > 2 ? time_segments[2].substring_view(0, 2).to_uint().value() : 0;
+    }
 
     return date_time;
 }
 
-static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
+static i64 parse_time_offset(StringView segment)
+{
+    // Converts an offset field of the form "[-]h[:mm[:ss]]" into a signed number of seconds.
+    auto segments = segment.split_view(':');
+    i64 hours = segments[0].to_int().value();
+    i64 minutes = segments.size() > 1 ? segments[1].to_uint().value() : 0;
+    i64 seconds = segments.size() > 2 ? segments[2].to_uint().value() : 0;
+    // Read the sign from the text, not from the parsed hours: a zero hour field ("-0", "-00") parses to 0.
+    i64 sign = segments[0].starts_with("-"sv) ? -1 : 1;
+    return (hours * 3600) + sign * ((minutes * 60) + seconds);
+}
+
+static Vector<TimeZoneOffset>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
 {
     auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
 
@@ -100,8 +135,8 @@ static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zon
     VERIFY(segments[0] == "Zone"sv);
     auto name = segments[1];
 
-    TimeZone time_zone {};
-    time_zone.offset = parse_time(segments[2]);
+    TimeZoneOffset time_zone {};
+    time_zone.offset = parse_time_offset(segments[2]);
 
     if (segments.size() > 5)
         time_zone.until = parse_date_time(segments.span().slice(5));
@@ -115,13 +150,13 @@ static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zon
     return time_zones;
 }
 
-static void parse_zone_continuation(StringView zone_line, Vector<TimeZone>& time_zones)
+static void parse_zone_continuation(StringView zone_line, Vector<TimeZoneOffset>& time_zones)
 {
     auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
 
     // STDOFF RULES FORMAT [UNTIL]
-    TimeZone time_zone {};
-    time_zone.offset = parse_time(segments[0]);
+    TimeZoneOffset time_zone {};
+    time_zone.offset = parse_time_offset(segments[0]);
 
     if (segments.size() > 3)
         time_zone.until = parse_date_time(segments.span().slice(3));
@@ -145,7 +180,7 @@ static ErrorOr<void> parse_time_zones(StringView time_zone_path, TimeZoneData& t
 {
     // For reference, the man page for `zic` has the best documentation of the TZDB file format.
     auto file = TRY(Core::File::open(time_zone_path, Core::OpenMode::ReadOnly));
-    Vector<TimeZone>* last_parsed_zone = nullptr;
+    Vector<TimeZoneOffset>* last_parsed_zone = nullptr;
 
     while (file->can_read_line()) {
         auto line = file->read_line();
@@ -224,13 +259,72 @@ static void generate_time_zone_data_implementation(Core::File& file, TimeZoneDat
 #include <AK/Array.h>
 #include <AK/BinarySearch.h>
 #include <AK/Optional.h>
+#include <AK/Span.h>
 #include <AK/StringView.h>
+#include <AK/Time.h>
 #include <LibTimeZone/TimeZone.h>
 #include <LibTimeZone/TimeZoneData.h>
 
 namespace TimeZone {
+
+static constexpr auto seconds_per_day = 86'400;
+static constexpr auto seconds_per_hour = 3'600;
+static constexpr auto seconds_per_minute = 60;
+
+struct DateTime {
+    AK::Time time_since_epoch() const
+    {
+        // FIXME: This implementation does not take last_weekday or after_weekday into account.
+        i64 seconds_since_epoch = AK::days_since_epoch(year, month, day);
+        seconds_since_epoch *= seconds_per_day;
+
+        seconds_since_epoch += hour * seconds_per_hour;
+        seconds_since_epoch += minute * seconds_per_minute;
+        seconds_since_epoch += second;
+
+        return AK::Time::from_seconds(seconds_since_epoch);
+    }
+
+    u16 year { 0 };
+    u8 month { 1 };
+    u8 day { 1 };
+
+    u8 last_weekday { 0 };
+    u8 after_weekday { 0 };
+
+    u8 hour { 0 };
+    u8 minute { 0 };
+    u8 second { 0 };
+};
+
+struct TimeZoneOffset {
+    i64 offset { 0 };
+
+    DateTime until {};
+    bool has_until { false };
+};
+)~~~");
+
+    auto append_time_zone_offsets = [&](auto const& name, auto const& time_zone_offsets) {
+        generator.set("name", name);
+        generator.set("size", String::number(time_zone_offsets.size()));
+
+        generator.append(R"~~~(
+static constexpr Array<TimeZoneOffset, @size@> @name@ { {
 )~~~");
 
+        for (auto const& time_zone_offset : time_zone_offsets)
+            generator.append(String::formatted("    {},\n", time_zone_offset));
+
+        generator.append("} };\n");
+    };
+
+    generate_mapping(generator, time_zone_data.time_zone_names, "TimeZoneOffset"sv, "s_time_zone_offsets"sv, "s_time_zone_offsets_{}", format_identifier,
+        [&](auto const& name, auto const& value) {
+            auto const& time_zone_offsets = time_zone_data.time_zones.find(value)->value;
+            append_time_zone_offsets(name, time_zone_offsets);
+        });
+
     auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values, auto const& aliases) {
         HashValueMap<String> hashes;
         hashes.ensure_capacity(values.size());
@@ -254,12 +348,27 @@ namespace TimeZone {
     append_string_conversions("TimeZone"sv, "time_zone"sv, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
 
     generator.append(R"~~~(
+Optional<i64> get_time_zone_offset(TimeZone time_zone, AK::Time time)
+{
+    // FIXME: This implementation completely ignores DST.
+    auto const& time_zone_offsets = s_time_zone_offsets[to_underlying(time_zone)];
+
+    size_t index = 0;
+    for (; index < time_zone_offsets.size(); ++index) {
+        auto const& time_zone_offset = time_zone_offsets[index];
+
+        if (!time_zone_offset.has_until || (time_zone_offset.until.time_since_epoch() > time))
+            break;
+    }
+
+    VERIFY(index < time_zone_offsets.size());
+    return time_zone_offsets[index].offset;
 }
-)~~~");
 
-    VERIFY(file.write(generator.as_string_view()));
 }
+)~~~");
 
+    VERIFY(file.write(generator.as_string_view()));
 }
 
 ErrorOr<int> serenity_main(Main::Arguments arguments)

+ 42 - 0
Tests/LibTimeZone/TestTimeZone.cpp

@@ -7,6 +7,7 @@
 #include <LibTest/TestCase.h>
 
 #include <AK/StringView.h>
+#include <AK/Time.h>
 #include <LibTimeZone/TimeZone.h>
 
 #if ENABLE_TIME_ZONE_DATA
@@ -84,4 +85,45 @@ TEST_CASE(canonicalize_time_zone)
     EXPECT(!TimeZone::canonicalize_time_zone("I don't exist"sv).has_value());
 }
 
+TEST_CASE(get_time_zone_offset)
+{
+    auto offset = [](i64 sign, i64 hours, i64 minutes, i64 seconds) { // Builds a signed offset in seconds.
+        return sign * ((hours * 3600) + (minutes * 60) + seconds);
+    };
+
+    auto test_offset = [](auto time_zone, i64 time, i64 expected_offset) { // Looks up the offset at AK::Time::from_seconds(time) and compares it.
+        auto actual_offset = TimeZone::get_time_zone_offset(time_zone, AK::Time::from_seconds(time));
+        VERIFY(actual_offset.has_value());
+        EXPECT_EQ(*actual_offset, expected_offset);
+    };
+
+    test_offset("America/Chicago"sv, -2717668237, offset(-1, 5, 50, 36)); // Sunday, November 18, 1883 12:09:23 PM
+    test_offset("America/Chicago"sv, -2717668236, offset(-1, 6, 00, 00)); // Sunday, November 18, 1883 12:09:24 PM
+    test_offset("America/Chicago"sv, -1067810460, offset(-1, 6, 00, 00)); // Sunday, March 1, 1936 1:59:00 AM
+    test_offset("America/Chicago"sv, -1067810400, offset(-1, 5, 00, 00)); // Sunday, March 1, 1936 2:00:00 AM
+    test_offset("America/Chicago"sv, -1045432860, offset(-1, 5, 00, 00)); // Sunday, November 15, 1936 1:59:00 AM
+    test_offset("America/Chicago"sv, -1045432800, offset(-1, 6, 00, 00)); // Sunday, November 15, 1936 2:00:00 AM
+
+    test_offset("Europe/London"sv, -3852662401, offset(-1, 0, 01, 15)); // Tuesday, November 30, 1847 11:59:59 PM
+    test_offset("Europe/London"sv, -3852662400, offset(+1, 0, 00, 00)); // Wednesday, December 1, 1847 12:00:00 AM
+    test_offset("Europe/London"sv, -37238401, offset(+1, 0, 00, 00));   // Saturday, October 26, 1968 11:59:59 PM
+    test_offset("Europe/London"sv, -37238400, offset(+1, 1, 00, 00));   // Sunday, October 27, 1968 12:00:00 AM
+    test_offset("Europe/London"sv, 57722399, offset(+1, 1, 00, 00));    // Sunday, October 31, 1971 1:59:59 AM
+    test_offset("Europe/London"sv, 57722400, offset(+1, 0, 00, 00));    // Sunday, October 31, 1971 2:00:00 AM
+
+    test_offset("UTC"sv, -1641846268, offset(+1, 0, 00, 00));
+    test_offset("UTC"sv, 0, offset(+1, 0, 00, 00));
+    test_offset("UTC"sv, 1641846268, offset(+1, 0, 00, 00));
+
+    test_offset("Etc/GMT+4"sv, -1641846268, offset(-1, 4, 00, 00));
+    test_offset("Etc/GMT+5"sv, 0, offset(-1, 5, 00, 00));
+    test_offset("Etc/GMT+6"sv, 1641846268, offset(-1, 6, 00, 00));
+
+    test_offset("Etc/GMT-12"sv, -1641846268, offset(+1, 12, 00, 00));
+    test_offset("Etc/GMT-13"sv, 0, offset(+1, 13, 00, 00));
+    test_offset("Etc/GMT-14"sv, 1641846268, offset(+1, 14, 00, 00));
+
+    EXPECT(!TimeZone::get_time_zone_offset("I don't exist"sv, {}).has_value()); // An unknown zone name must yield no offset.
+}
+
 #endif

+ 9 - 0
Userland/Libraries/LibTimeZone/TimeZone.cpp

@@ -24,4 +24,13 @@ Optional<StringView> canonicalize_time_zone(StringView time_zone)
     return canonical_time_zone;
 }
 
+Optional<i64> __attribute__((weak)) get_time_zone_offset(TimeZone, AK::Time) { return {}; } // Weak fallback returning an empty Optional; presumably replaced by the generated definition when time zone data is built (confirm).
+
+Optional<i64> get_time_zone_offset(StringView time_zone, AK::Time time) // Name-based overload: resolves the name, then defers to the TimeZone overload.
+{
+    if (auto maybe_time_zone = time_zone_from_string(time_zone); maybe_time_zone.has_value())
+        return get_time_zone_offset(*maybe_time_zone, time);
+    return {}; // The name did not resolve to a TimeZone.
+}
+
 }

+ 5 - 0
Userland/Libraries/LibTimeZone/TimeZone.h

@@ -8,6 +8,8 @@
 
 #include <AK/Optional.h>
 #include <AK/StringView.h>
+#include <AK/Time.h>
+#include <AK/Types.h>
 #include <LibTimeZone/Forward.h>
 
 namespace TimeZone {
@@ -16,4 +18,7 @@ Optional<TimeZone> time_zone_from_string(StringView time_zone);
 StringView time_zone_to_string(TimeZone time_zone);
 Optional<StringView> canonicalize_time_zone(StringView time_zone);
 
+Optional<i64> get_time_zone_offset(TimeZone time_zone, AK::Time time); // Offset, in seconds, in effect for the zone at `time`; may be empty.
+Optional<i64> get_time_zone_offset(StringView time_zone, AK::Time time); // Same lookup by zone name; empty if the name is not a known time zone.
+
 }