소스 검색

LibUnicode: Parse and generate relative-time format patterns

Relative-time format patterns are of one of two forms:

    * Tensed - refer to the past or the future, e.g. "N years ago" or
      "in N years".
    * Numbered - refer to a specific numeric value, e.g. "in 1 year"
      becomes "next year" and "in 0 years" becomes "this year".

In ECMA-402, tensed and numbered refer to the numeric formatting options
of "always" and "auto", respectively.
Timothy Flynn 3 년 전
부모
커밋
789f093b2e

+ 188 - 1
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp

@@ -17,18 +17,133 @@
 #include <LibCore/ArgsParser.h>
 #include <LibCore/ArgsParser.h>
 #include <LibCore/DirIterator.h>
 #include <LibCore/DirIterator.h>
 #include <LibCore/File.h>
 #include <LibCore/File.h>
+#include <LibUnicode/Locale.h>
+#include <LibUnicode/RelativeTimeFormat.h>
 
 
 using StringIndexType = u16;
 using StringIndexType = u16;
 constexpr auto s_string_index_type = "u16"sv;
 constexpr auto s_string_index_type = "u16"sv;
 
 
+using RelativeTimeFormatIndexType = u16;
+constexpr auto s_relative_time_format_index_type = "u16"sv;
+
+// One relative-time pattern as collected by this generator. The unit, style and plurality
+// are kept as strings; tense_or_number and pattern are indices into the unique string storage.
+struct RelativeTimeFormat {
+    unsigned hash() const
+    {
+        // Fold every field into the running hash, always in the same order.
+        unsigned combined = time_unit.hash();
+        combined = pair_int_hash(combined, style.hash());
+        combined = pair_int_hash(combined, plurality.hash());
+        combined = pair_int_hash(combined, tense_or_number);
+        return pair_int_hash(combined, pattern);
+    }
+
+    bool operator==(RelativeTimeFormat const& other) const
+    {
+        // Compare the integer indices first, then the string fields.
+        if (tense_or_number != other.tense_or_number || pattern != other.pattern)
+            return false;
+
+        return time_unit == other.time_unit
+            && style == other.style
+            && plurality == other.plurality;
+    }
+
+    String time_unit;
+    String style;
+    String plurality;
+    StringIndexType tense_or_number { 0 };
+    StringIndexType pattern { 0 };
+};
+
+// Formats a RelativeTimeFormat as a brace-enclosed list: the qualified TimeUnit, Style and
+// Plurality names followed by the tense/number and pattern string indices.
+template<>
+struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
+    ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& relative_time_format)
+    {
+        auto const& value = relative_time_format;
+
+        return Formatter<FormatString>::format(builder,
+            "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}",
+            value.time_unit, value.style, value.plurality,
+            value.tense_or_number, value.pattern);
+    }
+};
+
+// Hashing trait for RelativeTimeFormat; defers to the struct's own hash().
+template<>
+struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
+    static unsigned hash(RelativeTimeFormat const& relative_time_format)
+    {
+        return relative_time_format.hash();
+    }
+};
+
 struct Locale {
 struct Locale {
+    Vector<RelativeTimeFormatIndexType> time_units;
 };
 };
 
 
 struct UnicodeLocaleData {
 struct UnicodeLocaleData {
     UniqueStringStorage<StringIndexType> unique_strings;
     UniqueStringStorage<StringIndexType> unique_strings;
+    UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats;
+
     HashMap<String, Locale> locales;
     HashMap<String, Locale> locales;
 };
 };
 
 
+// Loads dateFields.json from the given locale directory and appends every relative-time
+// pattern found for an ECMA-402 sanctioned unit to the locale's time_units list.
+// Fails if the file cannot be opened or its contents cannot be parsed as JSON.
+static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale)
+{
+    LexicalPath date_fields_path(move(locale_dates_path));
+    date_fields_path = date_fields_path.append("dateFields.json"sv);
+
+    auto file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly));
+    auto json = TRY(JsonValue::from_string(file->read_all()));
+
+    // Descend main -> <name of the directory holding the file> -> dates -> fields.
+    auto const& main_value = json.as_object().get("main"sv);
+    auto const& locale_value = main_value.as_object().get(date_fields_path.parent().basename());
+    auto const& dates_value = locale_value.as_object().get("dates"sv);
+    auto const& fields_value = dates_value.as_object().get("fields"sv);
+
+    // Only the time units sanctioned for use within ECMA-402 are kept.
+    // https://tc39.es/ecma402/#sec-singularrelativetimeunit
+    auto is_sanctioned_unit = [](auto unit) {
+        return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
+    };
+
+    // Interns the strings, de-duplicates the resulting format and records its index on the locale.
+    auto record_format = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
+        RelativeTimeFormat format {};
+        format.time_unit = unit.to_titlecase_string();
+        format.style = style.to_titlecase_string();
+        format.plurality = plurality.to_titlecase_string();
+        format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number);
+        format.pattern = locale_data.unique_strings.ensure(pattern.as_string());
+
+        auto format_index = locale_data.unique_formats.ensure(move(format));
+        locale.time_units.append(format_index);
+    };
+
+    fields_value.as_object().for_each_member([&](auto const& field_name, auto const& field_patterns) {
+        // Keys are split on '-': the first segment is the unit, the optional second one the style.
+        auto segments = field_name.split_view('-');
+        auto unit = segments[0];
+
+        if (!is_sanctioned_unit(unit))
+            return;
+
+        // A key without a style segment uses the "long" style.
+        StringView style = "long"sv;
+        if (segments.size() > 1)
+            style = segments[1];
+
+        field_patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
+            constexpr auto number_key = "relative-type-"sv;
+            constexpr auto tense_key = "relativeTime-type-"sv;
+            constexpr auto plurality_key = "relativeTimePattern-count-"sv;
+
+            if (type.starts_with(number_key)) {
+                // Numbered patterns have no count key of their own; they are filed under "Other".
+                record_format(unit, style, "Other"sv, type.substring_view(number_key.length()), pattern_value);
+                return;
+            }
+
+            if (!type.starts_with(tense_key))
+                return;
+
+            // The tense is the same for every plural form nested under this key.
+            auto tense = type.substring_view(tense_key.length());
+
+            pattern_value.as_object().for_each_member([&](auto const& count_key, auto const& pattern) {
+                VERIFY(count_key.starts_with(plurality_key));
+                record_format(unit, style, count_key.substring_view(plurality_key.length()), tense, pattern);
+            });
+        });
+    });
+
+    return {};
+}
+
 static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
 static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
 {
 {
     auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
     auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
@@ -50,7 +165,8 @@ static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc
         auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
         auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
         auto language = TRY(remove_variants_from_path(dates_path));
         auto language = TRY(remove_variants_from_path(dates_path));
 
 
-        [[maybe_unused]] auto& locale = locale_data.locales.ensure(language);
+        auto& locale = locale_data.locales.ensure(language);
+        TRY(parse_date_fields(move(dates_path), locale_data, locale));
     }
     }
 
 
     return {};
     return {};
@@ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
     StringBuilder builder;
     StringBuilder builder;
     SourceGenerator generator { builder };
     SourceGenerator generator { builder };
     generator.set("string_index_type"sv, s_string_index_type);
     generator.set("string_index_type"sv, s_string_index_type);
+    generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);
 
 
     generator.append(R"~~~(
     generator.append(R"~~~(
 #include <AK/Array.h>
 #include <AK/Array.h>
 #include <AK/StringView.h>
 #include <AK/StringView.h>
+#include <AK/Vector.h>
+#include <LibUnicode/Locale.h>
+#include <LibUnicode/RelativeTimeFormat.h>
 #include <LibUnicode/UnicodeRelativeTimeFormat.h>
 #include <LibUnicode/UnicodeRelativeTimeFormat.h>
 
 
 namespace Unicode {
 namespace Unicode {
@@ -93,6 +213,73 @@ namespace Unicode {
     locale_data.unique_strings.generate(generator);
     locale_data.unique_strings.generate(generator);
 
 
     generator.append(R"~~~(
     generator.append(R"~~~(
+struct RelativeTimeFormatImpl {
+    RelativeTimeFormat to_relative_time_format() const
+    {
+        RelativeTimeFormat relative_time_format {};
+        relative_time_format.plurality = plurality;
+        relative_time_format.pattern = s_string_list[pattern];
+
+        return relative_time_format;
+    }
+
+    TimeUnit time_unit;
+    Style style;
+    RelativeTimeFormat::Plurality plurality;
+    @string_index_type@ tense_or_number { 0 };
+    @string_index_type@ pattern { 0 };
+};
+)~~~");
+
+    locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
+
+    auto append_list = [&](String name, auto const& list) {
+        generator.set("name", name);
+        generator.set("size", String::number(list.size()));
+
+        generator.append(R"~~~(
+static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
+
+        bool first = true;
+        for (auto index : list) {
+            generator.append(first ? " " : ", ");
+            generator.append(String::number(index));
+            first = false;
+        }
+
+        generator.append(" } };");
+    };
+
+    // Name the per-locale lists after this mapping; the previous format string was a number-system name left over from a copy-paste.
+    generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_locale_relative_time_formats_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
+
+    generator.append(R"~~~(
+Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
+{
+    Vector<RelativeTimeFormat> formats;
+
+    auto locale_value = locale_from_string(locale);
+    if (!locale_value.has_value())
+        return formats;
+
+    auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
+    auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
+
+    for (auto const& locale_format_index : locale_formats) {
+        auto const& locale_format = s_relative_time_formats.at(locale_format_index);
+
+        if (locale_format.time_unit != time_unit)
+            continue;
+        if (locale_format.style != style)
+            continue;
+        if (s_string_list[locale_format.tense_or_number] != tense_or_number)
+            continue;
+
+        formats.append(locale_format.to_relative_time_format());
+    }
+
+    return formats;
+}
+
 }
 }
 )~~~");
 )~~~");
 
 

+ 1 - 0
Userland/Libraries/LibUnicode/CMakeLists.txt

@@ -13,6 +13,7 @@ set(SOURCES
     DateTimeFormat.cpp
     DateTimeFormat.cpp
     Locale.cpp
     Locale.cpp
     NumberFormat.cpp
     NumberFormat.cpp
+    RelativeTimeFormat.cpp
 )
 )
 
 
 serenity_lib(LibUnicode unicode)
 serenity_lib(LibUnicode unicode)

+ 58 - 0
Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp

@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibUnicode/RelativeTimeFormat.h>
+
+namespace Unicode {
+
+// Maps a lowercase singular unit name ("second" through "year") to its TimeUnit.
+// Returns an empty Optional for any other string.
+Optional<TimeUnit> time_unit_from_string(StringView time_unit)
+{
+    struct Mapping {
+        StringView name;
+        TimeUnit unit;
+    };
+
+    static constexpr Mapping mappings[] = {
+        { "second"sv, TimeUnit::Second },
+        { "minute"sv, TimeUnit::Minute },
+        { "hour"sv, TimeUnit::Hour },
+        { "day"sv, TimeUnit::Day },
+        { "week"sv, TimeUnit::Week },
+        { "month"sv, TimeUnit::Month },
+        { "quarter"sv, TimeUnit::Quarter },
+        { "year"sv, TimeUnit::Year },
+    };
+
+    for (auto const& mapping : mappings) {
+        if (time_unit == mapping.name)
+            return mapping.unit;
+    }
+
+    return {};
+}
+
+// Returns the lowercase singular name of the given TimeUnit.
+// A value outside the enumerators trips VERIFY_NOT_REACHED().
+StringView time_unit_to_string(TimeUnit time_unit)
+{
+    switch (time_unit) {
+    case TimeUnit::Second:
+        return "second"sv;
+    case TimeUnit::Minute:
+        return "minute"sv;
+    case TimeUnit::Hour:
+        return "hour"sv;
+    case TimeUnit::Day:
+        return "day"sv;
+    case TimeUnit::Week:
+        return "week"sv;
+    case TimeUnit::Month:
+        return "month"sv;
+    case TimeUnit::Quarter:
+        return "quarter"sv;
+    case TimeUnit::Year:
+        return "year"sv;
+    }
+
+    // Every enumerator returns above, so only an invalid value can get here.
+    VERIFY_NOT_REACHED();
+}
+
+Vector<RelativeTimeFormat> __attribute__((weak)) get_relative_time_format_patterns(StringView, TimeUnit, StringView, Style) { return {}; } // Weak fallback that returns no patterns; presumably overridden by the generated definition when it is linked in.
+
+}

+ 48 - 0
Userland/Libraries/LibUnicode/RelativeTimeFormat.h

@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Optional.h>
+#include <AK/StringView.h>
+#include <AK/Vector.h>
+#include <LibUnicode/Forward.h>
+#include <LibUnicode/Locale.h>
+
+namespace Unicode {
+
+// These are just the subset of fields in the CLDR required for ECMA-402; each has a lowercase string form handled by time_unit_from_string() and time_unit_to_string().
+enum class TimeUnit {
+    Second,
+    Minute,
+    Hour,
+    Day,
+    Week,
+    Month,
+    Quarter,
+    Year,
+};
+
+struct RelativeTimeFormat { // A single pattern returned by get_relative_time_format_patterns().
+    enum class Plurality { // Names match the title-cased "relativeTimePattern-count-*" suffixes read by the generator.
+        Zero,
+        One,
+        Two,
+        Few,
+        Many,
+        Other,
+    };
+
+    Plurality plurality { Plurality::Other }; // Plural form this pattern applies to; defaults to Other.
+    StringView pattern; // The pattern text itself.
+};
+
+Optional<TimeUnit> time_unit_from_string(StringView time_unit);
+StringView time_unit_to_string(TimeUnit time_unit);
+
+Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style);
+
+}