Sfoglia il codice sorgente

LibUnicode: Parse and generate format pattern skeletons

Pattern skeletons are more or less the "key" of format patterns. Every
format pattern is assigned a skeleton. Interval patterns (which are not
yet parsed) are also assigned a skeleton - this is used to match them to
an "owning" format pattern. So we will use the skeleton generated here
to match format patterns at runtime with their available interval
patterns.

An alternative approach would be to append interval patterns directly to
their owning format pattern, but this has some draw backs:

    1. Skeletons aren't totally unique. A skeleton may appear in both
       the "dateFormats" and "availableFormats" objects, in which case
       the same interval formats would be generated more than once.

    2. Otherwise unique format patterns may only differ by the interval
       patterns assigned to them. This would cause the UniqueStorage for
       the format patterns to increase in size, impacting both compile
       times and libunicode.so size.
Timothy Flynn 3 anni fa
parent
commit
fe84a365c2

+ 23 - 10
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp

@@ -45,6 +45,7 @@ struct CalendarPattern : public Unicode::CalendarPattern {
     unsigned hash() const
     {
         auto hash = pair_int_hash(pattern_index, pattern12_index);
+        hash = pair_int_hash(hash, skeleton_index);
 
         auto hash_field = [&](auto const& field) {
             if (field.has_value())
@@ -70,7 +71,8 @@ struct CalendarPattern : public Unicode::CalendarPattern {
 
     bool operator==(CalendarPattern const& other) const
     {
-        return (pattern_index == other.pattern_index)
+        return (skeleton_index == other.skeleton_index)
+            && (pattern_index == other.pattern_index)
             && (pattern12_index == other.pattern12_index)
             && (era == other.era)
             && (year == other.year)
@@ -85,6 +87,7 @@ struct CalendarPattern : public Unicode::CalendarPattern {
             && (time_zone_name == other.time_zone_name);
     }
 
+    StringIndexType skeleton_index { 0 };
     StringIndexType pattern_index { 0 };
     StringIndexType pattern12_index { 0 };
 };
@@ -100,7 +103,8 @@ struct AK::Formatter<CalendarPattern> : Formatter<FormatString> {
         };
 
         return Formatter<FormatString>::format(builder,
-            "{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
+            "{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
+            pattern.skeleton_index,
             pattern.pattern_index,
             pattern.pattern12_index,
             field_to_i8(pattern.era),
@@ -298,13 +302,16 @@ static String remove_period_from_pattern(String pattern)
     return pattern;
 }
 
-static Optional<CalendarPatternIndexType> parse_date_time_pattern(String pattern, UnicodeLocaleData& locale_data)
+static Optional<CalendarPatternIndexType> parse_date_time_pattern(String pattern, String skeleton, UnicodeLocaleData& locale_data)
 {
     // https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
     using Unicode::CalendarPatternStyle;
 
     CalendarPattern format {};
 
+    if (!skeleton.is_empty())
+        format.skeleton_index = locale_data.unique_strings.ensure(move(skeleton));
+
     GenericLexer lexer { pattern };
     StringBuilder builder;
     bool hour12 { false };
@@ -742,10 +749,12 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
         });
     };
 
-    auto parse_patterns = [&](auto& formats, auto const& patterns_object, Vector<CalendarPattern>* patterns) {
+    auto parse_patterns = [&](auto& formats, auto const& patterns_object, auto const& skeletons_object, Vector<CalendarPattern>* patterns) {
         auto parse_pattern = [&](auto name) {
             auto format = patterns_object.get(name);
-            auto format_index = parse_date_time_pattern(format.as_string(), locale_data).value();
+            auto skeleton = skeletons_object.get(name);
+
+            auto format_index = parse_date_time_pattern(format.as_string(), skeleton.as_string_or(String::empty()), locale_data).value();
 
             if (patterns)
                 patterns->append(locale_data.unique_patterns.get(format_index));
@@ -774,17 +783,19 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
         Vector<CalendarPattern> time_formats;
 
         auto const& date_formats_object = value.as_object().get("dateFormats"sv);
-        parse_patterns(calendar.date_formats, date_formats_object.as_object(), &date_formats);
+        auto const& date_skeletons_object = value.as_object().get("dateSkeletons"sv);
+        parse_patterns(calendar.date_formats, date_formats_object.as_object(), date_skeletons_object.as_object(), &date_formats);
 
         auto const& time_formats_object = value.as_object().get("timeFormats"sv);
-        parse_patterns(calendar.time_formats, time_formats_object.as_object(), &time_formats);
+        auto const& time_skeletons_object = value.as_object().get("timeSkeletons"sv);
+        parse_patterns(calendar.time_formats, time_formats_object.as_object(), time_skeletons_object.as_object(), &time_formats);
 
         auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv);
-        parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), nullptr);
+        parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), JsonObject {}, nullptr);
 
         auto const& available_formats = date_time_formats_object.as_object().get("availableFormats"sv);
-        available_formats.as_object().for_each_member([&](auto const&, JsonValue const& pattern) {
-            auto pattern_index = parse_date_time_pattern(pattern.as_string(), locale_data);
+        available_formats.as_object().for_each_member([&](auto const& skeleton, JsonValue const& pattern) {
+            auto pattern_index = parse_date_time_pattern(pattern.as_string(), skeleton, locale_data);
             if (!pattern_index.has_value())
                 return;
 
@@ -992,6 +1003,7 @@ struct CalendarPattern {
     Unicode::CalendarPattern to_unicode_calendar_pattern() const {
         Unicode::CalendarPattern calendar_pattern {};
 
+        calendar_pattern.skeleton = s_string_list[skeleton];
         calendar_pattern.pattern = s_string_list[pattern];
         if (pattern12 != 0)
             calendar_pattern.pattern12 = s_string_list[pattern12];
@@ -1022,6 +1034,7 @@ struct CalendarPattern {
         return calendar_pattern;
     }
 
+    @string_index_type@ skeleton { 0 };
     @string_index_type@ pattern { 0 };
     @string_index_type@ pattern12 { 0 };
 

+ 1 - 0
Userland/Libraries/LibUnicode/DateTimeFormat.h

@@ -100,6 +100,7 @@ struct CalendarPattern {
         callback(time_zone_name, other.time_zone_name, Field::TimeZoneName);
     }
 
+    String skeleton {};
     String pattern {};
     Optional<String> pattern12 {};