mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibUnicode: Add minimal support for generic & offset-based time zones
ECMA-402 now supports short-offset, long-offset, short-generic, and long-generic time zone name formatting. For example, in the en-US locale the America/Eastern time zone would be formatted as: short-offset: GMT-5 long-offset: GMT-05:00 short-generic: ET long-generic: Eastern Time We currently only support the UTC time zone, however. Therefore, this very minimal implementation does not consider GMT offset or generic display names. Instead, the CLDR defines specific strings for UTC.
This commit is contained in:
parent
d2ac40bcd7
commit
126a3fe180
Notes:
sideshowbarker
2024-07-17 21:46:00 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/126a3fe180b Pull-request: https://github.com/SerenityOS/serenity/pull/11575 Reviewed-by: https://github.com/linusg ✅
3 changed files with 109 additions and 19 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -61,6 +61,9 @@ constexpr auto s_time_zone_index_type = "u16"sv;
|
|||
using TimeZoneListIndexType = u8;
|
||||
constexpr auto s_time_zone_list_index_type = "u8"sv;
|
||||
|
||||
using TimeZoneFormatIndexType = u8;
|
||||
constexpr auto s_time_zone_format_index_type = "u8"sv;
|
||||
|
||||
using DayPeriodIndexType = u8;
|
||||
constexpr auto s_day_period_index_type = "u8"sv;
|
||||
|
||||
|
@ -407,6 +410,33 @@ struct AK::Traits<TimeZone> : public GenericTraits<TimeZone> {
|
|||
static unsigned hash(TimeZone const& t) { return t.hash(); }
|
||||
};
|
||||
|
||||
struct TimeZoneFormat {
|
||||
unsigned hash() const
|
||||
{
|
||||
return int_hash(gmt_zero_format);
|
||||
}
|
||||
|
||||
bool operator==(TimeZoneFormat const& other) const
|
||||
{
|
||||
return gmt_zero_format == other.gmt_zero_format;
|
||||
}
|
||||
|
||||
StringIndexType gmt_zero_format { 0 };
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AK::Formatter<TimeZoneFormat> : Formatter<FormatString> {
|
||||
ErrorOr<void> format(FormatBuilder& builder, TimeZoneFormat const& time_zone_format)
|
||||
{
|
||||
return Formatter<FormatString>::format(builder, "{{ {} }}", time_zone_format.gmt_zero_format);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AK::Traits<TimeZoneFormat> : public GenericTraits<TimeZoneFormat> {
|
||||
static unsigned hash(TimeZoneFormat const& t) { return t.hash(); }
|
||||
};
|
||||
|
||||
struct DayPeriod {
|
||||
unsigned hash() const
|
||||
{
|
||||
|
@ -459,7 +489,10 @@ struct AK::Formatter<Unicode::HourCycle> : Formatter<FormatString> {
|
|||
|
||||
struct Locale {
|
||||
HashMap<String, CalendarIndexType> calendars;
|
||||
|
||||
TimeZoneListIndexType time_zones { 0 };
|
||||
TimeZoneFormatIndexType time_zone_formats { 0 };
|
||||
|
||||
DayPeriodListIndexType day_periods { 0 };
|
||||
};
|
||||
|
||||
|
@ -476,6 +509,7 @@ struct UnicodeLocaleData {
|
|||
UniqueStorage<Calendar, CalendarIndexType> unique_calendars;
|
||||
UniqueStorage<TimeZone, TimeZoneIndexType> unique_time_zones;
|
||||
UniqueStorage<TimeZoneList, TimeZoneListIndexType> unique_time_zone_lists;
|
||||
UniqueStorage<TimeZoneFormat, TimeZoneFormatIndexType> unique_time_zone_formats;
|
||||
UniqueStorage<DayPeriod, DayPeriodIndexType> unique_day_periods;
|
||||
UniqueStorage<DayPeriodList, DayPeriodListIndexType> unique_day_period_lists;
|
||||
UniqueStorage<HourCycleList, HourCycleListIndexType> unique_hour_cycle_lists;
|
||||
|
@ -821,13 +855,27 @@ static Optional<CalendarPattern> parse_date_time_pattern_raw(String pattern, Str
|
|||
}
|
||||
|
||||
// Zone
|
||||
else if (all_of(segment, is_any_of("zZOvVXx"))) {
|
||||
else if (all_of(segment, is_any_of("zV"sv))) {
|
||||
builder.append("{timeZoneName}");
|
||||
|
||||
if (segment.length() < 4)
|
||||
format.time_zone_name = CalendarPatternStyle::Short;
|
||||
else
|
||||
format.time_zone_name = CalendarPatternStyle::Long;
|
||||
} else if (all_of(segment, is_any_of("ZOXx"sv))) {
|
||||
builder.append("{timeZoneName}");
|
||||
|
||||
if (segment.length() < 4)
|
||||
format.time_zone_name = CalendarPatternStyle::ShortOffset;
|
||||
else
|
||||
format.time_zone_name = CalendarPatternStyle::LongOffset;
|
||||
} else if (all_of(segment, is_char('v'))) {
|
||||
builder.append("{timeZoneName}");
|
||||
|
||||
if (segment.length() < 4)
|
||||
format.time_zone_name = CalendarPatternStyle::ShortGeneric;
|
||||
else
|
||||
format.time_zone_name = CalendarPatternStyle::LongGeneric;
|
||||
}
|
||||
|
||||
// Non-patterns
|
||||
|
@ -1327,6 +1375,7 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
|
|||
auto const& dates_object = locale_object.as_object().get("dates"sv);
|
||||
auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv);
|
||||
auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv);
|
||||
auto const& gmt_zero_format_string = time_zone_names_object.as_object().get("gmtZeroFormat"sv);
|
||||
|
||||
if (meta_zone_object.is_null())
|
||||
return {};
|
||||
|
@ -1349,6 +1398,9 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
|
|||
|
||||
TimeZoneList time_zones;
|
||||
|
||||
TimeZoneFormat time_zone_formats {};
|
||||
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
|
||||
|
||||
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
|
||||
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
|
||||
TimeZone time_zone {};
|
||||
|
@ -1383,6 +1435,8 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
|
|||
parse_time_zone("UTC"sv, utc_object.as_object());
|
||||
|
||||
locale.time_zones = locale_data.unique_time_zone_lists.ensure(move(time_zones));
|
||||
locale.time_zone_formats = locale_data.unique_time_zone_formats.ensure(move(time_zone_formats));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -1689,6 +1743,10 @@ struct TimeZoneData {
|
|||
@string_index_type@ short_name { 0 };
|
||||
};
|
||||
|
||||
struct TimeZoneFormat {
|
||||
@string_index_type@ gmt_zero_format { 0 };
|
||||
};
|
||||
|
||||
struct DayPeriodData {
|
||||
u8 day_period { 0 };
|
||||
u8 begin { 0 };
|
||||
|
@ -1703,6 +1761,7 @@ struct DayPeriodData {
|
|||
locale_data.unique_calendars.generate(generator, "CalendarData"sv, "s_calendars"sv, 10);
|
||||
locale_data.unique_time_zones.generate(generator, "TimeZoneData"sv, "s_time_zones"sv, 30);
|
||||
locale_data.unique_time_zone_lists.generate(generator, s_time_zone_index_type, "s_time_zone_lists"sv);
|
||||
locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormat"sv, "s_time_zone_formats"sv, 30);
|
||||
locale_data.unique_day_periods.generate(generator, "DayPeriodData"sv, "s_day_periods"sv, 30);
|
||||
locale_data.unique_day_period_lists.generate(generator, s_day_period_index_type, "s_day_period_lists"sv);
|
||||
locale_data.unique_hour_cycle_lists.generate(generator, "u8"sv, "s_hour_cycle_lists"sv);
|
||||
|
@ -1752,6 +1811,7 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
|
|||
|
||||
generate_mapping(generator, locale_data.locales, s_calendar_index_type, "s_locale_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); });
|
||||
append_mapping(locales, locale_data.locales, s_time_zone_index_type, "s_locale_time_zones"sv, [](auto const& locale) { return locale.time_zones; });
|
||||
append_mapping(locales, locale_data.locales, s_time_zone_format_index_type, "s_locale_time_zone_formats"sv, [](auto const& locale) { return locale.time_zone_formats; });
|
||||
append_mapping(locales, locale_data.locales, s_day_period_index_type, "s_locale_day_periods"sv, [](auto const& locale) { return locale.day_periods; });
|
||||
append_mapping(locale_data.hour_cycle_regions, locale_data.hour_cycles, s_hour_cycle_list_index_type, "s_hour_cycles"sv, [](auto const& hour_cycles) { return hour_cycles; });
|
||||
generator.append("\n");
|
||||
|
@ -2011,6 +2071,18 @@ Optional<StringView> get_calendar_day_period_symbol_for_hour(StringView locale,
|
|||
return Detail::get_calendar_day_period_symbol(locale, calendar, style, Unicode::DayPeriod::PM);
|
||||
}
|
||||
|
||||
static TimeZoneFormat const* find_time_zone_formats(StringView locale)
|
||||
{
|
||||
auto locale_value = locale_from_string(locale);
|
||||
if (!locale_value.has_value())
|
||||
return nullptr;
|
||||
|
||||
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
|
||||
|
||||
auto time_zone_format_index = s_locale_time_zone_formats.at(locale_index);
|
||||
return &s_time_zone_formats.at(time_zone_format_index);
|
||||
}
|
||||
|
||||
static TimeZoneData const* find_time_zone_data(StringView locale, StringView time_zone)
|
||||
{
|
||||
auto locale_value = locale_from_string(locale);
|
||||
|
@ -2034,23 +2106,21 @@ static TimeZoneData const* find_time_zone_data(StringView locale, StringView tim
|
|||
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style) asm("unicode_get_time_zone_name");
|
||||
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style)
|
||||
{
|
||||
if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) {
|
||||
@string_index_type@ time_zone_index = 0;
|
||||
// FIXME: This becomes more complicated when time zones other than UTC are supported. We will need to know the GMT offset
|
||||
// of each time zone (which must be parsed from the time zone database, not the CLDR). For now, assuming UTC means
|
||||
// we can assume a GMT offset of 0, for which the CLDR has a specific format string for the offset styles. Further,
|
||||
// we will need to parse the "generic" time zone names from timeZoneNames.json.
|
||||
VERIFY(time_zone == "UTC"sv);
|
||||
|
||||
switch (style) {
|
||||
case CalendarPatternStyle::Long:
|
||||
time_zone_index = data->long_name;
|
||||
break;
|
||||
case CalendarPatternStyle::Short:
|
||||
time_zone_index = data->short_name;
|
||||
break;
|
||||
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
if ((style == CalendarPatternStyle::Long) || (style == CalendarPatternStyle::Short)) {
|
||||
if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) {
|
||||
auto time_zone_index = style == CalendarPatternStyle::Long ? data->long_name : data->short_name;
|
||||
if (time_zone_index != 0)
|
||||
return s_string_list[time_zone_index];
|
||||
}
|
||||
|
||||
if (time_zone_index != 0)
|
||||
return s_string_list[time_zone_index];
|
||||
} else {
|
||||
if (auto const* formats = find_time_zone_formats(locale); formats != nullptr)
|
||||
return s_string_list[formats->gmt_zero_format];
|
||||
}
|
||||
|
||||
return {};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -53,6 +53,14 @@ CalendarPatternStyle calendar_pattern_style_from_string(StringView style)
|
|||
return CalendarPatternStyle::Numeric;
|
||||
if (style == "2-digit"sv)
|
||||
return CalendarPatternStyle::TwoDigit;
|
||||
if (style == "shortOffset"sv)
|
||||
return CalendarPatternStyle::ShortOffset;
|
||||
if (style == "longOffset"sv)
|
||||
return CalendarPatternStyle::LongOffset;
|
||||
if (style == "shortGeneric"sv)
|
||||
return CalendarPatternStyle::ShortGeneric;
|
||||
if (style == "longGeneric"sv)
|
||||
return CalendarPatternStyle::LongGeneric;
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
|
@ -69,6 +77,14 @@ StringView calendar_pattern_style_to_string(CalendarPatternStyle style)
|
|||
return "numeric"sv;
|
||||
case CalendarPatternStyle::TwoDigit:
|
||||
return "2-digit"sv;
|
||||
case CalendarPatternStyle::ShortOffset:
|
||||
return "shortOffset"sv;
|
||||
case CalendarPatternStyle::LongOffset:
|
||||
return "longOffset"sv;
|
||||
case CalendarPatternStyle::ShortGeneric:
|
||||
return "shortGeneric"sv;
|
||||
case CalendarPatternStyle::LongGeneric:
|
||||
return "longGeneric"sv;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -71,6 +71,10 @@ enum class CalendarPatternStyle : u8 {
|
|||
Long,
|
||||
Numeric,
|
||||
TwoDigit,
|
||||
ShortOffset,
|
||||
LongOffset,
|
||||
ShortGeneric,
|
||||
LongGeneric,
|
||||
};
|
||||
|
||||
struct CalendarPattern {
|
||||
|
|
Loading…
Reference in a new issue