diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index c041577ef54..794a674d98b 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -191,7 +191,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) Lagom::GenerateUnicodeDateTimeFormat "${UNICODE_DATE_TIME_FORMAT_HEADER}" "${UNICODE_DATE_TIME_FORMAT_IMPLEMENTATION}" - arguments -d "${CLDR_DATES_PATH}" + arguments -r "${CLDR_CORE_PATH}" -d "${CLDR_DATES_PATH}" ) invoke_generator( "UnicodeLocale" diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index f3fa4be49bc..27005716761 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -49,6 +49,9 @@ struct UnicodeLocaleData { UniqueStringStorage unique_strings; HashMap locales; + HashMap> hour_cycles; + Vector hour_cycle_regions; + Vector calendars; Vector calendar_aliases { // FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158 @@ -58,6 +61,50 @@ struct UnicodeLocaleData { size_t max_available_formats_size { 0 }; }; +static ErrorOr parse_hour_cycles(String core_path, UnicodeLocaleData& locale_data) +{ + // https://unicode.org/reports/tr35/tr35-dates.html#Time_Data + LexicalPath time_data_path(move(core_path)); + time_data_path = time_data_path.append("supplemental"sv); + time_data_path = time_data_path.append("timeData.json"sv); + + auto time_data_file = TRY(Core::File::open(time_data_path.string(), Core::OpenMode::ReadOnly)); + auto time_data = TRY(JsonValue::from_string(time_data_file->read_all())); + auto const& supplemental_object = time_data.as_object().get("supplemental"sv); + auto const& time_data_object = supplemental_object.as_object().get("timeData"sv); + + auto parse_hour_cycle = [](StringView hour_cycle) -> Optional { + if (hour_cycle == "h"sv) + return Unicode::HourCycle::H12; + if (hour_cycle == "H"sv) + return Unicode::HourCycle::H23; + if (hour_cycle == "K"sv) + return Unicode::HourCycle::H11; + if (hour_cycle == "k"sv) + return Unicode::HourCycle::H24; + return {}; + }; + + time_data_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { + auto allowed_hour_cycles_string = value.as_object().get("_allowed"sv).as_string(); + auto allowed_hour_cycles = allowed_hour_cycles_string.split_view(' '); + + Vector hour_cycles; + + for (auto allowed_hour_cycle : allowed_hour_cycles) { + if (auto hour_cycle = parse_hour_cycle(allowed_hour_cycle); hour_cycle.has_value()) + hour_cycles.append(*hour_cycle); + } + + locale_data.hour_cycles.set(key, move(hour_cycles)); + + if (!locale_data.hour_cycle_regions.contains_slow(key)) + locale_data.hour_cycle_regions.append(key); + }); + + return {}; +}; + static void parse_date_time_pattern(CalendarPattern& format, String pattern, UnicodeLocaleData& locale_data) { // FIXME: This is very incomplete. Similar to NumberFormat, the pattern string will need to be @@ -131,8 +178,9 @@ static ErrorOr parse_calendars(String locale_calendars_path, UnicodeLocale return {}; } -static ErrorOr parse_all_locales(String dates_path, UnicodeLocaleData& locale_data) +static ErrorOr parse_all_locales(String core_path, String dates_path, UnicodeLocaleData& locale_data) { + TRY(parse_hour_cycles(move(core_path), locale_data)); auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); auto remove_variants_from_path = [&](String path) -> ErrorOr { @@ -164,9 +212,15 @@ static ErrorOr parse_all_locales(String dates_path, UnicodeLocaleData& loc return {}; } -static String format_identifier(StringView, StringView identifier) +static String format_identifier(StringView owner, String identifier) { - return identifier.to_titlecase_string(); + identifier = identifier.replace("-"sv, "_"sv, true); + + if (all_of(identifier, is_ascii_digit)) + return String::formatted("{}_{}", owner[0], identifier); + if (is_ascii_lower_alpha(identifier[0])) + return String::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); + return identifier; } static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data) @@ -185,11 +239,16 @@ namespace Unicode { )~~~"); generate_enum(generator, format_identifier, "Calendar"sv, {}, locale_data.calendars, locale_data.calendar_aliases); + generate_enum(generator, format_identifier, "HourCycleRegion"sv, {}, locale_data.hour_cycle_regions); generator.append(R"~~~( namespace Detail { Optional calendar_from_string(StringView calendar); + +Optional hour_cycle_region_from_string(StringView hour_cycle_region); +Vector get_regional_hour_cycles(StringView region); + Optional get_calendar_date_format(StringView locale, StringView calendar); Optional get_calendar_time_format(StringView locale, StringView calendar); Optional get_calendar_date_time_format(StringView locale, StringView calendar); @@ -313,9 +372,25 @@ static constexpr Array @name@ { {)~~~"); )~~~"); }; - generate_mapping(generator, locale_data.locales, "CalendarData"sv, "s_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); }); + auto append_hour_cycles = [&](String name, auto const& hour_cycles) { + generator.set("name", name); + generator.set("size", String::number(hour_cycles.size())); - auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, auto const& aliases) { + generator.append(R"~~~( +static constexpr Array @name@ { { )~~~"); + + for (auto hour_cycle : hour_cycles) { + generator.set("hour_cycle", String::number(static_cast(hour_cycle))); + generator.append("@hour_cycle@, "); + } + + generator.append("} };"); + }; + + generate_mapping(generator, locale_data.locales, "CalendarData"sv, "s_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); }); + generate_mapping(generator, locale_data.hour_cycles, "u8"sv, "s_hour_cycles"sv, "s_hour_cycles_{}", [&](auto const& name, auto const& value) { append_hour_cycles(name, value); }); + + auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector const& aliases = {}) { HashValueMap hashes; hashes.ensure_capacity(values.size()); @@ -328,8 +403,27 @@ static constexpr Array @name@ { {)~~~"); }; append_from_string("Calendar"sv, "calendar"sv, locale_data.calendars, locale_data.calendar_aliases); + append_from_string("HourCycleRegion"sv, "hour_cycle_region"sv, locale_data.hour_cycle_regions); generator.append(R"~~~( +Vector get_regional_hour_cycles(StringView region) +{ + auto region_value = hour_cycle_region_from_string(region); + if (!region_value.has_value()) + return {}; + + auto region_index = to_underlying(*region_value); + auto const& regional_hour_cycles = s_hour_cycles.at(region_index); + + Vector hour_cycles; + hour_cycles.ensure_capacity(regional_hour_cycles.size()); + + for (auto hour_cycle : regional_hour_cycles) + hour_cycles.unchecked_append(static_cast(hour_cycle)); + + return hour_cycles; +} + static CalendarData const* find_calendar_data(StringView locale, StringView calendar) { auto locale_value = locale_from_string(locale); @@ -392,11 +486,13 @@ ErrorOr serenity_main(Main::Arguments arguments) { StringView generated_header_path; StringView generated_implementation_path; + StringView core_path; StringView dates_path; Core::ArgsParser args_parser; args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); + args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path"); args_parser.parse(arguments); @@ -413,7 +509,7 @@ ErrorOr serenity_main(Main::Arguments arguments) auto generated_implementation_file = TRY(open_file(generated_implementation_path)); UnicodeLocaleData locale_data; - TRY(parse_all_locales(dates_path, locale_data)); + TRY(parse_all_locales(core_path, dates_path, locale_data)); generate_unicode_locale_header(generated_header_file, locale_data); generate_unicode_locale_implementation(generated_implementation_file, locale_data); diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp index 2df9b8f2dc9..b63908d39fa 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp @@ -5,6 +5,7 @@ */ #include +#include #if ENABLE_UNICODE_DATA # include @@ -12,6 +13,35 @@ namespace Unicode { +HourCycle hour_cycle_from_string(StringView hour_cycle) +{ + if (hour_cycle == "h11"sv) + return Unicode::HourCycle::H11; + else if (hour_cycle == "h12"sv) + return Unicode::HourCycle::H12; + else if (hour_cycle == "h23"sv) + return Unicode::HourCycle::H23; + else if (hour_cycle == "h24"sv) + return Unicode::HourCycle::H24; + VERIFY_NOT_REACHED(); +} + +StringView hour_cycle_to_string(HourCycle hour_cycle) +{ + switch (hour_cycle) { + case HourCycle::H11: + return "h11"sv; + case HourCycle::H12: + return "h12"sv; + case HourCycle::H23: + return "h23"sv; + case HourCycle::H24: + return "h24"sv; + default: + VERIFY_NOT_REACHED(); + } +} + CalendarPatternStyle calendar_pattern_style_from_string(StringView style) { if (style == "narrow"sv) @@ -45,6 +75,44 @@ StringView calendar_pattern_style_to_string(CalendarPatternStyle style) } } +// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table +Vector get_regional_hour_cycles([[maybe_unused]] StringView locale) +{ +#if ENABLE_UNICODE_DATA + if (auto hour_cycles = Detail::get_regional_hour_cycles(locale); !hour_cycles.is_empty()) + return hour_cycles; + + auto return_default_hour_cycles = []() { + auto hour_cycles = Detail::get_regional_hour_cycles("001"sv); + VERIFY(!hour_cycles.is_empty()); + return hour_cycles; + }; + + auto language = parse_unicode_language_id(locale); + if (!language.has_value()) + return return_default_hour_cycles(); + + if (!language->region.has_value()) + language = add_likely_subtags(*language); + if (!language.has_value() || !language->region.has_value()) + return return_default_hour_cycles(); + + if (auto hour_cycles = Detail::get_regional_hour_cycles(*language->region); !hour_cycles.is_empty()) + return hour_cycles; + + return return_default_hour_cycles(); +#else + return {}; +#endif +} + +Optional get_default_regional_hour_cycle(StringView locale) +{ + if (auto hour_cycles = get_regional_hour_cycles(locale); !hour_cycles.is_empty()) + return hour_cycles.first(); + return {}; +} + Optional get_calendar_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView calendar, [[maybe_unused]] CalendarFormatType type) { #if ENABLE_UNICODE_DATA diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.h b/Userland/Libraries/LibUnicode/DateTimeFormat.h index 3faa31d0b55..0bb2abb27e0 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.h +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.h @@ -15,6 +15,13 @@ namespace Unicode { +enum class HourCycle : u8 { + H11, + H12, + H23, + H24, +}; + enum class CalendarPatternStyle : u8 { Narrow, Short, @@ -54,8 +61,12 @@ struct CalendarFormat { CalendarPattern short_format {}; }; +HourCycle hour_cycle_from_string(StringView hour_cycle); +StringView hour_cycle_to_string(HourCycle hour_cycle); CalendarPatternStyle calendar_pattern_style_from_string(StringView style); StringView calendar_pattern_style_to_string(CalendarPatternStyle style); +Vector get_regional_hour_cycles(StringView locale); +Optional get_default_regional_hour_cycle(StringView locale); Optional get_calendar_format(StringView locale, StringView calendar, CalendarFormatType type); Vector get_calendar_available_formats(StringView locale, StringView calendar); diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 7db7514e8ac..88885086875 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -15,6 +15,7 @@ enum class CalendarPatternStyle : u8; enum class CompactNumberFormatType : u8; enum class Condition : u8; enum class GeneralCategory : u8; +enum class HourCycle : u8; enum class Language : u8; enum class ListPatternStyle : u8; enum class ListPatternType : u8;