diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake index 9ef4b83f35d..ec3ce56c1b4 100644 --- a/Meta/CMake/locale_data.cmake +++ b/Meta/CMake/locale_data.cmake @@ -21,9 +21,6 @@ set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}") set(CLDR_LOCALES_SOURCE cldr-localenames-modern) set(CLDR_LOCALES_PATH "${CLDR_PATH}/${CLDR_LOCALES_SOURCE}") -set(CLDR_MISC_SOURCE cldr-misc-modern) -set(CLDR_MISC_PATH "${CLDR_PATH}/${CLDR_MISC_SOURCE}") - set(CLDR_NUMBERS_SOURCE cldr-numbers-modern) set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}") @@ -39,7 +36,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}") - extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_MISC_SOURCE}/**" "${CLDR_MISC_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_UNITS_SOURCE}/**" "${CLDR_UNITS_PATH}") else() @@ -75,7 +71,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${CLDR_VERSION_FILE}" "${LOCALE_DATA_HEADER}" "${LOCALE_DATA_IMPLEMENTATION}" - arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -m "${CLDR_MISC_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" + arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" ) invoke_generator( "NumberFormatData" diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp index e57f2d9c709..6e27af0ded8 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp @@ -33,72 +33,19 @@ static ByteString format_identifier(StringView owner, ByteString identifier) return identifier; } -struct ListPatterns { - unsigned hash() const - { - auto hash = pair_int_hash(type.hash(), style.hash()); - hash = pair_int_hash(hash, start); - hash = pair_int_hash(hash, middle); - hash = pair_int_hash(hash, end); - hash = pair_int_hash(hash, pair); - return hash; - } - - bool operator==(ListPatterns const& other) const - { - return (type == other.type) - && (style == other.style) - && (start == other.start) - && (middle == other.middle) - && (end == other.end) - && (pair == other.pair); - } - - StringView type; - StringView style; - size_t start { 0 }; - size_t middle { 0 }; - size_t end { 0 }; - size_t pair { 0 }; -}; - -template<> -struct AK::Formatter : Formatter { - ErrorOr format(FormatBuilder& builder, ListPatterns const& patterns) - { - return Formatter::format(builder, - "{{ ListPatternType::{}, Style::{}, {}, {}, {}, {} }}"sv, - format_identifier({}, patterns.type), - format_identifier({}, patterns.style), - patterns.start, - patterns.middle, - patterns.end, - patterns.pair); - } -}; - -template<> -struct AK::Traits : public DefaultTraits { - static unsigned hash(ListPatterns const& p) { return p.hash(); } -}; - using KeywordList = Vector; -using ListPatternList = Vector; struct LocaleData { size_t calendar_keywords { 0 }; size_t collation_case_keywords { 0 }; size_t collation_numeric_keywords { 0 }; size_t number_system_keywords { 0 }; - size_t list_patterns { 0 }; size_t text_layout { 0 }; }; struct CLDR { UniqueStringStorage unique_strings; UniqueStorage unique_keyword_lists; - UniqueStorage unique_list_patterns; - UniqueStorage unique_list_pattern_lists; HashMap locales; Vector locale_aliases; @@ -106,8 +53,6 @@ struct CLDR { HashMap> keywords; HashMap> keyword_aliases; HashMap keyword_names; - - Vector list_pattern_types; }; // Some parsing is expected to fail. For example, the CLDR contains language mappings @@ -200,57 +145,6 @@ static Optional find_keyword_alias(StringView key, StringView calend return alias->name; } -static ErrorOr parse_locale_list_patterns(ByteString misc_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath list_patterns_path(move(misc_path)); - list_patterns_path = list_patterns_path.append("listPatterns.json"sv); - - auto locale_list_patterns = TRY(read_json_file(list_patterns_path.string())); - auto const& main_object = locale_list_patterns.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(list_patterns_path.parent().basename()).value(); - auto const& list_patterns_object = locale_object.get_object("listPatterns"sv).value(); - - auto list_pattern_type = [](StringView key) { - if (key.contains("type-standard"sv)) - return "conjunction"sv; - if (key.contains("type-or"sv)) - return "disjunction"sv; - if (key.contains("type-unit"sv)) - return "unit"sv; - VERIFY_NOT_REACHED(); - }; - - auto list_pattern_style = [](StringView key) { - if (key.contains("short"sv)) - return "short"sv; - if (key.contains("narrow"sv)) - return "narrow"sv; - return "long"sv; - }; - - ListPatternList list_patterns; - list_patterns.ensure_capacity(list_patterns_object.size()); - - list_patterns_object.for_each_member([&](auto const& key, JsonValue const& value) { - auto type = list_pattern_type(key); - auto style = list_pattern_style(key); - - auto start = cldr.unique_strings.ensure(value.as_object().get_byte_string("start"sv).value()); - auto middle = cldr.unique_strings.ensure(value.as_object().get_byte_string("middle"sv).value()); - auto end = cldr.unique_strings.ensure(value.as_object().get_byte_string("end"sv).value()); - auto pair = cldr.unique_strings.ensure(value.as_object().get_byte_string("2"sv).value()); - - if (!cldr.list_pattern_types.contains_slow(type)) - cldr.list_pattern_types.append(type); - - ListPatterns list_pattern { type, style, start, middle, end, pair }; - list_patterns.append(cldr.unique_list_patterns.ensure(move(list_pattern))); - }); - - locale.list_patterns = cldr.unique_list_pattern_lists.ensure(move(list_patterns)); - return {}; -} - static ErrorOr parse_number_system_keywords(ByteString locale_numbers_path, CLDR& cldr, LocaleData& locale) { LexicalPath numbers_path(move(locale_numbers_path)); @@ -430,7 +324,7 @@ static ErrorOr define_aliases_without_scripts(CLDR& cldr) return {}; } -static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString misc_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr) +static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr) { LexicalPath core_supplemental_path(core_path); core_supplemental_path = core_supplemental_path.append("supplemental"sv); @@ -455,15 +349,6 @@ static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_pa return IterationDecision::Continue; })); - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", misc_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto misc_path = LexicalPath::join(directory.path().string(), entry.name).string(); - auto language = TRY(remove_variants_from_path(misc_path)); - - auto& locale = cldr.locales.ensure(language); - TRY(parse_locale_list_patterns(misc_path, cldr, locale)); - return IterationDecision::Continue; - })); - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string(); auto language = TRY(remove_variants_from_path(numbers_path)); @@ -506,7 +391,6 @@ namespace Locale { auto keywords = cldr.keywords.keys(); generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, cldr.locale_aliases); - generate_enum(generator, format_identifier, "ListPatternType"sv, {}, cldr.list_pattern_types); generate_enum(generator, format_identifier, "Key"sv, {}, keywords); for (auto& keyword : cldr.keywords) { @@ -554,17 +438,6 @@ namespace Locale { cldr.unique_strings.generate(generator); - generator.append(R"~~~( -struct Patterns { - ListPatternType type; - Style style; - @string_index_type@ start { 0 }; - @string_index_type@ middle { 0 }; - @string_index_type@ end { 0 }; - @string_index_type@ pair { 0 }; -}; -)~~~"); - generate_available_values(generator, "get_available_calendars"sv, cldr.keywords.find("ca"sv)->value, cldr.keyword_aliases.find("ca"sv)->value, [](auto calendar) { // FIXME: Remove this filter when we support all calendars. @@ -607,8 +480,6 @@ ReadonlySpan get_available_keyword_values(StringView key) )~~~"); cldr.unique_keyword_lists.generate(generator, string_index_type, "s_keyword_lists"sv); - cldr.unique_list_patterns.generate(generator, "Patterns"sv, "s_list_patterns"sv, 10); - cldr.unique_list_pattern_lists.generate(generator, cldr.unique_list_patterns.type_that_fits(), "s_list_pattern_lists"sv); auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) { generator.set("type", type); @@ -638,7 +509,6 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_case_keywords"sv, [&](auto const& locale) { return locale.collation_case_keywords; }); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_numeric_keywords"sv, [&](auto const& locale) { return locale.collation_numeric_keywords; }); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_number_system_keywords"sv, [&](auto const& locale) { return locale.number_system_keywords; }); - append_mapping(locales, cldr.locales, cldr.unique_list_pattern_lists.type_that_fits(), "s_locale_list_patterns"sv, [&](auto const& locale) { return locale.list_patterns; }); auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector const& aliases = {}) -> ErrorOr { HashValueMap hashes; @@ -668,8 +538,6 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); TRY(append_from_string(enum_name, enum_snake, keyword.value)); } - TRY(append_from_string("ListPatternType"sv, "list_pattern_type"sv, cldr.list_pattern_types)); - generator.append(R"~~~( static ReadonlySpan<@string_index_type@> find_keyword_indices(StringView locale, StringView key) { @@ -765,37 +633,6 @@ Vector get_keywords_for_locale(StringView locale, StringView key) return keywords; } -Optional get_locale_list_patterns(StringView locale, StringView list_pattern_type, Style list_pattern_style) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return {}; - - auto type_value = list_pattern_type_from_string(list_pattern_type); - if (!type_value.has_value()) - return {}; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - - auto list_patterns_list_index = s_locale_list_patterns.at(locale_index); - auto const& locale_list_patterns = s_list_pattern_lists.at(list_patterns_list_index); - - for (auto list_patterns_index : locale_list_patterns) { - auto const& list_patterns = s_list_patterns.at(list_patterns_index); - - if ((list_patterns.type == type_value) && (list_patterns.style == list_pattern_style)) { - auto const& start = decode_string(list_patterns.start); - auto const& middle = decode_string(list_patterns.middle); - auto const& end = decode_string(list_patterns.end); - auto const& pair = decode_string(list_patterns.pair); - - return ListPatterns { start, middle, end, pair }; - } - } - - return {}; -} - } )~~~"); @@ -809,7 +646,6 @@ ErrorOr serenity_main(Main::Arguments arguments) StringView generated_implementation_path; StringView bcp47_path; StringView core_path; - StringView misc_path; StringView numbers_path; StringView dates_path; @@ -818,7 +654,6 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); args_parser.add_option(bcp47_path, "Path to cldr-bcp47 directory", "bcp47-path", 'b', "bcp47-path"); args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); - args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path"); args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path"); args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path"); args_parser.parse(arguments); @@ -827,7 +662,7 @@ ErrorOr serenity_main(Main::Arguments arguments) auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); CLDR cldr; - TRY(parse_all_locales(bcp47_path, core_path, misc_path, numbers_path, dates_path, cldr)); + TRY(parse_all_locales(bcp47_path, core_path, numbers_path, dates_path, cldr)); TRY(generate_unicode_locale_header(*generated_header_file, cldr)); TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.cpp index 471f52d62fd..073f1719f49 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.cpp @@ -340,7 +340,7 @@ ThrowCompletionOr get_duration_unit_options(VM& vm, String } // 1.1.7 PartitionDurationFormatPattern ( durationFormat, duration ), https://tc39.es/proposal-intl-duration-format/#sec-partitiondurationformatpattern -Vector partition_duration_format_pattern(VM& vm, DurationFormat const& duration_format, Temporal::DurationRecord const& duration) +Vector<::Locale::ListFormatPart> partition_duration_format_pattern(VM& vm, DurationFormat const& duration_format, Temporal::DurationRecord const& duration) { auto& realm = *vm.current_realm(); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.h b/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.h index 2eb5df9dbb6..d7c4fed7bcf 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/DurationFormat.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, Idan Horowitz - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -12,6 +12,7 @@ #include #include #include +#include namespace JS::Intl { @@ -226,6 +227,6 @@ ThrowCompletionOr to_duration_record(VM&, Value input) i8 duration_record_sign(Temporal::DurationRecord const&); bool is_valid_duration_record(Temporal::DurationRecord const&); ThrowCompletionOr get_duration_unit_options(VM&, String const& unit, Object const& options, StringView base_style, ReadonlySpan styles_list, StringView digital_base, StringView previous_style); -Vector partition_duration_format_pattern(VM&, DurationFormat const&, Temporal::DurationRecord const& duration); +Vector<::Locale::ListFormatPart> partition_duration_format_pattern(VM&, DurationFormat const&, Temporal::DurationRecord const& duration); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.cpp index ecc75d51a73..d578ebf2374 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -9,6 +9,7 @@ #include #include #include +#include namespace JS::Intl { @@ -20,184 +21,21 @@ ListFormat::ListFormat(Object& prototype) { } -void ListFormat::set_type(StringView type) -{ - if (type == "conjunction"sv) { - m_type = Type::Conjunction; - } else if (type == "disjunction"sv) { - m_type = Type::Disjunction; - } else if (type == "unit"sv) { - m_type = Type::Unit; - } else { - VERIFY_NOT_REACHED(); - } -} - -StringView ListFormat::type_string() const -{ - switch (m_type) { - case Type::Conjunction: - return "conjunction"sv; - case Type::Disjunction: - return "disjunction"sv; - case Type::Unit: - return "unit"sv; - default: - VERIFY_NOT_REACHED(); - } -} - -// 13.5.1 DeconstructPattern ( pattern, placeables ), https://tc39.es/ecma402/#sec-deconstructpattern -Vector deconstruct_pattern(StringView pattern, Placeables placeables) -{ - // 1. Let patternParts be ! PartitionPattern(pattern). - auto pattern_parts = partition_pattern(pattern); - - // 2. Let result be a new empty List. - Vector result {}; - - // 3. For each Record { [[Type]], [[Value]] } patternPart of patternParts, do - for (auto& pattern_part : pattern_parts) { - // a. Let part be patternPart.[[Type]]. - auto part = pattern_part.type; - - // b. If part is "literal", then - if (part == "literal"sv) { - // i. Append Record { [[Type]]: "literal", [[Value]]: patternPart.[[Value]] } to result. - result.append({ part, move(pattern_part.value) }); - } - // c. Else, - else { - // i. Assert: placeables has a field [[]]. - // ii. Let subst be placeables.[[]]. - auto subst = placeables.get(part); - VERIFY(subst.has_value()); - - subst.release_value().visit( - // iii. If Type(subst) is List, then - [&](Vector& partition) { - // 1. For each element s of subst, do - // a. Append s to result. - result.extend(move(partition)); - }, - // iv. Else, - [&](PatternPartition& partition) { - // 1. Append subst to result. - result.append(move(partition)); - }); - } - } - - // 4. Return result. - return result; -} - // 13.5.2 CreatePartsFromList ( listFormat, list ), https://tc39.es/ecma402/#sec-createpartsfromlist -Vector create_parts_from_list(ListFormat const& list_format, Vector const& list) +Vector<::Locale::ListFormatPart> create_parts_from_list(ListFormat const& list_format, Vector const& list) { - auto list_patterns = ::Locale::get_locale_list_patterns(list_format.locale(), list_format.type_string(), list_format.style()); - if (!list_patterns.has_value()) - return {}; - - // 1. Let size be the number of elements of list. - auto size = list.size(); - - // 2. If size is 0, then - if (size == 0) { - // a. Return a new empty List. - return {}; - } - - // 3. If size is 2, then - if (size == 2) { - // a. Let n be an index into listFormat.[[Templates]] based on listFormat.[[Locale]], list[0], and list[1]. - // b. Let pattern be listFormat.[[Templates]][n].[[Pair]]. - auto pattern = list_patterns->pair; - - // c. Let first be a new Record { [[Type]]: "element", [[Value]]: list[0] }. - PatternPartition first { "element"sv, list[0] }; - - // d. Let second be a new Record { [[Type]]: "element", [[Value]]: list[1] }. - PatternPartition second { "element"sv, list[1] }; - - // e. Let placeables be a new Record { [[0]]: first, [[1]]: second }. - Placeables placeables; - placeables.set("0"sv, move(first)); - placeables.set("1"sv, move(second)); - - // f. Return ! DeconstructPattern(pattern, placeables). - return deconstruct_pattern(pattern, move(placeables)); - } - - // 4. Let last be a new Record { [[Type]]: "element", [[Value]]: list[size - 1] }. - PatternPartition last { "element"sv, list[size - 1] }; - - // 5. Let parts be « last ». - Vector parts { move(last) }; - - // The spec does not say to do this, but because size_t is unsigned, we need to take care not to wrap around 0. - if (size == 1) - return parts; - - // 6. Let i be size - 2. - size_t i = size - 2; - - // 7. Repeat, while i ≥ 0, - do { - // a. Let head be a new Record { [[Type]]: "element", [[Value]]: list[i] }. - PatternPartition head { "element"sv, list[i] }; - - // b. Let n be an implementation-defined index into listFormat.[[Templates]] based on listFormat.[[Locale]], head, and parts. - StringView pattern; - - // c. If i is 0, then - if (i == 0) { - // i. Let pattern be listFormat.[[Templates]][n].[[Start]]. - pattern = list_patterns->start; - } - // d. Else if i is less than size - 2, then - else if (i < (size - 2)) { - // i. Let pattern be listFormat.[[Templates]][n].[[Middle]]. - pattern = list_patterns->middle; - } - // e. Else, - else { - // i. Let pattern be listFormat.[[Templates]][n].[[End]]. - pattern = list_patterns->end; - } - - // f. Let placeables be a new Record { [[0]]: head, [[1]]: parts }. - Placeables placeables; - placeables.set("0"sv, move(head)); - placeables.set("1"sv, move(parts)); - - // g. Set parts to ! DeconstructPattern(pattern, placeables). - parts = deconstruct_pattern(pattern, move(placeables)); - - // h. Decrement i by 1. - } while (i-- != 0); - - // 8. Return parts. - return parts; + return ::Locale::format_list_to_parts(list_format.locale(), list_format.type(), list_format.style(), list); } // 13.5.3 FormatList ( listFormat, list ), https://tc39.es/ecma402/#sec-formatlist String format_list(ListFormat const& list_format, Vector const& list) { // 1. Let parts be ! CreatePartsFromList(listFormat, list). - auto parts = create_parts_from_list(list_format, list); - // 2. Let result be an empty String. - StringBuilder result; - // 3. For each Record { [[Type]], [[Value]] } part in parts, do - for (auto& part : parts) { - // a. Set result to the string-concatenation of result and part.[[Value]]. - result.append(part.value); - } - + // a. Set result to the string-concatenation of result and part.[[Value]]. // 4. Return result. - return MUST(result.to_string()); + return ::Locale::format_list(list_format.locale(), list_format.type(), list_format.style(), list); } // 13.5.4 FormatListToParts ( listFormat, list ), https://tc39.es/ecma402/#sec-formatlisttoparts diff --git a/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.h b/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.h index 3dce50ceb79..75ff3c9a3ee 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/ListFormat.h @@ -1,18 +1,17 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once -#include #include #include -#include #include #include #include +#include #include namespace JS::Intl { @@ -34,9 +33,9 @@ public: String const& locale() const { return m_locale; } void set_locale(String locale) { m_locale = move(locale); } - Type type() const { return m_type; } - void set_type(StringView type); - StringView type_string() const; + ::Locale::ListFormatType type() const { return m_type; } + void set_type(StringView type) { m_type = ::Locale::list_format_type_from_string(type); } + StringView type_string() const { return ::Locale::list_format_type_to_string(m_type); } ::Locale::Style style() const { return m_style; } void set_style(StringView style) { m_style = ::Locale::style_from_string(style); } @@ -45,15 +44,12 @@ public: private: explicit ListFormat(Object& prototype); - String m_locale; // [[Locale]] - Type m_type { Type::Invalid }; // [[Type]] - ::Locale::Style m_style { ::Locale::Style::Long }; // [[Style]] + String m_locale; // [[Locale]] + ::Locale::ListFormatType m_type { ::Locale::ListFormatType::Conjunction }; // [[Type]] + ::Locale::Style m_style { ::Locale::Style::Long }; // [[Style]] }; -using Placeables = HashMap>>; - -Vector deconstruct_pattern(StringView pattern, Placeables); -Vector create_parts_from_list(ListFormat const&, Vector const& list); +Vector<::Locale::ListFormatPart> create_parts_from_list(ListFormat const&, Vector const& list); String format_list(ListFormat const&, Vector const& list); NonnullGCPtr format_list_to_parts(VM&, ListFormat const&, Vector const& list); ThrowCompletionOr> string_list_from_iterable(VM&, Value iterable); diff --git a/Userland/Libraries/LibLocale/CMakeLists.txt b/Userland/Libraries/LibLocale/CMakeLists.txt index a944cefdc97..df0ba54e170 100644 --- a/Userland/Libraries/LibLocale/CMakeLists.txt +++ b/Userland/Libraries/LibLocale/CMakeLists.txt @@ -13,6 +13,7 @@ set(SOURCES DateTimeFormat.cpp DisplayNames.cpp ICU.cpp + ListFormat.cpp Locale.cpp NumberFormat.cpp PluralRules.cpp diff --git a/Userland/Libraries/LibLocale/Forward.h b/Userland/Libraries/LibLocale/Forward.h index ee388387c93..f6abe2645c4 100644 --- a/Userland/Libraries/LibLocale/Forward.h +++ b/Userland/Libraries/LibLocale/Forward.h @@ -26,7 +26,6 @@ enum class KeywordColCaseFirst : u8; enum class KeywordColNumeric : u8; enum class KeywordHours : u8; enum class KeywordNumbers : u8; -enum class ListPatternType : u8; enum class Locale : u16; enum class MinimumDaysRegion : u8; enum class Month : u8; @@ -43,7 +42,7 @@ struct CalendarPattern; struct CalendarRangePattern; struct Keyword; struct LanguageID; -struct ListPatterns; +struct ListFormatPart; struct LocaleExtension; struct LocaleID; struct NumberFormat; diff --git a/Userland/Libraries/LibLocale/ICU.cpp b/Userland/Libraries/LibLocale/ICU.cpp index 35e2dc475fa..3c638eda89c 100644 --- a/Userland/Libraries/LibLocale/ICU.cpp +++ b/Userland/Libraries/LibLocale/ICU.cpp @@ -90,6 +90,20 @@ icu::StringPiece icu_string_piece(StringView string) return { string.characters_without_null_termination(), static_cast(string.length()) }; } +Vector icu_string_list(ReadonlySpan strings) +{ + Vector result; + result.ensure_capacity(strings.size()); + + for (auto const& string : strings) { + auto view = string.bytes_as_string_view(); + icu::UnicodeString icu_string(view.characters_without_null_termination(), static_cast(view.length())); + result.unchecked_append(move(icu_string)); + } + + return result; +} + String icu_string_to_string(icu::UnicodeString const& string) { return icu_string_to_string(string.getBuffer(), string.length()); diff --git a/Userland/Libraries/LibLocale/ICU.h b/Userland/Libraries/LibLocale/ICU.h index 99b998354ad..a45b3494732 100644 --- a/Userland/Libraries/LibLocale/ICU.h +++ b/Userland/Libraries/LibLocale/ICU.h @@ -61,6 +61,9 @@ static constexpr bool icu_failure(UErrorCode code) } icu::StringPiece icu_string_piece(StringView string); + +Vector icu_string_list(ReadonlySpan strings); + String icu_string_to_string(icu::UnicodeString const& string); String icu_string_to_string(UChar const*, i32 length); diff --git a/Userland/Libraries/LibLocale/ListFormat.cpp b/Userland/Libraries/LibLocale/ListFormat.cpp new file mode 100644 index 00000000000..5c5b117ef0b --- /dev/null +++ b/Userland/Libraries/LibLocale/ListFormat.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#define AK_DONT_REPLACE_STD + +#include +#include +#include + +#include + +namespace Locale { + +ListFormatType list_format_type_from_string(StringView list_format_type) +{ + if (list_format_type == "conjunction"sv) + return ListFormatType::Conjunction; + if (list_format_type == "disjunction"sv) + return ListFormatType::Disjunction; + if (list_format_type == "unit"sv) + return ListFormatType::Unit; + VERIFY_NOT_REACHED(); +} + +StringView list_format_type_to_string(ListFormatType list_format_type) +{ + switch (list_format_type) { + case ListFormatType::Conjunction: + return "conjunction"sv; + case ListFormatType::Disjunction: + return "disjunction"sv; + case ListFormatType::Unit: + return "unit"sv; + default: + VERIFY_NOT_REACHED(); + } +} + +static constexpr UListFormatterType icu_list_format_type(ListFormatType type) +{ + switch (type) { + case ListFormatType::Conjunction: + return ULISTFMT_TYPE_AND; + case ListFormatType::Disjunction: + return ULISTFMT_TYPE_OR; + case ListFormatType::Unit: + return ULISTFMT_TYPE_UNITS; + } + + VERIFY_NOT_REACHED(); +} + +static constexpr UListFormatterWidth icu_list_format_width(Style style) +{ + switch (style) { + case Style::Long: + return ULISTFMT_WIDTH_WIDE; + case Style::Short: + return ULISTFMT_WIDTH_SHORT; + case Style::Narrow: + return ULISTFMT_WIDTH_NARROW; + } + + VERIFY_NOT_REACHED(); +} + +static constexpr StringView icu_list_format_field_to_string(i32 field) +{ + switch (field) { + case ULISTFMT_LITERAL_FIELD: + return "literal"sv; + case ULISTFMT_ELEMENT_FIELD: + return "element"sv; + } + + VERIFY_NOT_REACHED(); +} + +struct FormatResult { + icu::FormattedList list; + icu::UnicodeString string; +}; + +static Optional format_list_impl(StringView locale, ListFormatType type, Style style, ReadonlySpan list) +{ + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + UErrorCode status = U_ZERO_ERROR; + + auto list_formatter = adopt_own(*icu::ListFormatter::createInstance(locale_data->locale(), icu_list_format_type(type), icu_list_format_width(style), status)); + if (icu_failure(status)) + return {}; + + auto icu_list = icu_string_list(list); + + auto formatted_list = list_formatter->formatStringsToValue(icu_list.data(), static_cast(icu_list.size()), status); + if (icu_failure(status)) + return {}; + + auto formatted_string = formatted_list.toString(status); + if (icu_failure(status)) + return {}; + + return FormatResult { move(formatted_list), move(formatted_string) }; +} + +String format_list(StringView locale, ListFormatType type, Style style, ReadonlySpan list) +{ + auto formatted = format_list_impl(locale, type, style, list); + if (!formatted.has_value()) + return {}; + + return icu_string_to_string(formatted->string); +} + +Vector format_list_to_parts(StringView locale, ListFormatType type, Style style, ReadonlySpan list) +{ + UErrorCode status = U_ZERO_ERROR; + + auto formatted = format_list_impl(locale, type, style, list); + if (!formatted.has_value()) + return {}; + + Vector result; + + icu::ConstrainedFieldPosition position; + position.constrainCategory(UFIELD_CATEGORY_LIST); + + while (static_cast(formatted->list.nextPosition(position, status)) && icu_success(status)) { + auto type = icu_list_format_field_to_string(position.getField()); + auto part = formatted->string.tempSubStringBetween(position.getStart(), position.getLimit()); + + result.empend(type, icu_string_to_string(part)); + } + + return result; +} + +} diff --git a/Userland/Libraries/LibLocale/ListFormat.h b/Userland/Libraries/LibLocale/ListFormat.h new file mode 100644 index 00000000000..dc173631b69 --- /dev/null +++ b/Userland/Libraries/LibLocale/ListFormat.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Locale { + +enum class ListFormatType { + Conjunction, + Disjunction, + Unit, +}; + +ListFormatType list_format_type_from_string(StringView list_format_type); +StringView list_format_type_to_string(ListFormatType list_format_type); + +struct ListFormatPart { + StringView type; + String value; +}; + +String format_list(StringView locale, ListFormatType, Style, ReadonlySpan list); +Vector format_list_to_parts(StringView locale, ListFormatType, Style, ReadonlySpan list); + +} diff --git a/Userland/Libraries/LibLocale/Locale.cpp b/Userland/Libraries/LibLocale/Locale.cpp index 59c87835a26..79b7dc4d36a 100644 --- a/Userland/Libraries/LibLocale/Locale.cpp +++ b/Userland/Libraries/LibLocale/Locale.cpp @@ -556,7 +556,6 @@ ReadonlySpan __attribute__((weak)) get_available_collation_types() { ReadonlySpan __attribute__((weak)) get_available_hour_cycles() { return {}; } ReadonlySpan __attribute__((weak)) get_available_number_systems() { return {}; } Optional __attribute__((weak)) locale_from_string(StringView) { return {}; } -Optional __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; } Optional __attribute__((weak)) key_from_string(StringView) { return {}; } Optional __attribute__((weak)) keyword_ca_from_string(StringView) { return {}; } Optional __attribute__((weak)) keyword_co_from_string(StringView) { return {}; } @@ -596,8 +595,6 @@ Vector available_currencies() return result; } -Optional __attribute__((weak)) get_locale_list_patterns(StringView, StringView, Style) { return {}; } - static void apply_extensions_to_locale(icu::Locale& locale, icu::Locale const& locale_with_extensions) { UErrorCode status = U_ZERO_ERROR; diff --git a/Userland/Libraries/LibLocale/Locale.h b/Userland/Libraries/LibLocale/Locale.h index 4fbe4e40798..830c943c6c0 100644 --- a/Userland/Libraries/LibLocale/Locale.h +++ b/Userland/Libraries/LibLocale/Locale.h @@ -84,18 +84,6 @@ enum class Style : u8 { Narrow, }; -struct DisplayPattern { - StringView locale_pattern; - StringView locale_separator; -}; - -struct ListPatterns { - StringView start; - StringView middle; - StringView end; - StringView pair; -}; - // Note: These methods only verify that the provided strings match the EBNF grammar of the // Unicode identifier subtag (i.e. no validation is done that the tags actually exist). constexpr bool is_unicode_language_subtag(StringView subtag) @@ -159,8 +147,6 @@ Style style_from_string(StringView style); StringView style_to_string(Style style); Optional locale_from_string(StringView locale); -Optional list_pattern_type_from_string(StringView list_pattern_type); - Optional key_from_string(StringView key); Optional keyword_ca_from_string(StringView ca); Optional keyword_co_from_string(StringView co); @@ -171,8 +157,6 @@ Optional keyword_nu_from_string(StringView nu); Vector get_keywords_for_locale(StringView locale, StringView key); Optional get_preferred_keyword_value_for_locale(StringView locale, StringView key); -Optional get_locale_list_patterns(StringView locale, StringView type, Style style); - Optional add_likely_subtags(StringView); Optional remove_likely_subtags(StringView);