Explorar el Código

LibUnicode: Parse number system digits from the CLDR

We had a hard-coded table of number system digits copied from ECMA-402.
Turns out these digits are in the CLDR, so let's parse the digits from
there instead of hard-coding them.
Timothy Flynn hace 3 años
padre
commit
c5138f0f2b

+ 1 - 1
Meta/CMake/unicode_data.cmake

@@ -181,7 +181,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
         "${UNICODE_META_TARGET_PREFIX}"
         "${UNICODE_NUMBER_FORMAT_HEADER}"
         "${UNICODE_NUMBER_FORMAT_IMPLEMENTATION}"
-        arguments -n "${CLDR_NUMBERS_PATH}" -u "${CLDR_UNITS_PATH}"
+        arguments -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -u "${CLDR_UNITS_PATH}"
     )
 
     set(UNICODE_DATA_SOURCES

+ 97 - 12
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp

@@ -268,10 +268,47 @@ struct UnicodeLocaleData {
     UniqueStorage<NumberSystem, NumberSystemIndexType> unique_systems;
     UniqueStorage<Unit, UnitIndexType> unique_units;
 
+    HashMap<String, Array<u32, 10>> number_system_digits;
+    Vector<String> number_systems;
+
     HashMap<String, Locale> locales;
     size_t max_identifier_count { 0 };
 };
 
+static ErrorOr<void> parse_number_system_digits(String core_supplemental_path, UnicodeLocaleData& locale_data)
+{ // Reads "numberingSystems.json" under core_supplemental_path and records the digits of every "numeric" number system in locale_data.
+    LexicalPath number_systems_path(move(core_supplemental_path));
+    number_systems_path = number_systems_path.append("numberingSystems.json"sv);
+    // Failures to open or parse the file are handed back to the caller via TRY.
+    auto number_systems_file = TRY(Core::File::open(number_systems_path.string(), Core::OpenMode::ReadOnly));
+    auto number_systems = TRY(JsonValue::from_string(number_systems_file->read_all()));
+    // The entries of interest are the members of the "supplemental" -> "numberingSystems" object.
+    auto const& supplemental_object = number_systems.as_object().get("supplemental"sv);
+    auto const& number_systems_object = supplemental_object.as_object().get("numberingSystems"sv);
+    // Each member is keyed by the number system name; its value is an object carrying a "_type" string.
+    number_systems_object.as_object().for_each_member([&](auto const& number_system, auto const& digits_object) {
+        auto type = digits_object.as_object().get("_type"sv).as_string();
+        if (type != "numeric"sv)
+            return; // Only "numeric" entries are recorded; every other type is skipped.
+
+        auto digits = digits_object.as_object().get("_digits"sv).as_string();
+
+        Utf8View utf8_digits { digits };
+        VERIFY(utf8_digits.length() == 10); // Must match the 10-element array that is filled below.
+
+        auto& number_system_digits = locale_data.number_system_digits.ensure(number_system);
+        size_t index = 0;
+        // Store the code points in the order in which they appear in the "_digits" string.
+        for (u32 digit : utf8_digits)
+            number_system_digits[index++] = digit;
+        // Remember each system name once; this list later feeds the generated NumberSystem enum.
+        if (!locale_data.number_systems.contains_slow(number_system))
+            locale_data.number_systems.append(number_system);
+    });
+
+    return {};
+}
+
 static String parse_identifiers(String pattern, StringView replacement, UnicodeLocaleData& locale_data, NumberFormat& format)
 {
     static Utf8View whitespace { "\u0020\u00a0\u200f"sv };
@@ -671,11 +708,17 @@ static ErrorOr<void> parse_units(String locale_units_path, UnicodeLocaleData& lo
     return {};
 }
 
-static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, UnicodeLocaleData& locale_data)
+static ErrorOr<void> parse_all_locales(String core_path, String numbers_path, String units_path, UnicodeLocaleData& locale_data)
 {
     auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
     auto units_iterator = TRY(path_to_dir_iterator(move(units_path)));
 
+    LexicalPath core_supplemental_path(move(core_path));
+    core_supplemental_path = core_supplemental_path.append("supplemental"sv);
+    VERIFY(Core::File::is_directory(core_supplemental_path.string()));
+
+    TRY(parse_number_system_digits(core_supplemental_path.string(), locale_data));
+
     auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
         auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));
 
@@ -708,14 +751,28 @@ static ErrorOr<void> parse_all_locales(String numbers_path, String units_path, U
     return {};
 }
 
-static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData&)
+static String format_identifier(StringView, String identifier) // The unnamed StringView argument is ignored.
+{
+    return identifier.to_titlecase(); // Passed as the identifier formatter for the generated NumberSystem enum and its from-string lookup.
+}
+
+static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data)
 {
     StringBuilder builder;
     SourceGenerator generator { builder };
 
-    // FIXME: Update unicode_data.cmake to not require a header.
     generator.append(R"~~~(
+#include <AK/Types.h>
+
 #pragma once
+
+namespace Unicode {
+)~~~");
+
+    generate_enum(generator, format_identifier, "NumberSystem"sv, {}, locale_data.number_systems);
+
+    generator.append(R"~~~(
+}
 )~~~");
 
     VERIFY(file.write(generator.as_string_view()));
@@ -775,7 +832,7 @@ struct NumberFormatImpl {
     Array<@string_index_type@, @identifier_count@> identifiers {};
 };
 
-struct NumberSystem {
+struct NumberSystemData {
     @string_index_type@ system { 0 };
     @numeric_symbol_list_index_type@ symbols { 0 };
 
@@ -806,7 +863,7 @@ struct Unit {
     locale_data.unique_formats.generate(generator, "NumberFormatImpl"sv, "s_number_formats"sv, 10);
     locale_data.unique_format_lists.generate(generator, s_number_format_index_type, "s_number_format_lists"sv);
     locale_data.unique_symbols.generate(generator, s_string_index_type, "s_numeric_symbol_lists"sv);
-    locale_data.unique_systems.generate(generator, "NumberSystem"sv, "s_number_systems"sv, 10);
+    locale_data.unique_systems.generate(generator, "NumberSystemData"sv, "s_number_systems"sv, 10);
     locale_data.unique_units.generate(generator, "Unit"sv, "s_units"sv, 10);
 
     auto append_map = [&](String name, auto type, auto const& map) {
@@ -820,18 +877,44 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
         bool first = true;
         for (auto const& item : map) {
             generator.append(first ? " " : ", ");
-            generator.append(String::number(item.value));
+            if constexpr (requires { item.value; })
+                generator.append(String::number(item.value));
+            else
+                generator.append(String::number(item));
             first = false;
         }
 
         generator.append(" } };");
     };
 
+    generate_mapping(generator, locale_data.number_system_digits, "u32"sv, "s_number_systems_digits"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_map(name, "u32"sv, value); });
     generate_mapping(generator, locale_data.locales, s_number_system_index_type, "s_locale_number_systems"sv, "s_number_systems_{}", nullptr, [&](auto const& name, auto const& value) { append_map(name, s_number_system_index_type, value.number_systems); });
     generate_mapping(generator, locale_data.locales, s_unit_index_type, "s_locale_units"sv, "s_units_{}", nullptr, [&](auto const& name, auto const& value) { append_map(name, s_unit_index_type, value.units); });
 
+    auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values) {
+        HashValueMap<String> hashes;
+        hashes.ensure_capacity(values.size());
+
+        for (auto const& value : values)
+            hashes.set(value.hash(), format_identifier(enum_title, value));
+
+        generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
+    };
+
+    append_from_string("NumberSystem"sv, "number_system"sv, locale_data.number_systems);
+
     generator.append(R"~~~(
-static NumberSystem const* find_number_system(StringView locale, StringView system)
+Optional<Span<u32 const>> get_digits_for_number_system(StringView system)
+{
+    auto number_system_value = number_system_from_string(system);
+    if (!number_system_value.has_value())
+        return {};
+
+    auto number_system_index = to_underlying(*number_system_value);
+    return s_number_systems_digits[number_system_index];
+}
+
+static NumberSystemData const* find_number_system(StringView locale, StringView system)
 {
     auto locale_value = locale_from_string(locale);
     if (!locale_value.has_value())
@@ -991,14 +1074,16 @@ Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style
 
 ErrorOr<int> serenity_main(Main::Arguments arguments)
 {
-    StringView generated_header_path = nullptr;
-    StringView generated_implementation_path = nullptr;
-    StringView numbers_path = nullptr;
-    StringView units_path = nullptr;
+    StringView generated_header_path;
+    StringView generated_implementation_path;
+    StringView core_path;
+    StringView numbers_path;
+    StringView units_path;
 
     Core::ArgsParser args_parser;
     args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
     args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
+    args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
     args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
     args_parser.add_option(units_path, "Path to cldr-units directory", "units-path", 'u', "units-path");
     args_parser.parse(arguments);
@@ -1016,7 +1101,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     auto generated_implementation_file = TRY(open_file(generated_implementation_path));
 
     UnicodeLocaleData locale_data;
-    TRY(parse_all_locales(numbers_path, units_path, locale_data));
+    TRY(parse_all_locales(core_path, numbers_path, units_path, locale_data));
 
     generate_unicode_locale_header(generated_header_file, locale_data);
     generate_unicode_locale_implementation(generated_implementation_file, locale_data);

+ 1 - 0
Userland/Libraries/LibUnicode/Forward.h

@@ -28,6 +28,7 @@ enum class ListPatternStyle : u8;
 enum class ListPatternType : u8;
 enum class Locale : u16;
 enum class Month : u8;
+enum class NumberSystem : u8;
 enum class NumericSymbol : u8;
 enum class Property : u8;
 enum class Script : u8;

+ 10 - 73
Userland/Libraries/LibUnicode/NumberFormat.cpp

@@ -5,7 +5,6 @@
  */
 
 #include <AK/CharacterTypes.h>
-#include <AK/HashMap.h>
 #include <AK/Utf8View.h>
 #include <LibUnicode/CharacterTypes.h>
 #include <LibUnicode/Locale.h>
@@ -17,6 +16,7 @@
 
 namespace Unicode {
 
+Optional<NumberSystem> __attribute__((weak)) number_system_from_string(StringView) { return {}; }
 Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
 Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return {}; }
 Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return {}; }
@@ -33,81 +33,18 @@ Optional<StringView> get_default_number_system(StringView locale)
     return {};
 }
 
-String replace_digits_for_number_system(StringView system, StringView number)
+Optional<Span<u32 const>> __attribute__((weak)) get_digits_for_number_system(StringView)
 {
-    // https://tc39.es/ecma402/#table-numbering-system-digits
-    static HashMap<StringView, AK::Array<u32, 10>> s_numbering_system_digits = {
-        { "adlm"sv, { 0x1e950, 0x1e951, 0x1e952, 0x1e953, 0x1e954, 0x1e955, 0x1e956, 0x1e957, 0x1e958, 0x1e959 } },
-        { "ahom"sv, { 0x11730, 0x11731, 0x11732, 0x11733, 0x11734, 0x11735, 0x11736, 0x11737, 0x11738, 0x11739 } },
-        { "arab"sv, { 0x660, 0x661, 0x662, 0x663, 0x664, 0x665, 0x666, 0x667, 0x668, 0x669 } },
-        { "arabext"sv, { 0x6f0, 0x6f1, 0x6f2, 0x6f3, 0x6f4, 0x6f5, 0x6f6, 0x6f7, 0x6f8, 0x6f9 } },
-        { "bali"sv, { 0x1b50, 0x1b51, 0x1b52, 0x1b53, 0x1b54, 0x1b55, 0x1b56, 0x1b57, 0x1b58, 0x1b59 } },
-        { "beng"sv, { 0x9e6, 0x9e7, 0x9e8, 0x9e9, 0x9ea, 0x9eb, 0x9ec, 0x9ed, 0x9ee, 0x9ef } },
-        { "bhks"sv, { 0x11c50, 0x11c51, 0x11c52, 0x11c53, 0x11c54, 0x11c55, 0x11c56, 0x11c57, 0x11c58, 0x11c59 } },
-        { "brah"sv, { 0x11066, 0x11067, 0x11068, 0x11069, 0x1106a, 0x1106b, 0x1106c, 0x1106d, 0x1106e, 0x1106f } },
-        { "cakm"sv, { 0x11136, 0x11137, 0x11138, 0x11139, 0x1113a, 0x1113b, 0x1113c, 0x1113d, 0x1113e, 0x1113f } },
-        { "cham"sv, { 0xaa50, 0xaa51, 0xaa52, 0xaa53, 0xaa54, 0xaa55, 0xaa56, 0xaa57, 0xaa58, 0xaa59 } },
-        { "deva"sv, { 0x966, 0x967, 0x968, 0x969, 0x96a, 0x96b, 0x96c, 0x96d, 0x96e, 0x96f } },
-        { "diak"sv, { 0x11950, 0x11951, 0x11952, 0x11953, 0x11954, 0x11955, 0x11956, 0x11957, 0x11958, 0x11959 } },
-        { "fullwide"sv, { 0xff10, 0xff11, 0xff12, 0xff13, 0xff14, 0xff15, 0xff16, 0xff17, 0xff18, 0xff19 } },
-        { "gong"sv, { 0x11da0, 0x11da1, 0x11da2, 0x11da3, 0x11da4, 0x11da5, 0x11da6, 0x11da7, 0x11da8, 0x11da9 } },
-        { "gonm"sv, { 0x11d50, 0x11d51, 0x11d52, 0x11d53, 0x11d54, 0x11d55, 0x11d56, 0x11d57, 0x11d58, 0x11d59 } },
-        { "gujr"sv, { 0xae6, 0xae7, 0xae8, 0xae9, 0xaea, 0xaeb, 0xaec, 0xaed, 0xaee, 0xaef } },
-        { "guru"sv, { 0xa66, 0xa67, 0xa68, 0xa69, 0xa6a, 0xa6b, 0xa6c, 0xa6d, 0xa6e, 0xa6f } },
-        { "hanidec"sv, { 0x3007, 0x4e00, 0x4e8c, 0x4e09, 0x56db, 0x4e94, 0x516d, 0x4e03, 0x516b, 0x4e5d } },
-        { "hmng"sv, { 0x16b50, 0x16b51, 0x16b52, 0x16b53, 0x16b54, 0x16b55, 0x16b56, 0x16b57, 0x16b58, 0x16b59 } },
-        { "hmnp"sv, { 0x1e140, 0x1e141, 0x1e142, 0x1e143, 0x1e144, 0x1e145, 0x1e146, 0x1e147, 0x1e148, 0x1e149 } },
-        { "java"sv, { 0xa9d0, 0xa9d1, 0xa9d2, 0xa9d3, 0xa9d4, 0xa9d5, 0xa9d6, 0xa9d7, 0xa9d8, 0xa9d9 } },
-        { "kali"sv, { 0xa900, 0xa901, 0xa902, 0xa903, 0xa904, 0xa905, 0xa906, 0xa907, 0xa908, 0xa909 } },
-        { "khmr"sv, { 0x17e0, 0x17e1, 0x17e2, 0x17e3, 0x17e4, 0x17e5, 0x17e6, 0x17e7, 0x17e8, 0x17e9 } },
-        { "knda"sv, { 0xce6, 0xce7, 0xce8, 0xce9, 0xcea, 0xceb, 0xcec, 0xced, 0xcee, 0xcef } },
-        { "lana"sv, { 0x1a80, 0x1a81, 0x1a82, 0x1a83, 0x1a84, 0x1a85, 0x1a86, 0x1a87, 0x1a88, 0x1a89 } },
-        { "lanatham"sv, { 0x1a90, 0x1a91, 0x1a92, 0x1a93, 0x1a94, 0x1a95, 0x1a96, 0x1a97, 0x1a98, 0x1a99 } },
-        { "laoo"sv, { 0xed0, 0xed1, 0xed2, 0xed3, 0xed4, 0xed5, 0xed6, 0xed7, 0xed8, 0xed9 } },
-        { "latn"sv, { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } },
-        { "lepc"sv, { 0x1c40, 0x1c41, 0x1c42, 0x1c43, 0x1c44, 0x1c45, 0x1c46, 0x1c47, 0x1c48, 0x1c49 } },
-        { "limb"sv, { 0x1946, 0x1947, 0x1948, 0x1949, 0x194a, 0x194b, 0x194c, 0x194d, 0x194e, 0x194f } },
-        { "mathbold"sv, { 0x1d7ce, 0x1d7cf, 0x1d7d0, 0x1d7d1, 0x1d7d2, 0x1d7d3, 0x1d7d4, 0x1d7d5, 0x1d7d6, 0x1d7d7 } },
-        { "mathdbl"sv, { 0x1d7d8, 0x1d7d9, 0x1d7da, 0x1d7db, 0x1d7dc, 0x1d7dd, 0x1d7de, 0x1d7df, 0x1d7e0, 0x1d7e1 } },
-        { "mathmono"sv, { 0x1d7f6, 0x1d7f7, 0x1d7f8, 0x1d7f9, 0x1d7fa, 0x1d7fb, 0x1d7fc, 0x1d7fd, 0x1d7fe, 0x1d7ff } },
-        { "mathsanb"sv, { 0x1d7ec, 0x1d7ed, 0x1d7ee, 0x1d7ef, 0x1d7f0, 0x1d7f1, 0x1d7f2, 0x1d7f3, 0x1d7f4, 0x1d7f5 } },
-        { "mathsans"sv, { 0x1d7e2, 0x1d7e3, 0x1d7e4, 0x1d7e5, 0x1d7e6, 0x1d7e7, 0x1d7e8, 0x1d7e9, 0x1d7ea, 0x1d7eb } },
-        { "mlym"sv, { 0xd66, 0xd67, 0xd68, 0xd69, 0xd6a, 0xd6b, 0xd6c, 0xd6d, 0xd6e, 0xd6f } },
-        { "modi"sv, { 0x11650, 0x11651, 0x11652, 0x11653, 0x11654, 0x11655, 0x11656, 0x11657, 0x11658, 0x11659 } },
-        { "mong"sv, { 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817, 0x1818, 0x1819 } },
-        { "mroo"sv, { 0x16a60, 0x16a61, 0x16a62, 0x16a63, 0x16a64, 0x16a65, 0x16a66, 0x16a67, 0x16a68, 0x16a69 } },
-        { "mtei"sv, { 0xabf0, 0xabf1, 0xabf2, 0xabf3, 0xabf4, 0xabf5, 0xabf6, 0xabf7, 0xabf8, 0xabf9 } },
-        { "mymr"sv, { 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049 } },
-        { "mymrshan"sv, { 0x1090, 0x1091, 0x1092, 0x1093, 0x1094, 0x1095, 0x1096, 0x1097, 0x1098, 0x1099 } },
-        { "mymrtlng"sv, { 0xa9f0, 0xa9f1, 0xa9f2, 0xa9f3, 0xa9f4, 0xa9f5, 0xa9f6, 0xa9f7, 0xa9f8, 0xa9f9 } },
-        { "newa"sv, { 0x11450, 0x11451, 0x11452, 0x11453, 0x11454, 0x11455, 0x11456, 0x11457, 0x11458, 0x11459 } },
-        { "nkoo"sv, { 0x7c0, 0x7c1, 0x7c2, 0x7c3, 0x7c4, 0x7c5, 0x7c6, 0x7c7, 0x7c8, 0x7c9 } },
-        { "olck"sv, { 0x1c50, 0x1c51, 0x1c52, 0x1c53, 0x1c54, 0x1c55, 0x1c56, 0x1c57, 0x1c58, 0x1c59 } },
-        { "orya"sv, { 0xb66, 0xb67, 0xb68, 0xb69, 0xb6a, 0xb6b, 0xb6c, 0xb6d, 0xb6e, 0xb6f } },
-        { "osma"sv, { 0x104a0, 0x104a1, 0x104a2, 0x104a3, 0x104a4, 0x104a5, 0x104a6, 0x104a7, 0x104a8, 0x104a9 } },
-        { "rohg"sv, { 0x10d30, 0x10d31, 0x10d32, 0x10d33, 0x10d34, 0x10d35, 0x10d36, 0x10d37, 0x10d38, 0x10d39 } },
-        { "saur"sv, { 0xa8d0, 0xa8d1, 0xa8d2, 0xa8d3, 0xa8d4, 0xa8d5, 0xa8d6, 0xa8d7, 0xa8d8, 0xa8d9 } },
-        { "segment"sv, { 0x1fbf0, 0x1fbf1, 0x1fbf2, 0x1fbf3, 0x1fbf4, 0x1fbf5, 0x1fbf6, 0x1fbf7, 0x1fbf8, 0x1fbf9 } },
-        { "shrd"sv, { 0x111d0, 0x111d1, 0x111d2, 0x111d3, 0x111d4, 0x111d5, 0x111d6, 0x111d7, 0x111d8, 0x111d9 } },
-        { "sind"sv, { 0x112f0, 0x112f1, 0x112f2, 0x112f3, 0x112f4, 0x112f5, 0x112f6, 0x112f7, 0x112f8, 0x112f9 } },
-        { "sinh"sv, { 0xde6, 0xde7, 0xde8, 0xde9, 0xdea, 0xdeb, 0xdec, 0xded, 0xdee, 0xdef } },
-        { "sora"sv, { 0x110f0, 0x110f1, 0x110f2, 0x110f3, 0x110f4, 0x110f5, 0x110f6, 0x110f7, 0x110f8, 0x110f9 } },
-        { "sund"sv, { 0x1bb0, 0x1bb1, 0x1bb2, 0x1bb3, 0x1bb4, 0x1bb5, 0x1bb6, 0x1bb7, 0x1bb8, 0x1bb9 } },
-        { "takr"sv, { 0x116c0, 0x116c1, 0x116c2, 0x116c3, 0x116c4, 0x116c5, 0x116c6, 0x116c7, 0x116c8, 0x116c9 } },
-        { "talu"sv, { 0x19d0, 0x19d1, 0x19d2, 0x19d3, 0x19d4, 0x19d5, 0x19d6, 0x19d7, 0x19d8, 0x19d9 } },
-        { "tamldec"sv, { 0xbe6, 0xbe7, 0xbe8, 0xbe9, 0xbea, 0xbeb, 0xbec, 0xbed, 0xbee, 0xbef } },
-        { "telu"sv, { 0xc66, 0xc67, 0xc68, 0xc69, 0xc6a, 0xc6b, 0xc6c, 0xc6d, 0xc6e, 0xc6f } },
-        { "thai"sv, { 0xe50, 0xe51, 0xe52, 0xe53, 0xe54, 0xe55, 0xe56, 0xe57, 0xe58, 0xe59 } },
-        { "tibt"sv, { 0xf20, 0xf21, 0xf22, 0xf23, 0xf24, 0xf25, 0xf26, 0xf27, 0xf28, 0xf29 } },
-        { "tirh"sv, { 0x114d0, 0x114d1, 0x114d2, 0x114d3, 0x114d4, 0x114d5, 0x114d6, 0x114d7, 0x114d8, 0x114d9 } },
-        { "vaii"sv, { 0xa620, 0xa621, 0xa622, 0xa623, 0xa624, 0xa625, 0xa626, 0xa627, 0xa628, 0xa629 } },
-        { "wara"sv, { 0x118e0, 0x118e1, 0x118e2, 0x118e3, 0x118e4, 0x118e5, 0x118e6, 0x118e7, 0x118e8, 0x118e9 } },
-        { "wcho"sv, { 0x1e2f0, 0x1e2f1, 0x1e2f2, 0x1e2f3, 0x1e2f4, 0x1e2f5, 0x1e2f6, 0x1e2f7, 0x1e2f8, 0x1e2f9 } },
-    };
+    // Fall back to "latn" digits when Unicode data generation is disabled. The table is static so that the returned Span does not dangle.
+    static constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } };
+    return digits.span();
+}
 
-    auto digits = s_numbering_system_digits.get(system);
+String replace_digits_for_number_system(StringView system, StringView number)
+{
+    auto digits = get_digits_for_number_system(system);
     if (!digits.has_value())
-        digits = s_numbering_system_digits.get("latn"sv);
+        digits = get_digits_for_number_system("latn"sv);
     VERIFY(digits.has_value());
 
     StringBuilder builder;

+ 3 - 0
Userland/Libraries/LibUnicode/NumberFormat.h

@@ -65,10 +65,13 @@ enum class NumericSymbol : u8 {
     PlusSign,
 };
 
+Optional<NumberSystem> number_system_from_string(StringView system);
 Optional<StringView> get_default_number_system(StringView locale);
 
 Optional<StringView> get_number_system_symbol(StringView locale, StringView system, NumericSymbol symbol);
 Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
+
+Optional<Span<u32 const>> get_digits_for_number_system(StringView system);
 String replace_digits_for_number_system(StringView system, StringView number);
 
 Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);