Quellcode durchsuchen

LibJS+LibUnicode: Separate number formatting methods from Locale.h

Currently, we generate separate data files for locale and number format
related tables/methods, but provide public accessors for all of the data
in one Locale.h file. Rather than continuing this trend for date-time,
relative time, etc. formatting, it's a bit easier to reason about if the
public accessors are also in separate files.
Timothy Flynn vor 3 Jahren
Ursprung
Commit
914675e826

+ 2 - 0
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeNumberFormat.cpp

@@ -24,6 +24,7 @@
 #include <LibCore/DirIterator.h>
 #include <LibCore/File.h>
 #include <LibUnicode/Locale.h>
+#include <LibUnicode/NumberFormat.h>
 #include <math.h>
 
 using StringIndexType = u16;
@@ -541,6 +542,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
 #include <AK/BinarySearch.h>
 #include <AK/Span.h>
 #include <LibUnicode/Locale.h>
+#include <LibUnicode/NumberFormat.h>
 #include <LibUnicode/UnicodeNumberFormat.h>
 
 namespace Unicode::Detail {

+ 1 - 0
Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp

@@ -11,6 +11,7 @@
 #include <LibJS/Runtime/Intl/NumberFormat.h>
 #include <LibJS/Runtime/Intl/NumberFormatFunction.h>
 #include <LibUnicode/CurrencyCode.h>
+#include <LibUnicode/Locale.h>
 #include <math.h>
 #include <stdlib.h>
 

+ 1 - 1
Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h

@@ -10,7 +10,7 @@
 #include <AK/String.h>
 #include <LibJS/Runtime/Intl/AbstractOperations.h>
 #include <LibJS/Runtime/Object.h>
-#include <LibUnicode/Locale.h>
+#include <LibUnicode/NumberFormat.h>
 
 namespace JS::Intl {
 

+ 1 - 0
Userland/Libraries/LibUnicode/CMakeLists.txt

@@ -5,6 +5,7 @@ SET(SOURCES
     CharacterTypes.cpp
     CurrencyCode.cpp
     Locale.cpp
+    NumberFormat.cpp
 )
 
 serenity_lib(LibUnicode unicode)

+ 0 - 130
Userland/Libraries/LibUnicode/Locale.cpp

@@ -8,14 +8,11 @@
 #include <AK/GenericLexer.h>
 #include <AK/QuickSort.h>
 #include <AK/StringBuilder.h>
-#include <AK/Utf8View.h>
 #include <LibUnicode/CharacterTypes.h>
 #include <LibUnicode/Locale.h>
 
 #if ENABLE_UNICODE_DATA
-#    include <LibUnicode/UnicodeData.h>
 #    include <LibUnicode/UnicodeLocale.h>
-#    include <LibUnicode/UnicodeNumberFormat.h>
 #endif
 
 namespace Unicode {
@@ -815,51 +812,6 @@ Vector<StringView> get_locale_key_mapping([[maybe_unused]] StringView locale, [[
     return {};
 }
 
-Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
-{
-#if ENABLE_UNICODE_DATA
-    return Detail::get_number_system_symbol(locale, system, symbol);
-#else
-    return {};
-#endif
-}
-
-Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
-{
-#if ENABLE_UNICODE_DATA
-    return Detail::get_number_system_groupings(locale, system);
-#else
-    return {};
-#endif
-}
-
-Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
-{
-#if ENABLE_UNICODE_DATA
-    return Detail::get_compact_number_system_formats(locale, system, type);
-#else
-    return {};
-#endif
-}
-
-Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
-{
-#if ENABLE_UNICODE_DATA
-    return Detail::get_standard_number_system_format(locale, system, type);
-#else
-    return {};
-#endif
-}
-
-Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
-{
-#if ENABLE_UNICODE_DATA
-    return Detail::get_unit_formats(locale, unit, style);
-#else
-    return {};
-#endif
-}
-
 Optional<ListPatterns> get_locale_list_patterns([[maybe_unused]] StringView locale, [[maybe_unused]] StringView type, [[maybe_unused]] StringView style)
 {
 #if ENABLE_UNICODE_DATA
@@ -984,88 +936,6 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language
     return aliases[0].to_string();
 }
 
-Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
-{
-    // FIXME: This is a rather naive and locale-unaware implementation Unicode's TR-35 pluralization
-    //        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
-    //        Once those rules are implemented for LibJS, we better use them instead.
-    auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> {
-        if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end())
-            return *it;
-        return {};
-    };
-
-    if (number == 0) {
-        if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value())
-            return patterns;
-    } else if (number == 1) {
-        if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value())
-            return patterns;
-    } else if (number == 2) {
-        if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value())
-            return patterns;
-    } else if (number > 2) {
-        if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value())
-            return patterns;
-    }
-
-    return find_plurality(NumberFormat::Plurality::Other);
-}
-
-// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
-Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
-{
-#if ENABLE_UNICODE_DATA
-    constexpr auto number_key = "{number}"sv;
-    constexpr auto currency_key = "{currency}"sv;
-    constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
-
-    auto number_index = base_pattern.find(number_key);
-    VERIFY(number_index.has_value());
-
-    auto currency_index = base_pattern.find(currency_key);
-    VERIFY(currency_index.has_value());
-
-    Utf8View utf8_currency_display { currency_display };
-    Optional<String> currency_key_with_spacing;
-
-    auto last_code_point = [](StringView string) {
-        Utf8View utf8_string { string };
-        u32 code_point = 0;
-
-        for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
-            code_point = *it;
-
-        return code_point;
-    };
-
-    if (*number_index < *currency_index) {
-        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
-
-        if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
-            u32 first_currency_code_point = *utf8_currency_display.begin();
-
-            if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
-                currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
-        }
-    } else {
-        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
-
-        if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
-            u32 last_currency_code_point = last_code_point(currency_display);
-
-            if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
-                currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
-        }
-    }
-
-    if (currency_key_with_spacing.has_value())
-        return base_pattern.replace(currency_key, *currency_key_with_spacing);
-#endif
-
-    return {};
-}
-
 String LanguageID::to_string() const
 {
     StringBuilder builder;

+ 0 - 47
Userland/Libraries/LibUnicode/Locale.h

@@ -85,46 +85,6 @@ enum class Style : u8 {
     Numeric,
 };
 
-struct NumberGroupings {
-    u8 primary_grouping_size { 0 };
-    u8 secondary_grouping_size { 0 };
-};
-
-enum class StandardNumberFormatType : u8 {
-    Decimal,
-    Currency,
-    Accounting,
-    Percent,
-    Scientific,
-};
-
-enum class CompactNumberFormatType : u8 {
-    DecimalLong,
-    DecimalShort,
-    CurrencyUnit,
-    CurrencyShort,
-};
-
-struct NumberFormat {
-    enum class Plurality : u8 {
-        Other,
-        Zero,
-        Single,
-        One,
-        Two,
-        Few,
-        Many,
-    };
-
-    u8 magnitude { 0 };
-    u8 exponent { 0 };
-    Plurality plurality { Plurality::Other };
-    StringView zero_format {};
-    StringView positive_format {};
-    StringView negative_format {};
-    Vector<StringView> identifiers {};
-};
-
 struct ListPatterns {
     StringView start;
     StringView middle;
@@ -188,10 +148,6 @@ Optional<StringView> get_locale_script_mapping(StringView locale, StringView scr
 Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style);
 Vector<StringView> get_locale_key_mapping(StringView locale, StringView keyword);
 Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
-Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
-Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
-Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
-Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
 Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView type, StringView style);
 
 Optional<StringView> resolve_language_alias(StringView language);
@@ -204,7 +160,4 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id);
 Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id);
 String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
 
-Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
-Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
-
 }

+ 146 - 0
Userland/Libraries/LibUnicode/NumberFormat.cpp

@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Utf8View.h>
+#include <LibUnicode/CharacterTypes.h>
+#include <LibUnicode/Locale.h>
+#include <LibUnicode/NumberFormat.h>
+
+#if ENABLE_UNICODE_DATA
+#    include <LibUnicode/UnicodeData.h>
+#    include <LibUnicode/UnicodeNumberFormat.h>
+#endif
+
+namespace Unicode {
+
+Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
+{
+#if ENABLE_UNICODE_DATA
+    return Detail::get_number_system_symbol(locale, system, symbol);
+#else
+    return {};
+#endif
+}
+
+Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
+{
+#if ENABLE_UNICODE_DATA
+    return Detail::get_number_system_groupings(locale, system);
+#else
+    return {};
+#endif
+}
+
+Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
+{
+#if ENABLE_UNICODE_DATA
+    return Detail::get_standard_number_system_format(locale, system, type);
+#else
+    return {};
+#endif
+}
+
+Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
+{
+#if ENABLE_UNICODE_DATA
+    return Detail::get_compact_number_system_formats(locale, system, type);
+#else
+    return {};
+#endif
+}
+
+Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
+{
+#if ENABLE_UNICODE_DATA
+    return Detail::get_unit_formats(locale, unit, style);
+#else
+    return {};
+#endif
+}
+
+Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
+{
+    // FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
+    //        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
+    //        Once those rules are implemented for LibJS, we should use them instead.
+    auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> {
+        if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end())
+            return *it;
+        return {};
+    };
+
+    if (number == 0) {
+        if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value())
+            return patterns;
+    } else if (number == 1) {
+        if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value())
+            return patterns;
+    } else if (number == 2) {
+        if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value())
+            return patterns;
+    } else if (number > 2) {
+        if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value())
+            return patterns;
+    }
+
+    return find_plurality(NumberFormat::Plurality::Other);
+}
+
+// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
+Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
+{
+#if ENABLE_UNICODE_DATA
+    constexpr auto number_key = "{number}"sv;
+    constexpr auto currency_key = "{currency}"sv;
+    constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
+
+    auto number_index = base_pattern.find(number_key);
+    VERIFY(number_index.has_value());
+
+    auto currency_index = base_pattern.find(currency_key);
+    VERIFY(currency_index.has_value());
+
+    Utf8View utf8_currency_display { currency_display };
+    Optional<String> currency_key_with_spacing;
+
+    auto last_code_point = [](StringView string) {
+        Utf8View utf8_string { string };
+        u32 code_point = 0;
+
+        for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
+            code_point = *it;
+
+        return code_point;
+    };
+
+    if (*number_index < *currency_index) {
+        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
+
+        if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
+            u32 first_currency_code_point = *utf8_currency_display.begin();
+
+            if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
+                currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
+        }
+    } else {
+        u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
+
+        if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
+            u32 last_currency_code_point = last_code_point(currency_display);
+
+            if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
+                currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
+        }
+    }
+
+    if (currency_key_with_spacing.has_value())
+        return base_pattern.replace(currency_key, *currency_key_with_spacing);
+#endif
+
+    return {};
+}
+
+}

+ 65 - 0
Userland/Libraries/LibUnicode/NumberFormat.h

@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Optional.h>
+#include <AK/String.h>
+#include <AK/StringView.h>
+#include <AK/Vector.h>
+#include <LibUnicode/Forward.h>
+
+namespace Unicode {
+
+struct NumberGroupings {
+    u8 primary_grouping_size { 0 };
+    u8 secondary_grouping_size { 0 };
+};
+
+enum class StandardNumberFormatType : u8 {
+    Decimal,
+    Currency,
+    Accounting,
+    Percent,
+    Scientific,
+};
+
+enum class CompactNumberFormatType : u8 {
+    DecimalLong,
+    DecimalShort,
+    CurrencyUnit,
+    CurrencyShort,
+};
+
+struct NumberFormat {
+    enum class Plurality : u8 {
+        Other,
+        Zero,
+        Single,
+        One,
+        Two,
+        Few,
+        Many,
+    };
+
+    u8 magnitude { 0 };
+    u8 exponent { 0 };
+    Plurality plurality { Plurality::Other };
+    StringView zero_format {};
+    StringView positive_format {};
+    StringView negative_format {};
+    Vector<StringView> identifiers {};
+};
+
+Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
+Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
+Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
+Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
+Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
+Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
+Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
+
+}