LibJS+LibUnicode: Separate number formatting methods from Locale.h
Currently, we generate separate data files for locale and number format related tables/methods, but provide public accessors for all of the data in one Locale.h file. Rather than continuing this trend for date-time, relative time, etc. formatting, it's a bit easier to reason about if the public accessors are also in separate files.
This commit is contained in:
parent
bb11437792
commit
914675e826
Notes:
sideshowbarker
2024-07-17 23:23:01 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/914675e826b Pull-request: https://github.com/SerenityOS/serenity/pull/11128 Reviewed-by: https://github.com/linusg ✅
8 changed files with 216 additions and 178 deletions
Meta/Lagom/Tools/CodeGenerators/LibUnicode
Userland/Libraries
LibJS/Runtime/Intl
LibUnicode
|
@ -24,6 +24,7 @@
|
|||
#include <LibCore/DirIterator.h>
|
||||
#include <LibCore/File.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <LibUnicode/NumberFormat.h>
|
||||
#include <math.h>
|
||||
|
||||
using StringIndexType = u16;
|
||||
|
@ -541,6 +542,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
|
|||
#include <AK/BinarySearch.h>
|
||||
#include <AK/Span.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <LibUnicode/NumberFormat.h>
|
||||
#include <LibUnicode/UnicodeNumberFormat.h>
|
||||
|
||||
namespace Unicode::Detail {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <LibJS/Runtime/Intl/NumberFormat.h>
|
||||
#include <LibJS/Runtime/Intl/NumberFormatFunction.h>
|
||||
#include <LibUnicode/CurrencyCode.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <AK/String.h>
|
||||
#include <LibJS/Runtime/Intl/AbstractOperations.h>
|
||||
#include <LibJS/Runtime/Object.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <LibUnicode/NumberFormat.h>
|
||||
|
||||
namespace JS::Intl {
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ SET(SOURCES
|
|||
CharacterTypes.cpp
|
||||
CurrencyCode.cpp
|
||||
Locale.cpp
|
||||
NumberFormat.cpp
|
||||
)
|
||||
|
||||
serenity_lib(LibUnicode unicode)
|
||||
|
|
|
@ -8,14 +8,11 @@
|
|||
#include <AK/GenericLexer.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
|
||||
#if ENABLE_UNICODE_DATA
|
||||
# include <LibUnicode/UnicodeData.h>
|
||||
# include <LibUnicode/UnicodeLocale.h>
|
||||
# include <LibUnicode/UnicodeNumberFormat.h>
|
||||
#endif
|
||||
|
||||
namespace Unicode {
|
||||
|
@ -815,51 +812,6 @@ Vector<StringView> get_locale_key_mapping([[maybe_unused]] StringView locale, [[
|
|||
return {};
|
||||
}
|
||||
|
||||
Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_number_system_symbol(locale, system, symbol);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_number_system_groupings(locale, system);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_compact_number_system_formats(locale, system, type);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_standard_number_system_format(locale, system, type);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_unit_formats(locale, unit, style);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
Optional<ListPatterns> get_locale_list_patterns([[maybe_unused]] StringView locale, [[maybe_unused]] StringView type, [[maybe_unused]] StringView style)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
|
@ -984,88 +936,6 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language
|
|||
return aliases[0].to_string();
|
||||
}
|
||||
|
||||
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
|
||||
{
|
||||
// FIXME: This is a rather naive and locale-unaware implementation Unicode's TR-35 pluralization
|
||||
// rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
|
||||
// Once those rules are implemented for LibJS, we better use them instead.
|
||||
auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> {
|
||||
if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end())
|
||||
return *it;
|
||||
return {};
|
||||
};
|
||||
|
||||
if (number == 0) {
|
||||
if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value())
|
||||
return patterns;
|
||||
} else if (number == 1) {
|
||||
if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value())
|
||||
return patterns;
|
||||
} else if (number == 2) {
|
||||
if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value())
|
||||
return patterns;
|
||||
} else if (number > 2) {
|
||||
if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value())
|
||||
return patterns;
|
||||
}
|
||||
|
||||
return find_plurality(NumberFormat::Plurality::Other);
|
||||
}
|
||||
|
||||
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
||||
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
constexpr auto number_key = "{number}"sv;
|
||||
constexpr auto currency_key = "{currency}"sv;
|
||||
constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
|
||||
|
||||
auto number_index = base_pattern.find(number_key);
|
||||
VERIFY(number_index.has_value());
|
||||
|
||||
auto currency_index = base_pattern.find(currency_key);
|
||||
VERIFY(currency_index.has_value());
|
||||
|
||||
Utf8View utf8_currency_display { currency_display };
|
||||
Optional<String> currency_key_with_spacing;
|
||||
|
||||
auto last_code_point = [](StringView string) {
|
||||
Utf8View utf8_string { string };
|
||||
u32 code_point = 0;
|
||||
|
||||
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
|
||||
code_point = *it;
|
||||
|
||||
return code_point;
|
||||
};
|
||||
|
||||
if (*number_index < *currency_index) {
|
||||
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
|
||||
|
||||
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
|
||||
u32 first_currency_code_point = *utf8_currency_display.begin();
|
||||
|
||||
if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
|
||||
currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
|
||||
}
|
||||
} else {
|
||||
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
|
||||
|
||||
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
|
||||
u32 last_currency_code_point = last_code_point(currency_display);
|
||||
|
||||
if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
|
||||
currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
|
||||
}
|
||||
}
|
||||
|
||||
if (currency_key_with_spacing.has_value())
|
||||
return base_pattern.replace(currency_key, *currency_key_with_spacing);
|
||||
#endif
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
String LanguageID::to_string() const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
|
|
@ -85,46 +85,6 @@ enum class Style : u8 {
|
|||
Numeric,
|
||||
};
|
||||
|
||||
struct NumberGroupings {
|
||||
u8 primary_grouping_size { 0 };
|
||||
u8 secondary_grouping_size { 0 };
|
||||
};
|
||||
|
||||
enum class StandardNumberFormatType : u8 {
|
||||
Decimal,
|
||||
Currency,
|
||||
Accounting,
|
||||
Percent,
|
||||
Scientific,
|
||||
};
|
||||
|
||||
enum class CompactNumberFormatType : u8 {
|
||||
DecimalLong,
|
||||
DecimalShort,
|
||||
CurrencyUnit,
|
||||
CurrencyShort,
|
||||
};
|
||||
|
||||
struct NumberFormat {
|
||||
enum class Plurality : u8 {
|
||||
Other,
|
||||
Zero,
|
||||
Single,
|
||||
One,
|
||||
Two,
|
||||
Few,
|
||||
Many,
|
||||
};
|
||||
|
||||
u8 magnitude { 0 };
|
||||
u8 exponent { 0 };
|
||||
Plurality plurality { Plurality::Other };
|
||||
StringView zero_format {};
|
||||
StringView positive_format {};
|
||||
StringView negative_format {};
|
||||
Vector<StringView> identifiers {};
|
||||
};
|
||||
|
||||
struct ListPatterns {
|
||||
StringView start;
|
||||
StringView middle;
|
||||
|
@ -188,10 +148,6 @@ Optional<StringView> get_locale_script_mapping(StringView locale, StringView scr
|
|||
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style);
|
||||
Vector<StringView> get_locale_key_mapping(StringView locale, StringView keyword);
|
||||
Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
|
||||
Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
|
||||
Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
|
||||
Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
|
||||
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
|
||||
Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView type, StringView style);
|
||||
|
||||
Optional<StringView> resolve_language_alias(StringView language);
|
||||
|
@ -204,7 +160,4 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id);
|
|||
Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id);
|
||||
String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
|
||||
|
||||
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
|
||||
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
|
||||
|
||||
}
|
||||
|
|
146
Userland/Libraries/LibUnicode/NumberFormat.cpp
Normal file
146
Userland/Libraries/LibUnicode/NumberFormat.cpp
Normal file
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <LibUnicode/Locale.h>
|
||||
#include <LibUnicode/NumberFormat.h>
|
||||
|
||||
#if ENABLE_UNICODE_DATA
|
||||
# include <LibUnicode/UnicodeData.h>
|
||||
# include <LibUnicode/UnicodeNumberFormat.h>
|
||||
#endif
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Public accessor for a number system symbol. When Unicode data is compiled in, the lookup is
// forwarded to Detail::get_number_system_symbol() with the same locale, system and symbol;
// otherwise there is nothing to look up and an empty Optional is returned.
Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_number_system_symbol(locale, system, symbol);
#endif
}
|
||||
|
||||
// Public accessor for the grouping sizes of a locale's number system. When Unicode data is
// compiled in, the lookup is forwarded to Detail::get_number_system_groupings(); otherwise an
// empty Optional is returned.
Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_number_system_groupings(locale, system);
#endif
}
|
||||
|
||||
// Public accessor for a single standard number format of the requested type. When Unicode data
// is compiled in, the lookup is forwarded to Detail::get_standard_number_system_format();
// otherwise an empty Optional is returned.
Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_standard_number_system_format(locale, system, type);
#endif
}
|
||||
|
||||
// Public accessor for the compact number formats of the requested type. When Unicode data is
// compiled in, the lookup is forwarded to Detail::get_compact_number_system_formats();
// otherwise an empty Vector is returned.
Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_compact_number_system_formats(locale, system, type);
#endif
}
|
||||
|
||||
// Public accessor for the formats of a unit in the given style. When Unicode data is compiled
// in, the lookup is forwarded to Detail::get_unit_formats(); otherwise an empty Vector is
// returned.
Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_unit_formats(locale, unit, style);
#endif
}
|
||||
|
||||
// Picks the entry of `formats` whose plurality best fits `number`.
//
// The number is first mapped to a preferred plurality (0 -> Zero, 1 -> One, 2 -> Two, anything
// greater than 2 -> Many). If `formats` has an entry with that plurality, the first such entry is
// returned. In every other case (no preferred plurality, or no entry for it) the first entry with
// Plurality::Other is returned, or an empty Optional if there is none.
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
{
    // FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
    //        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
    //        Once those rules are implemented for LibJS, we better use them instead.
    auto first_format_with = [&formats](NumberFormat::Plurality wanted) -> Optional<NumberFormat> {
        for (auto const& candidate : formats) {
            if (candidate.plurality == wanted)
                return candidate;
        }

        return {};
    };

    // Values that match none of the comparisons below (negative, fractional, NaN) have no
    // preferred plurality and go straight to the Other fallback.
    Optional<NumberFormat::Plurality> preferred_plurality;

    if (number == 0)
        preferred_plurality = NumberFormat::Plurality::Zero;
    else if (number == 1)
        preferred_plurality = NumberFormat::Plurality::One;
    else if (number == 2)
        preferred_plurality = NumberFormat::Plurality::Two;
    else if (number > 2)
        preferred_plurality = NumberFormat::Plurality::Many;

    if (preferred_plurality.has_value()) {
        auto preferred_format = first_format_with(*preferred_plurality);

        if (preferred_format.has_value())
            return preferred_format;
    }

    return first_format_with(NumberFormat::Plurality::Other);
}
|
||||
|
||||
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
||||
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
constexpr auto number_key = "{number}"sv;
|
||||
constexpr auto currency_key = "{currency}"sv;
|
||||
constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
|
||||
|
||||
auto number_index = base_pattern.find(number_key);
|
||||
VERIFY(number_index.has_value());
|
||||
|
||||
auto currency_index = base_pattern.find(currency_key);
|
||||
VERIFY(currency_index.has_value());
|
||||
|
||||
Utf8View utf8_currency_display { currency_display };
|
||||
Optional<String> currency_key_with_spacing;
|
||||
|
||||
auto last_code_point = [](StringView string) {
|
||||
Utf8View utf8_string { string };
|
||||
u32 code_point = 0;
|
||||
|
||||
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
|
||||
code_point = *it;
|
||||
|
||||
return code_point;
|
||||
};
|
||||
|
||||
if (*number_index < *currency_index) {
|
||||
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
|
||||
|
||||
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
|
||||
u32 first_currency_code_point = *utf8_currency_display.begin();
|
||||
|
||||
if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
|
||||
currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
|
||||
}
|
||||
} else {
|
||||
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
|
||||
|
||||
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
|
||||
u32 last_currency_code_point = last_code_point(currency_display);
|
||||
|
||||
if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
|
||||
currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
|
||||
}
|
||||
}
|
||||
|
||||
if (currency_key_with_spacing.has_value())
|
||||
return base_pattern.replace(currency_key, *currency_key_with_spacing);
|
||||
#endif
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
65
Userland/Libraries/LibUnicode/NumberFormat.h
Normal file
65
Userland/Libraries/LibUnicode/NumberFormat.h
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibUnicode/Forward.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Pair of grouping sizes for a number system, returned by get_number_system_groupings().
// Both sizes default to 0.
// NOTE(review): presumably the digit group widths used when inserting group separators - confirm
// against the data generator.
struct NumberGroupings {
|
||||
u8 primary_grouping_size { 0 };
|
||||
u8 secondary_grouping_size { 0 };
|
||||
};
|
||||
|
||||
// Selects which standard number format get_standard_number_system_format() should return.
enum class StandardNumberFormatType : u8 {
|
||||
Decimal,
|
||||
Currency,
|
||||
Accounting,
|
||||
Percent,
|
||||
Scientific,
|
||||
};
|
||||
|
||||
// Selects which set of compact number formats get_compact_number_system_formats() should return.
enum class CompactNumberFormatType : u8 {
|
||||
DecimalLong,
|
||||
DecimalShort,
|
||||
CurrencyUnit,
|
||||
CurrencyShort,
|
||||
};
|
||||
|
||||
// One number format entry, as returned by the get_*_format(s) accessors declared below and
// consumed by select_pattern_with_plurality().
struct NumberFormat {
|
||||
// Plural category this format applies to. select_pattern_with_plurality() only consults
// Zero, One, Two, Many and Other.
enum class Plurality : u8 {
|
||||
Other,
|
||||
Zero,
|
||||
Single,
|
||||
One,
|
||||
Two,
|
||||
Few,
|
||||
Many,
|
||||
};
|
||||
|
||||
// NOTE(review): the meaning of magnitude and exponent is not visible here - presumably they
// describe the power of ten a compact format applies to; confirm against the data generator.
u8 magnitude { 0 };
|
||||
u8 exponent { 0 };
|
||||
// Defaults to Plurality::Other, the category select_pattern_with_plurality() falls back to.
Plurality plurality { Plurality::Other };
|
||||
// NOTE(review): presumably the patterns to use for zero, positive and negative values
// respectively - confirm against callers.
StringView zero_format {};
|
||||
StringView positive_format {};
|
||||
StringView negative_format {};
|
||||
// NOTE(review): purpose of identifiers is not visible here - confirm against the data generator.
Vector<StringView> identifiers {};
|
||||
};
|
||||
|
||||
Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
|
||||
Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
|
||||
Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
|
||||
Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
|
||||
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
|
||||
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
|
||||
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue