LibJS+LibUnicode: Separate number formatting methods from Locale.h

Currently, we generate separate data files for locale and number format
related tables/methods, but provide public accessors for all of the data
in one Locale.h file. Rather than continuing this trend for date-time,
relative time, etc. formatting, it's a bit easier to reason about if the
public accessors are also in separate files.
This commit is contained in:
Timothy Flynn 2021-11-27 10:53:42 -05:00 committed by Linus Groh
parent bb11437792
commit 914675e826
Notes: sideshowbarker 2024-07-17 23:23:01 +09:00
8 changed files with 216 additions and 178 deletions
Meta/Lagom/Tools/CodeGenerators/LibUnicode
Userland/Libraries

View file

@ -24,6 +24,7 @@
#include <LibCore/DirIterator.h>
#include <LibCore/File.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/NumberFormat.h>
#include <math.h>
using StringIndexType = u16;
@ -541,6 +542,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
#include <AK/BinarySearch.h>
#include <AK/Span.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/NumberFormat.h>
#include <LibUnicode/UnicodeNumberFormat.h>
namespace Unicode::Detail {

View file

@ -11,6 +11,7 @@
#include <LibJS/Runtime/Intl/NumberFormat.h>
#include <LibJS/Runtime/Intl/NumberFormatFunction.h>
#include <LibUnicode/CurrencyCode.h>
#include <LibUnicode/Locale.h>
#include <math.h>
#include <stdlib.h>

View file

@ -10,7 +10,7 @@
#include <AK/String.h>
#include <LibJS/Runtime/Intl/AbstractOperations.h>
#include <LibJS/Runtime/Object.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/NumberFormat.h>
namespace JS::Intl {

View file

@ -5,6 +5,7 @@ SET(SOURCES
CharacterTypes.cpp
CurrencyCode.cpp
Locale.cpp
NumberFormat.cpp
)
serenity_lib(LibUnicode unicode)

View file

@ -8,14 +8,11 @@
#include <AK/GenericLexer.h>
#include <AK/QuickSort.h>
#include <AK/StringBuilder.h>
#include <AK/Utf8View.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/Locale.h>
#if ENABLE_UNICODE_DATA
# include <LibUnicode/UnicodeData.h>
# include <LibUnicode/UnicodeLocale.h>
# include <LibUnicode/UnicodeNumberFormat.h>
#endif
namespace Unicode {
@ -815,51 +812,6 @@ Vector<StringView> get_locale_key_mapping([[maybe_unused]] StringView locale, [[
return {};
}
// Looks up a symbol for the given locale and number system.
// When ENABLE_UNICODE_DATA is set this forwards to Detail::get_number_system_symbol();
// otherwise it returns an empty Optional and the parameters go unused.
Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
{
#if ENABLE_UNICODE_DATA
return Detail::get_number_system_symbol(locale, system, symbol);
#else
return {};
#endif
}
// Looks up the NumberGroupings for the given locale and number system.
// When ENABLE_UNICODE_DATA is set this forwards to Detail::get_number_system_groupings();
// otherwise it returns an empty Optional and the parameters go unused.
Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
{
#if ENABLE_UNICODE_DATA
return Detail::get_number_system_groupings(locale, system);
#else
return {};
#endif
}
// Looks up the compact number formats of the requested type for the given locale and number system.
// When ENABLE_UNICODE_DATA is set this forwards to Detail::get_compact_number_system_formats();
// otherwise it returns an empty Vector and the parameters go unused.
Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
{
#if ENABLE_UNICODE_DATA
return Detail::get_compact_number_system_formats(locale, system, type);
#else
return {};
#endif
}
// Looks up the standard number format of the requested type for the given locale and number system.
// When ENABLE_UNICODE_DATA is set this forwards to Detail::get_standard_number_system_format();
// otherwise it returns an empty Optional and the parameters go unused.
Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
{
#if ENABLE_UNICODE_DATA
return Detail::get_standard_number_system_format(locale, system, type);
#else
return {};
#endif
}
// Looks up the formats for the given unit and style in the given locale.
// When ENABLE_UNICODE_DATA is set this forwards to Detail::get_unit_formats();
// otherwise it returns an empty Vector and the parameters go unused.
Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
{
#if ENABLE_UNICODE_DATA
return Detail::get_unit_formats(locale, unit, style);
#else
return {};
#endif
}
Optional<ListPatterns> get_locale_list_patterns([[maybe_unused]] StringView locale, [[maybe_unused]] StringView type, [[maybe_unused]] StringView style)
{
#if ENABLE_UNICODE_DATA
@ -984,88 +936,6 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language
return aliases[0].to_string();
}
// Picks the entry of `formats` whose plurality best matches `number`.
// The candidate plurality is Zero for 0, One for 1, Two for 2 and Many for anything greater
// than 2; if no entry carries that plurality (or no candidate applies, e.g. negative or
// fractional values below 2), the entry tagged Plurality::Other is returned instead.
// Returns an empty Optional when neither is present in `formats`.
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
{
// FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
// rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
// Once those rules are implemented for LibJS, we better use them instead.
// Returns the first format tagged with the given plurality, if any.
auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> {
if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end())
return *it;
return {};
};
if (number == 0) {
if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value())
return patterns;
} else if (number == 1) {
if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value())
return patterns;
} else if (number == 2) {
if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value())
return patterns;
} else if (number > 2) {
if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value())
return patterns;
}
// Either no specific plurality applied or the formats lack it; fall back to Other.
return find_plurality(NumberFormat::Plurality::Other);
}
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
// Decides whether a no-break space must be inserted next to the "{currency}" placeholder of
// `base_pattern` when it will be filled with `currency_display`. Returns the adjusted pattern
// if spacing was added, and an empty Optional if no change is needed or Unicode data is
// disabled. `base_pattern` must contain both "{number}" and "{currency}" (VERIFY'd below).
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
{
#if ENABLE_UNICODE_DATA
constexpr auto number_key = "{number}"sv;
constexpr auto currency_key = "{currency}"sv;
constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
auto number_index = base_pattern.find(number_key);
VERIFY(number_index.has_value());
auto currency_index = base_pattern.find(currency_key);
VERIFY(currency_index.has_value());
Utf8View utf8_currency_display { currency_display };
Optional<String> currency_key_with_spacing;
// Walks the whole string and yields its final code point (0 for an empty string).
auto last_code_point = [](StringView string) {
Utf8View utf8_string { string };
u32 code_point = 0;
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
code_point = *it;
return code_point;
};
if (*number_index < *currency_index) {
// Currency follows the number: inspect the code point just before "{currency}" and the
// first code point of the currency display; the space (if any) goes before the currency.
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
u32 first_currency_code_point = *utf8_currency_display.begin();
if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
}
} else {
// Currency precedes the number: inspect the code point just before "{number}" and the
// last code point of the currency display; the space (if any) goes after the currency.
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
u32 last_currency_code_point = last_code_point(currency_display);
if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
}
}
if (currency_key_with_spacing.has_value())
return base_pattern.replace(currency_key, *currency_key_with_spacing);
#endif
return {};
}
String LanguageID::to_string() const
{
StringBuilder builder;

View file

@ -85,46 +85,6 @@ enum class Style : u8 {
Numeric,
};
// Pair of grouping sizes for a number system; this is the payload returned by
// get_number_system_groupings(). Both sizes default to 0.
struct NumberGroupings {
u8 primary_grouping_size { 0 };
u8 secondary_grouping_size { 0 };
};
// Selects which standard format get_standard_number_system_format() should return.
enum class StandardNumberFormatType : u8 {
Decimal,
Currency,
Accounting,
Percent,
Scientific,
};
// Selects which family of compact formats get_compact_number_system_formats() should return.
enum class CompactNumberFormatType : u8 {
DecimalLong,
DecimalShort,
CurrencyUnit,
CurrencyShort,
};
// A single number format entry: pattern strings plus the metadata used to choose between
// entries (see select_pattern_with_plurality(), which matches on `plurality`).
struct NumberFormat {
// Plural category this entry is tagged with.
enum class Plurality : u8 {
Other,
Zero,
Single,
One,
Two,
Few,
Many,
};
// NOTE(review): magnitude/exponent presumably describe the power of ten a compact format
// applies to and how far the value is scaled; confirm against the data generator.
u8 magnitude { 0 };
u8 exponent { 0 };
Plurality plurality { Plurality::Other };
// Pattern strings; presumably chosen by the sign of the value being formatted (verify at call sites).
StringView zero_format {};
StringView positive_format {};
StringView negative_format {};
Vector<StringView> identifiers {};
};
struct ListPatterns {
StringView start;
StringView middle;
@ -188,10 +148,6 @@ Optional<StringView> get_locale_script_mapping(StringView locale, StringView scr
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style);
Vector<StringView> get_locale_key_mapping(StringView locale, StringView keyword);
Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView type, StringView style);
Optional<StringView> resolve_language_alias(StringView language);
@ -204,7 +160,4 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id);
Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id);
String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias);
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
}

View file

@ -0,0 +1,146 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Utf8View.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/NumberFormat.h>
#if ENABLE_UNICODE_DATA
# include <LibUnicode/UnicodeData.h>
# include <LibUnicode/UnicodeNumberFormat.h>
#endif
namespace Unicode {
// Public accessor for a number system symbol of a locale.
// Delegates to Detail::get_number_system_symbol() when Unicode data is compiled in;
// without it, there is nothing to look up and an empty Optional is returned.
Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_number_system_symbol(locale, system, symbol);
#endif
}
// Public accessor for the grouping sizes of a locale's number system.
// Delegates to Detail::get_number_system_groupings() when Unicode data is compiled in;
// without it, there is nothing to look up and an empty Optional is returned.
Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_number_system_groupings(locale, system);
#endif
}
// Public accessor for the standard number format of the requested type.
// Delegates to Detail::get_standard_number_system_format() when Unicode data is compiled in;
// without it, there is nothing to look up and an empty Optional is returned.
Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_standard_number_system_format(locale, system, type);
#endif
}
// Public accessor for the compact number formats of the requested type.
// Delegates to Detail::get_compact_number_system_formats() when Unicode data is compiled in;
// without it, there is nothing to look up and an empty Vector is returned.
Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_compact_number_system_formats(locale, system, type);
#endif
}
// Public accessor for the formats of a unit in a given style.
// Delegates to Detail::get_unit_formats() when Unicode data is compiled in;
// without it, there is nothing to look up and an empty Vector is returned.
Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style)
{
#if !ENABLE_UNICODE_DATA
    return {};
#else
    return Detail::get_unit_formats(locale, unit, style);
#endif
}
// Chooses the format in `formats` whose plurality matches `number`, falling back to the
// Plurality::Other entry. Returns an empty Optional if no suitable entry exists.
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number)
{
    // FIXME: This is a rather naive and locale-unaware implementation of Unicode's TR-35 pluralization
    //        rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
    //        Once those rules are implemented for LibJS, we better use them instead.

    // First entry tagged with the wanted plurality, if there is one.
    auto lookup = [&formats](NumberFormat::Plurality wanted) -> Optional<NumberFormat> {
        auto match = formats.find_if([wanted](auto& candidate) { return candidate.plurality == wanted; });
        if (match != formats.end())
            return *match;
        return {};
    };

    // Map the value onto the plurality we would like to use; values that fit none of the
    // cases (negative numbers, fractions below 2, NaN) go straight to the fallback.
    Optional<NumberFormat::Plurality> preferred;
    if (number == 0)
        preferred = NumberFormat::Plurality::Zero;
    else if (number == 1)
        preferred = NumberFormat::Plurality::One;
    else if (number == 2)
        preferred = NumberFormat::Plurality::Two;
    else if (number > 2)
        preferred = NumberFormat::Plurality::Many;

    if (preferred.has_value()) {
        auto selected = lookup(*preferred);
        if (selected.has_value())
            return selected;
    }

    return lookup(NumberFormat::Plurality::Other);
}
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
// Inserts a no-break space beside the "{currency}" placeholder of `base_pattern` when the
// pattern and `currency_display` would otherwise run together. Yields the adjusted pattern,
// or an empty Optional when nothing has to change (or Unicode data is not compiled in).
// Both "{number}" and "{currency}" must be present in `base_pattern`.
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
{
#if ENABLE_UNICODE_DATA
    constexpr auto number_key = "{number}"sv;
    constexpr auto currency_key = "{currency}"sv;
    constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)

    auto number_index = base_pattern.find(number_key);
    VERIFY(number_index.has_value());

    auto currency_index = base_pattern.find(currency_key);
    VERIFY(currency_index.has_value());

    // Final code point of a string, or 0 if the string is empty.
    auto final_code_point_of = [](StringView text) -> u32 {
        u32 result = 0;
        for (auto code_point : Utf8View { text })
            result = code_point;
        return result;
    };

    bool const currency_follows_number = *number_index < *currency_index;

    // Look at what precedes whichever placeholder comes second; an existing separator there
    // means the pattern already provides spacing.
    auto second_placeholder_index = currency_follows_number ? *currency_index : *number_index;
    u32 preceding_code_point = final_code_point_of(base_pattern.substring_view(0, second_placeholder_index));
    if (code_point_has_general_category(preceding_code_point, GeneralCategory::Separator))
        return {};

    // Then look at the edge of the currency display: its first code point when the currency
    // follows the number, its last code point when the currency comes first. Symbols need no spacing.
    u32 currency_edge_code_point = 0;
    if (currency_follows_number) {
        Utf8View utf8_currency_display { currency_display };
        currency_edge_code_point = *utf8_currency_display.begin();
    } else {
        currency_edge_code_point = final_code_point_of(currency_display);
    }
    if (code_point_has_general_category(currency_edge_code_point, GeneralCategory::Symbol))
        return {};

    // The space goes before the currency when it follows the number, after it otherwise.
    auto currency_key_with_spacing = currency_follows_number
        ? String::formatted("{}{}", spacing, currency_key)
        : String::formatted("{}{}", currency_key, spacing);

    return base_pattern.replace(currency_key, currency_key_with_spacing);
#else
    return {};
#endif
}
}

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Optional.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
#include <LibUnicode/Forward.h>
namespace Unicode {
// Pair of grouping sizes for a number system; this is the payload returned by
// get_number_system_groupings(). Both sizes default to 0.
struct NumberGroupings {
u8 primary_grouping_size { 0 };
u8 secondary_grouping_size { 0 };
};
// Selects which standard format get_standard_number_system_format() should return.
enum class StandardNumberFormatType : u8 {
Decimal,
Currency,
Accounting,
Percent,
Scientific,
};
// Selects which family of compact formats get_compact_number_system_formats() should return.
enum class CompactNumberFormatType : u8 {
DecimalLong,
DecimalShort,
CurrencyUnit,
CurrencyShort,
};
// A single number format entry: pattern strings plus the metadata used to choose between
// entries (see select_pattern_with_plurality(), which matches on `plurality`).
struct NumberFormat {
// Plural category this entry is tagged with.
enum class Plurality : u8 {
Other,
Zero,
Single,
One,
Two,
Few,
Many,
};
// NOTE(review): magnitude/exponent presumably describe the power of ten a compact format
// applies to and how far the value is scaled; confirm against the data generator.
u8 magnitude { 0 };
u8 exponent { 0 };
Plurality plurality { Plurality::Other };
// Pattern strings; presumably chosen by the sign of the value being formatted (verify at call sites).
StringView zero_format {};
StringView positive_format {};
StringView negative_format {};
Vector<StringView> identifiers {};
};
// Locale data accessors. Each returns an empty Optional/Vector when the library is built
// without ENABLE_UNICODE_DATA.
// Symbol lookup for a locale's number system.
Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol);
// Grouping sizes for a locale's number system.
Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system);
// The standard format of the given type for a locale's number system.
Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type);
// All compact formats of the given type for a locale's number system.
Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type);
// All formats for a unit in the given style.
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
// Picks the format whose plurality matches `number`, falling back to Plurality::Other.
Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number);
// Returns `base_pattern` with a no-break space added beside "{currency}" when needed,
// or an empty Optional when the pattern can be used unchanged.
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
}