2021-11-27 15:53:42 +00:00
|
|
|
/*
|
2023-01-22 16:55:26 +00:00
|
|
|
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
2021-11-27 15:53:42 +00:00
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
2022-01-11 15:07:45 +00:00
|
|
|
#include <AK/CharacterTypes.h>
|
2021-11-27 15:53:42 +00:00
|
|
|
#include <AK/Utf8View.h>
|
2022-09-02 16:11:30 +00:00
|
|
|
#include <LibLocale/Locale.h>
|
|
|
|
#include <LibLocale/NumberFormat.h>
|
2021-11-27 15:53:42 +00:00
|
|
|
#include <LibUnicode/CharacterTypes.h>
|
|
|
|
|
|
|
|
#if ENABLE_UNICODE_DATA
|
|
|
|
# include <LibUnicode/UnicodeData.h>
|
|
|
|
#endif
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
namespace Locale {
|
2021-11-27 15:53:42 +00:00
|
|
|
|
2023-08-22 19:39:18 +00:00
|
|
|
// Weak default definition: ignores all of its arguments and returns an empty Optional.
// The weak attribute allows a strong definition of the same symbol to replace this one at link time
// (presumably the generated locale data when it is enabled - confirm against the build).
Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
|
|
|
|
// Weak default definition: ignores both arguments and returns an empty Optional.
// A strong definition of the same symbol, if one is linked in, takes precedence over this stub.
Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return {}; }
|
2023-08-22 20:12:51 +00:00
|
|
|
// Weak default definition: ignores all of its arguments and returns an empty Optional.
// A strong definition of the same symbol, if one is linked in, takes precedence over this stub.
Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return {}; }
|
|
|
|
// Weak default definition: ignores all of its arguments and returns an empty Vector.
// A strong definition of the same symbol, if one is linked in, takes precedence over this stub.
Vector<NumberFormat> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType) { return {}; }
|
|
|
|
// Weak default definition: ignores all of its arguments and returns an empty Vector.
// A strong definition of the same symbol, if one is linked in, takes precedence over this stub.
Vector<NumberFormat> __attribute__((weak)) get_unit_formats(StringView, StringView, Style) { return {}; }
|
2021-11-27 15:53:42 +00:00
|
|
|
|
2023-02-05 19:02:54 +00:00
|
|
|
// Weak default definition: ignores the requested number system and always returns a span over the
// ten ASCII digits U+0030..U+0039. A strong definition of the same symbol, if one is linked in,
// takes precedence over this stub.
Optional<ReadonlySpan<u32>> __attribute__((weak)) get_digits_for_number_system(StringView)
{
    // Fall back to "latn" digits when Unicode data generation is disabled.
    //
    // The table must have static storage duration: the returned span only refers to the array's
    // storage, so an automatic (non-static) local would leave the caller with a dangling view as
    // soon as this function returns.
    static constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } };
    return digits.span();
}
|
2022-01-11 15:07:45 +00:00
|
|
|
|
2023-08-22 20:12:51 +00:00
|
|
|
// Returns a copy of `number` in which every ASCII digit has been swapped for the corresponding digit
// of the number system named by `system`. All other bytes are copied through unchanged. If no digits
// are available for `system`, the "latn" digits are used instead.
String replace_digits_for_number_system(StringView system, StringView number)
{
    auto number_system_digits = get_digits_for_number_system(system);

    // Unknown number system: retry with "latn", which is required to resolve.
    if (!number_system_digits.has_value())
        number_system_digits = get_digits_for_number_system("latn"sv);
    VERIFY(number_system_digits.has_value());

    StringBuilder result;

    for (auto character : number) {
        if (!is_ascii_digit(character)) {
            result.append(character);
            continue;
        }

        // The digit's numeric value is its index into the number system's digit table.
        u32 replacement = number_system_digits->at(parse_ascii_digit(character));
        result.append_code_point(replacement);
    }

    return MUST(result.to_string());
}
|
|
|
|
|
2022-08-25 12:53:40 +00:00
|
|
|
#if ENABLE_UNICODE_DATA
|
2022-07-20 17:52:36 +00:00
|
|
|
// Decodes `string` as UTF-8 and returns its final code point, or 0 if the view yields no code points.
static u32 last_code_point(StringView string)
{
    u32 final_code_point = 0;

    // Walk the whole view front to back; whatever was seen last is the answer.
    for (auto current : Utf8View { string })
        final_code_point = current;

    return final_code_point;
}
|
2022-08-25 12:53:40 +00:00
|
|
|
#endif
|
2022-07-20 17:52:36 +00:00
|
|
|
|
2021-11-27 15:53:42 +00:00
|
|
|
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
|
2023-08-22 20:12:51 +00:00
|
|
|
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
|
2021-11-27 15:53:42 +00:00
|
|
|
{
|
|
|
|
#if ENABLE_UNICODE_DATA
|
|
|
|
constexpr auto number_key = "{number}"sv;
|
|
|
|
constexpr auto currency_key = "{currency}"sv;
|
|
|
|
constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
|
|
|
|
|
|
|
|
auto number_index = base_pattern.find(number_key);
|
|
|
|
VERIFY(number_index.has_value());
|
|
|
|
|
|
|
|
auto currency_index = base_pattern.find(currency_key);
|
|
|
|
VERIFY(currency_index.has_value());
|
|
|
|
|
|
|
|
Utf8View utf8_currency_display { currency_display };
|
2023-01-22 16:55:26 +00:00
|
|
|
Optional<String> currency_key_with_spacing;
|
2021-11-27 15:53:42 +00:00
|
|
|
|
|
|
|
if (*number_index < *currency_index) {
|
|
|
|
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
|
2021-11-27 15:53:42 +00:00
|
|
|
u32 first_currency_code_point = *utf8_currency_display.begin();
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(first_currency_code_point, Unicode::GeneralCategory::Symbol))
|
2023-08-22 20:12:51 +00:00
|
|
|
currency_key_with_spacing = MUST(String::formatted("{}{}", spacing, currency_key));
|
2021-11-27 15:53:42 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) {
|
2021-11-27 15:53:42 +00:00
|
|
|
u32 last_currency_code_point = last_code_point(currency_display);
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(last_currency_code_point, Unicode::GeneralCategory::Symbol))
|
2023-08-22 20:12:51 +00:00
|
|
|
currency_key_with_spacing = MUST(String::formatted("{}{}", currency_key, spacing));
|
2021-11-27 15:53:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (currency_key_with_spacing.has_value())
|
2023-08-22 20:12:51 +00:00
|
|
|
return MUST(MUST(String::from_utf8(base_pattern)).replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly));
|
2021-11-27 15:53:42 +00:00
|
|
|
#endif
|
|
|
|
|
2023-08-22 20:12:51 +00:00
|
|
|
return {};
|
2021-11-27 15:53:42 +00:00
|
|
|
}
|
|
|
|
|
2022-07-20 17:52:36 +00:00
|
|
|
// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
|
2023-08-22 20:12:51 +00:00
|
|
|
Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper)
|
2022-07-20 17:52:36 +00:00
|
|
|
{
|
|
|
|
#if ENABLE_UNICODE_DATA
|
|
|
|
auto range_pattern_with_spacing = [&]() {
|
2023-08-22 20:12:51 +00:00
|
|
|
return MUST(String::formatted(" {} ", range_separator));
|
2022-07-20 17:52:36 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
Utf8View utf8_range_separator { range_separator };
|
|
|
|
Utf8View utf8_upper { upper };
|
|
|
|
|
|
|
|
// NOTE: Our implementation does the prescribed checks backwards for simplicity.
|
|
|
|
|
|
|
|
// To determine whether to add spacing, the currently recommended heuristic is:
|
|
|
|
// 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
|
|
|
|
for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
|
2022-09-02 16:01:10 +00:00
|
|
|
if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space))
|
2023-08-22 20:12:51 +00:00
|
|
|
return {};
|
2022-07-20 17:52:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
|
|
|
|
if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number))
|
2022-07-20 17:52:36 +00:00
|
|
|
return range_pattern_with_spacing();
|
|
|
|
}
|
|
|
|
|
2022-09-02 16:01:10 +00:00
|
|
|
if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number))
|
2022-07-20 17:52:36 +00:00
|
|
|
return range_pattern_with_spacing();
|
|
|
|
#endif
|
|
|
|
|
2023-08-22 20:12:51 +00:00
|
|
|
return {};
|
2022-07-20 17:52:36 +00:00
|
|
|
}
|
|
|
|
|
2021-11-27 15:53:42 +00:00
|
|
|
}
|