NumberFormat.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibUnicode/CharacterTypes.h>
  9. #include <LibUnicode/Locale.h>
  10. #include <LibUnicode/NumberFormat.h>
  11. #if ENABLE_UNICODE_DATA
  12. # include <LibUnicode/UnicodeData.h>
  13. #endif
  14. namespace Unicode {
  15. Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
  16. Optional<NumberGroupings> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return {}; }
  17. Optional<NumberFormat> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return {}; }
  18. Vector<NumberFormat> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType) { return {}; }
  19. Vector<NumberFormat> __attribute__((weak)) get_unit_formats(StringView, StringView, Style) { return {}; }
  20. Optional<Span<u32 const>> __attribute__((weak)) get_digits_for_number_system(StringView)
  21. {
  22. // Fall back to "latn" digits when Unicode data generation is disabled.
  23. constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } };
  24. return digits.span();
  25. }
  26. String replace_digits_for_number_system(StringView system, StringView number)
  27. {
  28. auto digits = get_digits_for_number_system(system);
  29. if (!digits.has_value())
  30. digits = get_digits_for_number_system("latn"sv);
  31. VERIFY(digits.has_value());
  32. StringBuilder builder;
  33. for (auto ch : number) {
  34. if (is_ascii_digit(ch)) {
  35. u32 digit = digits->at(parse_ascii_digit(ch));
  36. builder.append_code_point(digit);
  37. } else {
  38. builder.append(ch);
  39. }
  40. }
  41. return builder.build();
  42. }
  43. static u32 last_code_point(StringView string)
  44. {
  45. Utf8View utf8_string { string };
  46. u32 code_point = 0;
  47. for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
  48. code_point = *it;
  49. return code_point;
  50. }
  51. // https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
  52. Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
  53. {
  54. #if ENABLE_UNICODE_DATA
  55. constexpr auto number_key = "{number}"sv;
  56. constexpr auto currency_key = "{currency}"sv;
  57. constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
  58. auto number_index = base_pattern.find(number_key);
  59. VERIFY(number_index.has_value());
  60. auto currency_index = base_pattern.find(currency_key);
  61. VERIFY(currency_index.has_value());
  62. Utf8View utf8_currency_display { currency_display };
  63. Optional<String> currency_key_with_spacing;
  64. if (*number_index < *currency_index) {
  65. u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
  66. if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
  67. u32 first_currency_code_point = *utf8_currency_display.begin();
  68. if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol))
  69. currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key);
  70. }
  71. } else {
  72. u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index));
  73. if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) {
  74. u32 last_currency_code_point = last_code_point(currency_display);
  75. if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol))
  76. currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing);
  77. }
  78. }
  79. if (currency_key_with_spacing.has_value())
  80. return base_pattern.replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly);
  81. #endif
  82. return {};
  83. }
  84. // https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
  85. Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper)
  86. {
  87. #if ENABLE_UNICODE_DATA
  88. auto range_pattern_with_spacing = [&]() {
  89. return String::formatted(" {} ", range_separator);
  90. };
  91. Utf8View utf8_range_separator { range_separator };
  92. Utf8View utf8_upper { upper };
  93. // NOTE: Our implementation does the prescribed checks backwards for simplicity.
  94. // To determine whether to add spacing, the currently recommended heuristic is:
  95. // 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
  96. for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
  97. if (code_point_has_property(*it, Property::White_Space))
  98. return {};
  99. }
  100. // 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
  101. if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
  102. if (!code_point_has_general_category(*it, GeneralCategory::Decimal_Number))
  103. return range_pattern_with_spacing();
  104. }
  105. if (!code_point_has_general_category(last_code_point(lower), GeneralCategory::Decimal_Number))
  106. return range_pattern_with_spacing();
  107. #endif
  108. return {};
  109. }
  110. }